
    DUf                         d Z ddlZddlZddlZddlmZ ddlZddlmZ  ej        d          Z	d Z
d Zd Zd	 Z	 	 	 	 	 ddZdS )z
Interface between pybedtools and the R package VennDiagram.

Rather than depend on the user to have rpy2 installed, this simply writes an
R script that can be edited and tweaked by the user before being run in R.
    N)helpers)OrderedDictzb
library(VennDiagram)
venn.diagram(
    x=$x,
    filename=$filename,
    category.names = $names
c                     g }| D ]W}t          |t          j                  r#t          |                              dd          }|                    d|z             Xdd                    |          z  S )z
    Convert items in `x` to a string, and replace tabs with pipes in Interval
    string representations.  Put everything into an R vector and return as one
    big string.
    	|"%s"zc(%s),)
isinstance
pybedtoolsIntervalstrreplaceappendjoin)xitemsis      Z/var/www/html/software/conda/lib/python3.11/site-packages/pybedtools/contrib/venn_maker.py_list_to_R_syntaxr      st     E ! !a,-- 	*AtS))AVaZ    SXXe__$$    c           	          g }t          |                                           D ]-\  }}|                    d|dt          |                     .dd                    |          z  S )zI
    Calls _list_to_R_syntax for each item.  Returns one big string.
    "z" = zlist(%s), )listr   r   r   r   )dr   keyvals       r   _dict_to_R_named_listr   +   so     EOO B BSCCC):3)?)?)?@AAAA		%((((r   c                     t          j        | j        t          | j                  t          | j                  g          S )z=
    Convert a feature of any format into a BED3 format.
    )r   create_interval_from_listchromr   startstop)features    r   	truncatorr%   5   s9     /	GM**C,=,=>  r   c                 F   t          |           dk    r| d                             t                                                    }| d                             t                                                    }||z
                      ||z             }||fS t          |           dk    r| d                             t                                                    }| d                             t                                                    }| d                             t                                                    }||z
                      ||z             }||z
  |z
                      ||z                                 ||z
  |z             }|||fS t          |           dk    rv| d                             t                                                    }| d                             t                                                    }| d                             t                                                    }| d                             t                                                    }||z
                      ||z             }||z
  |z
                      ||z                                 ||z
  |z             }||z
  |z
  |z
                      ||z                                 ||z
  |z                                 ||z
  |z
  |z             }||||fS dS )at  
    Perform interval intersections such that the end products have identical     features for overlapping intervals.

    The VennDiagram package does *set* intersection, not *interval*
    intersection.  So the goal here is to represent intersecting intervals as
    intersecting sets of strings.

    Doing a simple BEDTools intersectBed call doesn't do the trick (even with
    the -u argument).  As a concrete example, what would the string be for an
    intersection of the feature "chr1:1-100" in file `x` and "chr1:50-200" in
    file `y`?

    The method used here is to substitute the intervals in `y` that overlap `x`
    with the corresponding elements in `x`.  This means that in the resulting
    sets, the overlapping features are identical.  To follow up with the
    example, both `x` and `y` would have an item "chr1:50-200" in their sets,
    simply indicating *that* one interval overlapped.

    Venn diagrams are not well suited for nested overlaps or multi-overlaps.
    To illustrate, try drawing the 2-way Venn diagram of the following two
    files. Specifically, what number goes in the middle -- the number of
    features in `x` that intersect `y` (1) or the number of features in `y`
    that intersect `x` (2)?::

        x:
            chr1  1  100
            chr1 500 6000

        y:
            chr1 50 100
            chr1 80 200
            chr9 777 888

    In this case, this function will return the following sets::

        x:
            chr1:1-100
            chr1:500-6000

        y:
            chr1:1-100
            chr9:777-888

    This means that while `x` does not change in length, `y` can.  For example,
    if there are 2 features in `x` that overlap one feature in `y`, then `y`
    will gain those two features in place of its single original feature.

    This strategy is extended for multiple intersections -- see the source for
    details.
       r            N)leneachr%   saveascat)r   r   ynew_yznew_zqnew_qs           r   cleaned_intersectr5   >   s   h 5zzQ!HMM)$$++--!HMM)$$++-- QAE""%x
5zzQ!HMM)$$++--!HMM)$$++--!HMM)$$++-- QAE"" QA&&**AEQ;77%
5zzQ!HMM)$$++--!HMM)$$++--!HMM)$$++--!HMM)$$++-- QAE"" QA&&**AEQ;77 QQ##AE**..A{;;??QaPP%%%' r   Fc                 N   |d}nd|z  }|ddt          |                    }g }| D ]E}t          |t          j                  st          j        |          }|                    |           Ft          |          }t          t          t          ||                              }	t          
                    t          |	          |t          |                    }
|r|
dd                    |          z   z  }
|
dz  }
|st          j                                        }n|}t          |d	          }|                    |
           |                                 |d
z   }|rt          j        j        st)          j                     t,          j                            t          j        j        d          dd||g}t3          j        |t2          j        t2          j                  }|                                \  }}|s|r t;          d|           t;          d|           |s|
S dS )a  
    Given a list of interval files, write an R script to create a Venn     diagram of overlaps (and optionally run it).

    The R script calls the venn.diagram function of the R package VennDiagram
    for extremely flexible Venn and Euler diagram creation.  Uses
    `cleaned_intersect()` to create string representations of shared intervals.

    `beds` is a list of up to 4 filenames or BedTools.

    `names` is a list of names to use for the Venn diagram, in the same order
    as `beds`. Default is "abcd"[:len(beds)].

    `figure_filename` is the TIFF file to save the figure as.

    `script_filename` is the optional filename to write the R script to

    `additional_args` is list that will be inserted into the R script,
    verbatim.  For example, to use scaled Euler diagrams with different colors,
    use::

        additional_args = ['euler.d=TRUE',
                           'scaled=TRUE',
                           'cat.col=c("red","blue")']

    If `run` is True, then assume R is installed, is on the path, and has
    VennDiagram installed . . . and run the script.  The resulting filename
    will be saved as `figure_filename`.
    NNULLr   abcd)r   filenamenamesr	   r   )wz.RoutRCMDBATCH)stdoutstderrzstdout:zstderr:)r+   r
   r   BedToolr   r5   r   r   ziptemplate
substituter   r   r   _tmpopenwriteclosesettings_R_installedr   _check_for_Rospath_R_path
subprocessPopenPIPEcommunicateprint)bedsr:   figure_filenamescript_filenameadditional_argsrun_bedsbedcleanedresultssfnfoutoutcmdspr@   rA   s                     r   
venn_makerrd      s   L   ?2}{T{#E  #z122 	*$S))CS&&G$s5'223344G

(
( && 	 	 	A
  .	S499_----HA $$&&C==DJJqMMMJJLLL
w,C
 %"/ 	# """Z08#>>wPRTWXT*/*/RRR 	%V 	%)V$$$)V$$$ 4r   )NNNNF)__doc__rM   stringr   r   rP   collectionsr   TemplaterD   r   r   r%   r5   rd    r   r   <module>rj      s     
			                # # # # # # 6? % % %) ) )  `& `& `&J [ [ [ [ [ [r   