
    DUf{                     Z    d dl Z d dlZddlmZ d dlZ	 	 	 d	dZ	 	 	 	 	 	 	 	 	 	 	 	 	 d
dZdS )    N   )helpersFTc	                 Z   |s| }	n< | j         dd|i|}
|r)|
                                }	t          j        |
           n|
}	 t	          |	|          |i |}|rt          j        |	           |r; ||          }t          |t          j                  rt          j        |           |S |S )z
    Given a BedTool object `orig_bedtool`, call its `method` with `args` and
    `kwargs` and then call `reduce_func` on the results.

    See parallel_apply docstring for details

    gN )shufflesortr   close_or_deletegetattr
isinstance
pybedtoolsBedTool)orig_bedtoolshuffle_kwargs	genome_fnmethodmethod_argsmethod_kwargsr	   r   reduce_functo_useshuffledresultreduceds                P/var/www/html/software/conda/lib/python3.11/site-packages/pybedtools/parallel.py_parallel_wrapr      s    (  '<'FF)F~FF 	]]__F#H----F$WVV$$kC]CCF (''' +f%%fj011 	,#F+++      c              #     K   |pi }|pd}t          |t                    s4t          |t                    st          dt	          |          z            |pi }|r|rt          d          r'|s%|st          d          t          j        |          }t          | |||||||
	  	        fd|	dk    r,t          |          D ]}t          di  |          V  dS |r|nt          j        |	          fd	t          |          D             }t          |          D ]]\  }}|                                V  |r@t          j                            d
|z             t          j                                         ^dS )aA  
    Call an arbitrary BedTool method many times in parallel.

    An example use-case is to generate a null distribution of intersections,
    and then compare this to the actual intersections.

    **Important:** due to a known file handle leak in BedTool.__len__, it's
    best to simply check the number of lines in the file, as in the below
    function. This works because BEDTools programs strip any non-interval lines
    in the results.

    >>> # set up example BedTools
    >>> a = pybedtools.example_bedtool('a.bed')
    >>> b = pybedtools.example_bedtool('b.bed')

    >>> # Method of `a` to call:
    >>> method = 'intersect'

    >>> # Kwargs provided to `a.intersect` each iteration
    >>> method_kwargs = dict(b=b, u=True)

    >>> # Function that will be called on the results of
    >>> # `a.intersect(**method_kwargs)`.
    >>> def reduce_func(x):
    ...     return sum(1 for _ in open(x.fn))

    >>> # Create a small artificial genome for this test (generally you'd
    >>> # use an assembly name, like "hg19"):
    >>> genome = dict(chr1=(0, 1000))

    >>> # Do 10 iterations using 1 process for this test (generally you'd
    >>> # use 1000+ iterations, and as many processes as you have CPUs)
    >>> results = pybedtools.parallel.parallel_apply(a, method, genome=genome,
    ... method_kwargs=method_kwargs, iterations=10, processes=1,
    ... reduce_func=reduce_func, debug=True, report_iterations=True)

    >>> # get results
    >>> print(list(results))
    [1, 0, 1, 2, 4, 2, 2, 1, 2, 4]

    >>> # We can compare this to the actual intersection:
    >>> reduce_func(a.intersect(**method_kwargs))
    3

    Alternatively, we could use the `a.jaccard` method, which already does the
    reduction to a dictionary.  However, the Jaccard method requires the input
    to be sorted.  Here, we specify `sort=True` to sort each shuffled BedTool
    before calling its `jaccard` method.

    >>> from pybedtools.parallel import parallel_apply
    >>> a = pybedtools.example_bedtool('a.bed')
    >>> results = parallel_apply(a, method='jaccard', method_args=(b,),
    ... genome=genome, iterations=3, processes=1, sort=True, debug=True)
    >>> for i in results:
    ...     print(sorted(i.items()))
    [('intersection', 12), ('jaccard', 0.0171184), ('n_intersections', 1), ('union', 701)]
    [('intersection', 0), ('jaccard', 0.0), ('n_intersections', 0), ('union', 527)]
    [('intersection', 73), ('jaccard', 0.137996), ('n_intersections', 1), ('union', 529)]

    Parameters
    ----------
    orig_bedtool : BedTool

    method : str
        The method of `orig_bedtool` to run

    method_args : tuple
        Passed directly to getattr(orig_bedtool, method)()

    method_kwargs : dict
        Passed directly to getattr(orig_bedtool, method)()

    shuffle : bool
        If True, then `orig_bedtool` will be shuffled at each iteration and
        that shuffled version's `method` will be called with `method_args` and
        `method_kwargs`.

    shuffle_kwargs : dict
        If `shuffle` is True, these are passed to `orig_bedtool.shuffle()`.
        You do not need to pass the genome here; that's handled separately by
        the `genome` and `genome_fn` kwargs.

    iterations : int
        Number of iterations to perform

    genome : string or dict
        If string, then assume it is the assembly name (e.g., hg19) and get
        a dictionary of chromsizes for that assembly, then converts to
        a filename.

    genome_fn : str
        Mutually exclusive with `genome`; `genome_fn` must be an existing
        filename with the chromsizes.  Use the `genome` kwarg instead if you'd
        rather supply an assembly or dict.

    reduce_func : callable
        Function or other callable object that accepts, as its only argument,
        the results from `orig_bedtool.method()`.  For example, if you care
        about the number of results, then you can use `reduce_func=len`.

    processes : int
        Number of processes to run.  If `processes=1`, then multiprocessing is
        not used (making it much easier to debug).  This argument is ignored if
        `_orig_pool` is provided.

    sort : bool
        If both `shuffle` and `sort` are True, then the shuffled BedTool will
        then be sorted.  Use this if `method` requires sorted input.

    _orig_pool : multiprocessing.Pool instance
        If provided, uses `_orig_pool` instead of creating one.  In this case,
        `processes` will be ignored.

    debug : bool
        If True, then use the current iteration index as the seed to shuffle.

    report_iterations : bool
        If True, then report the number of iterations to stderr.
    r   z+method_args must be a list or tuple, got %sz1only of of genome_fn or genome should be providedz<shuffle=True, so either genome_fn or genome must be provided)	r   r   r   r   r   r   r   r   r	   c                 &    rr| |d         d<   |S )Nr   seedr   )ikwargsdebugr   s     r   add_seedz parallel_apply.<locals>.add_seed   s'     	1W 	1/0F#$V,r   r   Nc           
      \    g | ](}                     t          d  |                    )S )r   )apply_asyncr   ).0it_parallel_wrap_kwargsr$   ps     r   
<listcomp>z"parallel_apply.<locals>.<listcomp>   sF        	
nb((27L*M*MNN  r   z%s)r   listtuple
ValueErrortyper   chromsizes_to_filedictranger   multiprocessingPool	enumerategetsysstderrwriteflush)r   r   genomer   r   r   r   r   r   	processesr	   
_orig_pool
iterationsr#   report_iterationsr(   resultsr!   rr)   r$   r*   s          `     `     @@@r   parallel_applyrB   3   s;     R $)rN#Kk4(( 
K1O1O 
9D<M<MM
 
 	
 "'RM NV NLMMM > 	>  U   #5f==I !%#
 
 
     
 A~~
## 	H 	HB GG88B0E#F#FGGGGGG , ++     
##  G '""  1eegg 	JVaZ(((J	 r   )FTN)NNNNNTNr   FNr   FF)r7   r3    r   r   r   rB   r   r   r   <module>rD      s    



               
) ) ) )^ 	
     r   