
    DUfX                         d dl Zd dlZd dlZd dlZddlmZ ddlmZ d Z	d Z
d Zedddd	 ed
          defd            ZdS )    N   )coverage   )pool_decoratorc                 V   t           j                            | d         |          | d<   | d         dk    t          t	          |           t
          j                  r	|          } nBt          t	          |           t                    r fd|                                 D             } | S )Ncountr   c                 (    i | ]\  }}||         S  r
   .0karrmasks      Q/var/www/html/software/conda/lib/python3.11/site-packages/cooltools/api/sample.py
<dictcomp>z(sample_pixels_approx.<locals>.<dictcomp>   #    <<<61c!SY<<<    )	nprandombinomial
issubclasstypepd	DataFramedictitems)pixelsfracr   s     @r   sample_pixels_approxr      s    i(($??F7O'?QD$v,,-- =	DLL$	'	' =<<<<V\\^^<<<Mr   c                 .   t          j        t          j        | d                             }|d         }|j        d         }t           j                            ||d          }t          j        ||d          }t          j        ||          }|| d<   | d         dk    t          t          |           t          j                  r	|          } nBt          t          |           t                    r fd	|                                 D             } | S )
Nr   r   F)sizereplaceright)side)	minlengthc                 (    i | ]\  }}||         S r
   r
   r   s      r   r   z'sample_pixels_exact.<locals>.<dictcomp>)   r   r   )r   cumsumasarrayshaper   choicesearchsortedbincountr   r   r   r   r   r   )	r   r   cumcounttotaln_pixelsrandom_contactsloc
new_countsr   s	           @r   sample_pixels_exactr4      s    yF7O4455HRLE~a H i&&u5%&HHO /(O'
B
B
BC SH555J F7O'?QD$v,,-- =	DLL$	'	' =<<<<V\\^^<<<Mr   c                     | d         S )Nr   r
   )chunks    r   _extract_pixel_chunkr7   -   s    ?r   Fg    cAc	                    t          t          |           t                    rt          j        |           } |||n|||||| j        d         z  }ne|T|R|P| j                            dt          j        t          |           d         dz  t                              }	||	z  }nt          d          |dk    rt          d	          |rt          j        || j        d         z                                t                    }t          |                                 dd         |          }
t          j        ||                                 dd         |
d
           dS t          j                            | d||                              t*                                        t,          |          }t          j        ||                                 dd         g d         t/          |          d
           dS )a  
    Pick a random subset of contacts from a Hi-C map.

    Parameters
    ----------
    clr : cooler.Cooler or str
        A Cooler or a path/URI to a Cooler with input data.

    out_clr_path : str
        A path/URI to the output.

    count : int
        The target number of contacts in the sample.
        Mutually exclusive with `cis_count` and `frac`.

    cis_count : int
        The target number of cis contacts in the sample.
        Mutually exclusive with `count` and `frac`.

    frac : float
        The target sample size as a fraction of contacts in the original
        dataset. Mutually exclusive with `count` and `cis_count`.

    exact : bool
        If True, the resulting sample size will exactly match the target value.
        Exact sampling will load the whole pixel table into memory!
        If False, binomial sampling will be used instead and the sample size
        will be randomly distributed around the target value.

    chunksize : int
        The number of pixels loaded and processed per step of computation.

    nproc : int, optional
        How many processes to use for calculation. Ignored if map_functor is passed.
        
    map_functor : callable, optional
        Map function to dispatch the matrix chunks to workers.
        If left unspecified, pool_decorator applies the following defaults: if nproc>1 this defaults to multiprocess.Pool;
        If nproc=1 this defaults the builtin map. 

    Nsumcisr   r   )dtypezIPlease specify exactly one argument among `count`, `cis_count` and `frac`g      ?zNThe number of contacts in a sample cannot exceed that in the original dataset.T)orderedF)include_binsmap	chunksize)r   )chromstartend)r   r   strcoolerCoolerinfogetr   r9   r   int
ValueErrorroundastyper4   r   create_coolerbinsparallelsplitpiper7   r   iter)clrout_clr_pathr   	cis_countr   exactr?   nprocmap_functor	cis_totalr   pipelines               r   samplerZ   0   s   j $s))S!! !mC  EMi.?	%+	0Asx&	%-I,AHLLx}}Q/?1/DC(P(P(PQQ	9$
 
 	

 czz,
 
 	

  
/0077<<$SZZ\\!!!_e<<\388::aaa=&$OOOOOO O!!%[I "   T&''T&TT22 	 	HHJJqqqM3334NN		
 	
 	
 	
 	
 	
r   )numpyr   pandasr   rD   cooler.parallelr   
lib.commonr   r   r4   r7   rH   r>   rZ   r
   r   r   <module>r_      s                       ' ' ' ' ' '    .    	
c#hh
^
 ^
 ^
 ^
 ^
 ^
r   