
    DUf              	       ~    d dl Zd dlZddlmZ ddlmZ ddZd Z	dd	Z
ed ed
          dddddefd            ZdS )    N   )is_cooler_balanced)pool_decoratorbalancedc                 t    | d         }||d                  ||d                  z  |d         z  | d         |<   | S )z
    Multiply raw pixel counts by the balancing bias and return a modified
    chunk with an additional column named balanced_column_name
    pixelsbin1_idbin2_idcount )chunkbiasbalanced_column_namer   s       S/var/www/html/software/conda/lib/python3.11/site-packages/cooltools/api/coverage.py_apply_balancingr      sG    
 8_F,0	1B,Cd6R[K\F],]`fgn`o,oE(O()L    c                     |dk    rDt          j        | d         d         | d         d         z
            |k     }d| d         d         |<   | S )Nr   r   r	   r
   r   )npabs)r   n_diagsmasks      r   _zero_diagsr      sO    {{veHoi05?93MMNNQXX)*h &Lr   r   c                 D   | d         }| d         }t          |d                   }t          j        d|f          }||         }|d         |d                  |d         |d                  k    }|dxx         t          j        |d         ||z  |          z  cc<   |dxx         t          j        |d         ||z  |          z  cc<   |d	xx         t          j        |d         ||          z  cc<   |d	xx         t          j        |d         ||          z  cc<   |S )
a0  
    Compute cis and total coverages of a cooler chunk.
    Every interaction is contributing to the "coverage" twice:
    at its row coordinate bin1_id, and at its column coordinate bin2_id

    Parameters
    ----------
    chunk : dict of dict/pd.DataFrame
        A cooler chunk produced by the cooler split-apply-combine pipeline.
    pixel_weight_key: str
        The key of a pixel chunk to retrieve pixel weights.

    Returns
    -------
    covs : np.array 2 x n_bins
        A numpy array with cis (the first row) and total (the 4nd) coverages.
    binsr   chromr   r	   r
   r   )weights	minlength   )lenr   zerosbincount)r   pixel_weight_keyr   r   n_binscovspixel_weightscis_masks           r   _get_chunk_coverager'      s@   & =D8_FgF8QK  D+,MG}VI./4=	AR3SSHGGGr{y=8#;v   GGG 	GGGr{y=8#;v   GGG 	GGGr{6),mvVVVVGGGGGGr{6),mvVVVVGGGKr   g    cAFcovr   c	                 6   	 ||n5|                      | j                            d          dz             d         }n#  t          d          xY w|d}	npt	          | |          rM|                                 |         dd         }
t          j        |
          }t          j        |
          }
d}	nt          d| d	          t          j
                            | |||
          }|r|                    t          |          }||                    t          |
|	          }| j        d         }|                    t           |	                              t          j        t          j        |                    }|%|                    t*                    }| d| dg}n!t          j        |dd|f<   | d| | d| g}|r|                     d          5 }|t*          nt0          }t3          ||          D ]C\  }}||d         v r	|d         |= t5          dd          } |d         j        |fd|i|d|i D|=|j                            dt          j        |d                   dz  t*                     ddd           n# 1 swxY w Y   |S )a  
    Calculate the sums of cis and genome-wide contacts (aka coverage aka marginals) for
    a sparse Hi-C contact map in Cooler HDF5 format.
    Note that for raw coverage (i.e. clr_weight_name=None) the sum(tot_cov) from this 
    function is two times the number of reads contributing to the cooler, 
    as each side contributes to the coverage.

    Parameters
    ----------
    clr : cooler.Cooler
        Cooler object
    ignore_diags : int, optional
        Drop elements occurring on the first ``ignore_diags`` diagonals of the
        matrix (including the main diagonal).
        If None, equals the number of diagonals ignored during IC balancing.
    chunksize : int, optional
        Split the contact matrix pixel records into equally sized chunks to
        save memory and/or parallelize. Default is 10^7
    clr_weight_name : str
        Name of the weight column. Specify to calculate coverage of balanced cooler.
    store : bool, optional
        If True, store the results in the input cooler file when finished. If clr_weight_name=None, 
        also stores total cis counts in the cooler info. Default is False.
    store_prefix : str, optional
        Name prefix of the columns of the bin table to save cis and total coverages. 
        Will add suffixes _cis and _tot, as well as _raw in the default case or _clr_weight_name if specified.
    nproc : int, optional
        How many processes to use for calculation. Ignored if map_functor is passed.
    map_functor : callable, optional
        Map function to dispatch the matrix chunks to workers.
        If left unspecified, pool_decorator applies the following defaults: if nproc>1 this defaults to multiprocess.Pool;
        If nproc=1 this defaults the builtin map. 

    Returns
    -------
    cis_cov : 1D array, whose shape is the number of bins in ``h5``. Vector of bin sums in cis.
    tot_cov : 1D array, whose shape is the number of bins in ``h5``. Vector of bin sums.

    N/z/bins/weightignore_diagszjPlease, specify ignore_diags and/or IC balance this cooler! Cannot access the value used in IC balancing. r   r   z+cooler is not balanced, orbalancing weight z  is not available in the cooler.)	chunksizemapuse_lock)r   )r   r   nbins)r"   _cis_raw_tot_raw_cis__tot_zr+r   gzip   )compressioncompression_optsdatadtypecisr   r   )r9   )_load_attrsrootrstrip
ValueErrorr   r   r   isnan
nan_to_numcoolerparallelsplitpiper   r   infor'   reduceaddr    astypeintnanopenfloatzipdictcreate_datasetattrscreatesum)clrr+   r,   r.   clr_weight_namestorestore_prefixnprocmap_functorr"   r   bias_na_maskchunksr#   r$   store_namesgrpr9   
store_namecov_arrh5optss                        r   coverager`   A   s`   j	
 ' L!5!5!FGGW 	

x
 
 	
 "	C	1	1 
xxzz/*111-x~~}T""%R /R R R
 
 	

 _""3)W_"``F @[,??"-DO_``XgF;;*=M;NNUUVXV\^`^fgm^n^nooD{{3&000\2K2K2KM "QQQ_&>>_>><@g@gVe@g@gi IXXd^^ 
	Is*2CCE'*;'='=  #
GV,,FJ/&1EEE*F* %,06 >C     &	  tAw1(<C HHH
	I 
	I 
	I 
	I 
	I 
	I 
	I 
	I 
	I 
	I 
	I 
	I 
	I 
	I 
	I Ks   := AB$JJJ)r   )r   )numpyr   cooler.parallelrA   
lib.checksr   
lib.commonr   r   r   r'   rI   r-   r`   r   r   r   <module>re      s            + + + + + + ' ' ' ' ' '     $ $ $ $L  c#hh

k k k k k kr   