o
    Nrf2                     @  s$  d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	Z
ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ dgZeeZG dd deZdd Zdd Zdd Zdd Zdd Zdd Z dd Z!dd Z"dd  Z#d!d" Z$d#d#d$d%d&d dd'dd(d)d*e%d#d#d+d,dJdIdZ&e&Z'dS )K    )annotationsN)partial)add)Literal   )
get_logger)
MapFunctor)Cooler)	partitionsplit)madbalance_coolerc                   @  s   e Zd ZdS )ConvergenceWarningN)__name__
__module____qualname__ r   r   W/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/cooler/balance.pyr      s    r   c                 C  s   t | d d S )Npixelscount)npcopy)chunkr   r   r   _init   s   r   c                 C  s   d||dk< |S )Nr   r   r   )r   datar   r   r   	_binarize   s   r   c                 C  s.   |d }t |d |d  | k }d||< |S )Nr   bin1_idbin2_idr   )r   abs)Zn_diagsr   r   r   maskr   r   r   _zero_diags"   s   r    c                 C  s8   | d d }| d }||d  ||d  k}d||< |S Nbinschromr   r   r   r   r   r   r   	chrom_idsr   r   r   r   r   _zero_trans)   
   r&   c                 C  s8   | d d }| d }||d  ||d  k}d||< |S r!   r   r$   r   r   r   	_zero_cis1   r'   r(   c                 C  s(   |d }| |d  | |d   | }|S )Nr   r   r   r   )Zvecr   r   r   r   r   r   _timesouterproduct9   s   r)   c                 C  sD   t | d d }| d }tj|d ||dtj|d ||d }|S )Nr"   r#   r   r   )weightsZ	minlengthr   )lenr   Zbincount)r   r   nr   margr   r   r   _marginalize?   s   
r.   c
                 C  s  d}
t | }t|D ]X}t||||	dt|t| tt	t
|}||dk }t |s@t
j}
t
j| d d < d} n)||  }d||dk< | | } | }td|  ||k rb nq
tdt | }
t
j| | dk< |r}| t
|
 } | |
|fS )N      ?spansmapuse_lockr           r   variance is ,Iteration limit reached without convergence.)r+   ranger   preparer   piper)   r.   reducer   r   zerosnanmeanvarloggerinfowarningswarnr   sqrt)biasclrr1   filters	chunksizer2   tol	max_itersrescale_marginalsr3   scalen_bins_r-   nzmargr>   r   r   r   _balance_genomewideH   s@   	
rO   c
                 C  s  |  d d d  }
tt|  }|d}|d}tt|}t|tj}t| }t||d d |dd  D ]\}}}t	
|
|  || || }}tt|||}d}tj}t|D ]d}t||||	dt|t| ttt|}||| }||dk }t|stj}tj| ||< d	} n5||  }d||dk< | ||  |  < | }t	
d
|  ||k r nqctd|
|  dt | }| || }tj||dk< |||< |||< |r| ||  t|  < q=| ||fS )Nnameindexes/chrom_offsetzindexes/bin1_offsetr   r/   r0   r   r4   r5   z/Iteration limit reached without convergence on .)chromsr   aranger+   
_load_dsetonesZ	full_liker<   zipr?   r@   listr
   r7   r   r8   r   r9   r)   r.   r:   r   r;   r=   r>   rA   rB   r   rC   )rD   rE   r1   rF   rG   r2   rH   rI   rJ   r3   rT   r%   chrom_offsetsZbin1_offsetsscalesZ	variancesrL   ZcidlohiZplophirK   r>   rM   r-   rN   br   r   r   _balance_cisonly}   sb   

&	
r`   c
              	     sJ  d}
t |  |d}dt fddt|d d |dd  D  }t|D ]]}t||||	dt	|	t
	t| | 	ttt }||dk }t |sctj}
tj| d d < d	} n)||  }d||dk< | | } | }td
|  ||k r nq(tdt | }
tj| | dk< |r| t|
 } | |
|fS )Nr/   rQ   c                   s*   g | ]\}}d ||    g||  qS )r   r   ).0r\   r]   rL   r   r   
<listcomp>   s    z&_balance_transonly.<locals>.<listcomp>rR   r   r0   r   r4   r5   r6   )r+   rV   r   ZconcatenaterX   r7   r   r8   r   r9   r(   r)   r.   r:   r   r;   r<   r=   r>   r?   r@   rA   rB   r   rC   )rD   rE   r1   rF   rG   r2   rH   rI   rJ   r3   rK   rZ   ZcweightsrM   r-   rN   r>   r   rb   r   _balance_transonly   sN   



rd   F      
   Tgh㈵>   i weight)cis_only
trans_onlyignore_diagsmad_maxmin_nnz	min_count	blacklistrJ   x0rH   rI   rG   r2   r3   store
store_namerE   r	   rj   boolrk   rl   int | Literal[False]rm   intrn   ro   rp   
str | NonerJ   rq   np.ndarray | NonerH   floatrI   rG   r2   r   r3   rr   rs   strreturntuple[np.ndarray, dict]c          '      C  s  t | jd }|du r|}d|fg}ntd|| |}tt|dd |dd }g }|r4|t |r>|tt	| t | jd }|	durS|	}d|t
|< ntj|td}|dkrtg|}t| |||dt|ttt|}d|||k < |}t| |||dt|ttt|}|rd|||k < |dkr| d	}t|dd |dd D ]\}}||| }|||  t||dk   < qt||dk }t|}t|} t|||   }!d|||!k < |durd||< |rt|| |||||
|||
\}}"}#n$|r&t|| |||||
|||
\}}"}#nt|| |||||
|||
\}}"}#|
||||||"|#|
k |#d
d
}$|r| d3}%||%d v rZ|%d |= ddd}&|%d j|fd|i|& |%d | j !|$ W d   ||$fS 1 sw   Y  ||$fS )a  
    Iterative correction or matrix balancing of a sparse Hi-C contact map in
    Cooler HDF5 format.

    Parameters
    ----------
    clr : cooler.Cooler
        Cooler object
    cis_only : bool, optional
        Do iterative correction on intra-chromosomal data only.
        Inter-chromosomal data is ignored.
    trans_only : bool, optional
        Do iterative correction on inter-chromosomal data only.
        Intra-chromosomal data is ignored.
    ignore_diags : int or False, optional
        Drop elements occurring on the first ``ignore_diags`` diagonals of the
        matrix (including the main diagonal).
    chunksize : int or None, optional
        Split the contact matrix pixel records into equally sized chunks to
        save memory and/or parallelize. Set to ``None`` to use all the pixels
        at once.
    mad_max : int, optional
        Pre-processing bin-level filter. Drop bins whose log marginal sum is
        less than ``mad_max`` median absolute deviations below the median log
        marginal sum.
    min_nnz : int, optional
        Pre-processing bin-level filter. Drop bins with fewer nonzero elements
        than this value.
    min_count : int, optional
        Pre-processing bin-level filter. Drop bins with lower marginal sum than
        this value.
    blacklist : list or 1D array, optional
        An explicit list of IDs of bad bins to filter out when performing
        balancing.
    rescale_marginals : bool, optional
        Normalize the balancing weights such that the balanced matrix has rows
        / columns that sum to 1.0. The scale factor is stored in the ``stats``
        output dictionary.
    map : callable, optional
        Map function to dispatch the matrix chunks to workers.
        Default is the builtin ``map``, but alternatives include parallel map
        implementations from a multiprocessing pool.
    x0 : 1D array, optional
        Initial weight vector to use. Default is to start with ones(n_bins).
    tol : float, optional
        Convergence criterion is the variance of the marginal (row/col) sum
        vector.
    max_iters : int, optional
        Iteration limit.
    store : bool, optional
        Whether to store the results in the file when finished. Default is
        False.
    store_name : str, optional
        Name of the column of the bin table to save to. Default name is
        'weight'.

    Returns
    -------
    bias : 1D array, whose shape is the number of bins in ``h5``.
        Vector of bin bias weights to normalize the observed contact map.
        Dropped bins will be assigned the value NaN.
        N[i, j] = O[i, j] * bias[i] * bias[j]
    stats : dict
        Summary of parameters used to perform balancing and the average
        magnitude of the corrected matrix's marginal sum at convergence.

    nnzNr   rR   r   Znbins)Zdtyper0   rQ   F)
rH   rn   ro   rm   rj   rl   rK   Z	convergedr>   Zdivisive_weightszr+r"   gzip   )compressionZcompression_optsr   )"rv   r@   r   rU   rY   rX   appendr&   r   r    isnanrW   ry   r   r   r8   r   r9   r.   r:   r   r;   rV   Zmedianlogr   expr`   rd   rO   openZcreate_datasetattrsupdate)'rE   rj   rk   rl   rm   rn   ro   rp   rJ   rq   rH   rI   rG   r2   r3   rr   rs   r}   r1   edgesZbase_filtersrL   rD   rF   Zmarg_nnzr-   offsetsr\   r]   Zc_margZ	logNzMargZmed_logNzMargZdev_logNzMargcutoffrK   r>   statsgrpZh5optsr   r   r   r     s   X

	
"$




)$rE   r	   rj   rt   rk   rt   rl   ru   rm   rv   rn   rv   ro   rv   rp   rw   rJ   rt   rq   rx   rH   ry   rI   rv   rG   rv   r2   r   r3   rt   rr   rt   rs   rz   r{   r|   )(
__future__r   rA   	functoolsr   operatorr   typingr   numpyr   Z_loggingr   Z_typingr   apir	   Zparallelr
   r   utilr   __all__r   r?   UserWarningr   r   r   r    r&   r(   r)   r.   rO   r`   rd   r2   r   Ziterative_correctionr   r   r   r   <module>   sV    	5GA Z