
    DUf                        d dl Z d dlZd dlZd dlZd dlZd dlmZ ddl	m
Z
 ddlmZ ddlmZmZ ddl	mZmZ  ej                     ej        d	ed
           ej        dddd           ej        dddd           ej        ddedd           ej        dde           ej        ddedd           ej        ddedd           ej        dd ed d           ej        d!d" ej        d#                     ej        d$d%d&ed'd           ej        d(d)d*e ed+          d           ej        d,d-ed.d           ej        d/d0ed1d           ej        d2d3ed4d           ej        d5d6d7dd           ej        d8d9dd           ej        d:d;dd           ej        d<d= ej        g d>          d?d          d@                                                                                                                                                                                                                                     ZdS )A    N)Pool   )ice)Cooler)bedsliceparse_cooler_uri   )cli
get_loggercool_uri	COOL_PATH)typemetavarz
--cis-onlyzMCalculate weights against intra-chromosomal data only instead of genome-wide.TF)helpis_flagdefaultz--trans-onlyzMCalculate weights against inter-chromosomal data only instead of genome-wide.z--ignore-diagszNumber of diagonals of the contact matrix to ignore, including the main diagonal. Examples: 0 ignores nothing, 1 ignores the main diagonal, 2 ignores diagonals (-1, 0, 1), etc.)r   r   r   show_defaultz--ignore-distzDistance from the diagonal in bp to ignore. The maximum of the corresponding number of diagonals and `--ignore-diags` will be used.)r   r   z	--mad-maxzIgnore bins from the contact matrix using the 'MAD-max' filter: bins whose log marginal sum is less than ``mad-max`` median absolute deviations below the median log marginal sum of all the bins in the same chromosome.   z	--min-nnzz_Ignore bins from the contact matrix whose marginal number of nonzeros is less than this number.
   z--min-countzRIgnore bins from the contact matrix whose marginal count is less than this number.z--blacklistzPath to a 3-column BED file containing genomic regions to mask out during the balancing procedure, e.g. sequence gaps or regions of poor mappability.)existsz--nprocz-pz.Number of processes to split the work between.   z--chunksizez-czFControl the number of pixels handled by each worker process at a time.g    cAz--tolzKThreshold value of variance of the marginals for the algorithm to converge.gh㈵>z--max-iterszGMaximum number of iterations to perform if convergence is not achieved.   z--namezName of column to write to.weightz--forcez-fz=Overwrite the target dataset, 'weight', if it already exists.z--checkz4Check whether a data column 'weight' already exists.z--stdoutz8Print weight column to stdout instead of saving to file.z--convergence-policyah  What to do with weights when balancing doesn't converge in max_iters. 'store_final': Store the final result, regardless of whether the iterations converge to the specified tolerance; 'store_nan': Store a vector of NaN values to indicate that the matrix failed to converge; 'discard': Store nothing and exit gracefully; 'error': Abort with non-zero exit status.)store_final	store_nandiscarderrorr   c                 
   t          t                    }t          |           \  }}|rt          j        |d          5 }||         }||d         vr/t          j        | d| d           t          j        d           n.t          j        | d| d           t          j        d           d	d	d	           n# 1 swxY w Y   |	r|
rt          j	        d
          t          j        |d          5 }||         }||d         v rD|sB|s7t          d| ddz   t          j                   t          j        d           n	|d         |= d	d	d	           n# 1 swxY w Y   |                    d|  d           t          |           }|7dd	l}t          |          5 }t!          j        |d|                                                    |                    d                    rdnd	g dg ddt*          i          }d	d	d	           n# 1 swxY w Y   |                                d	d	                             dd          }|j        }g }|                                D ]G\  }}t5          |||j        |j        |j        f          } |                    | j        j                    HtC          j"        |          }nd	}|7tG          |tI          tC          j%        ||j&        z                                }	 |dk    rtO          |          }!|!j(        }"ntR          }"tU          j+        |||	|
|||||||dd|"          \  }#}$|dk    r|!,                                 n # |dk    r|!,                                 w w xY wtC          j-        |$d                   s|.                    d           |dk    r|.                    d           n|d k    r'|.                    d!           tB          j/        |#d	d	<   n_|d"k    r*|.                    d#           t          j        d           n/|d$k    r)|.                    d#           t          j        d           |r8t!          j0        |#          1                    t          j2        ddd%d&'           d	S t          j        |d          5 }||         }d(d)d*}% |d         j3        |fd+|#i|% |d         |         j4        5                    |$           d	d	d	           d	S # 1 swxY w Y   d	S ),z
    Out-of-core matrix balancing.

    Matrix must be symmetric. See the help for various filtering options to
    mask out poorly mapped bins.

    COOL_PATH : Path to a COOL file.

    rbinsz: No 'z' column found.r	   z::z is balanced.r   Nz8Provide at most one of --cis-only and --trans-only flagszr+'z' column already exists. z Use --force option to overwrite.)filezBalancing ""	i   )r   r	   r   )chromstartendr%   )sepheaderusecolsnamesdtypeT)observedF)	chunksizecis_only
trans_onlytolmin_nnz	min_count	blacklistmad_max	max_itersignore_diagsrescale_marginalsuse_lockmap	convergedz+Iteration limit reached without convergencer   z6Storing final result. Check log to assess convergence.r   zSaving weights as NaN.r   zDiscarding result and aborting.r    z%g)r)   indexna_repfloat_formatgzip   )compressioncompression_optsdata)6r   __name__r   h5pyFileclickechosysexit
UsageErrorprintstderrinfor   csvopenpdread_csvSniffer
has_headerreadstrr    groupby
chromsizesiterrowsr   r%   r&   r'   appendr=   valuesnpconcatenatemaxintceilbinsizer   imap_unorderedr:   r   iterative_correctioncloseallr   nanSeries	to_stringstdoutcreate_datasetattrsupdate)&r   nprocr.   r5   r2   r3   r4   r7   r1   r/   r0   r6   nameforcecheckrj   convergence_policyignore_distlogger	cool_path
group_pathh5grpclrrP   fbad_regionsbins_groupedrY   bad_bins_regresultpoolmap_biasstatsh5optss&                                         O/var/www/html/software/conda/lib/python3.11/site-packages/cooler/cli/balance.pybalancer      sc   D !!F,X66Iz Yy#&& 	"Z.C3v;&&
iDDtDDDEEE
iDD:DDDEEE	 	 	 	 	 	 	 	 	 	 	 	 	 	 	  
J 
F
 
 	
 
9d	#	# &rn3v;v &777789   
 K%& & & & & & & & & & & & & & & KK)h)))***


C


)__ 	+KKMM44QVVD\\BBLqq!		///n  K	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 xxzz!!!},,Wt,DD^
!**,, 	1 	1FAslJCIsw8WXXFOOFL/0000>(++<RW[3;5N-O-O)P)PQQ199;;D&DDD.!%"
 
 
e$ 199JJLLL 199JJLLLL  6%$%% BCCC..LLQRRRR;..LL1222fDGG9,,LL:;;;HQKKKK7**LL:;;;HQKKK 
2
	$!!JuE"4 	" 	
 	
 	
 	
 	
 Yy$'' 	22Z.C%+CCF&CK&tAA$A&AAAK#**5111	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2sX   A0B::B>B>2AEEEA"HHH>A	M" "M?(A
S??TT)rJ   rH   rF   numpyr]   pandasrR   multiprocessr   r<   r   apir   utilr   r   r
   r   commandargumentrW   optionr`   PathfloatChoicer        r   <module>r      sc   



                             - - - - - - - -         
k:::
   
   
5 
   
K		   
 
	 	 	 
)	   
	   
 
4	 	 	    	9	   	Q	CII   
	   	R	   	&	   	H   	?	   	C	   
 
FFF	G	G  M2 M2            	 	     ;: JM2 M2 M2r   