
    tf                        d dl Z d dlZd dlZd dlZd dlZd dlmZ ddl	m
Z
 ddlmZ ddlmZmZ ddl	mZmZ  ej$                          ej&                  d	ed
       ej*                  dddd       ej*                  dddd       ej*                  ddedd       ej*                  dde       ej*                  ddedd       ej*                  ddedd       ej*                  dd ed d       ej*                  d!d" ej.                  d#             ej*                  d$d%d&ed'd       ej*                  d(d)d*e ed+      d       ej*                  d,d-ed.d       ej*                  d/d0ed1d       ej*                  d2d3ed4d       ej*                  d5d6d7dd       ej*                  d8d9dd       ej*                  d:d;dd       ej*                  d<d= ej2                  g d>      d?d      d@                                                                                                                                      Zy)A    N)Pool   )ice)Cooler)bedsliceparse_cooler_uri   )cli
get_loggercool_uri	COOL_PATH)typemetavarz
--cis-onlyzMCalculate weights against intra-chromosomal data only instead of genome-wide.TF)helpis_flagdefaultz--trans-onlyzMCalculate weights against inter-chromosomal data only instead of genome-wide.z--ignore-diagszNumber of diagonals of the contact matrix to ignore, including the main diagonal. Examples: 0 ignores nothing, 1 ignores the main diagonal, 2 ignores diagonals (-1, 0, 1), etc.)r   r   r   show_defaultz--ignore-distzDistance from the diagonal in bp to ignore. The maximum of the corresponding number of diagonals and `--ignore-diags` will be used.)r   r   z	--mad-maxzIgnore bins from the contact matrix using the 'MAD-max' filter: bins whose log marginal sum is less than ``mad-max`` median absolute deviations below the median log marginal sum of all the bins in the same chromosome.   z	--min-nnzz_Ignore bins from the contact matrix whose marginal number of nonzeros is less than this number.
   z--min-countzRIgnore bins from the contact matrix whose marginal count is less than this number.z--blacklistzPath to a 3-column BED file containing genomic regions to mask out during the balancing procedure, e.g. sequence gaps or regions of poor mappability.)existsz--nprocz-pz.Number of processes to split the work between.   z--chunksizez-czFControl the number of pixels handled by each worker process at a time.g    cAz--tolzKThreshold value of variance of the marginals for the algorithm to converge.gh㈵>z--max-iterszGMaximum number of iterations to perform if convergence is not achieved.   z--namezName of column to write to.weightz--forcez-fz=Overwrite the target dataset, 'weight', if it already exists.z--checkz4Check whether a data column 'weight' already exists.z--stdoutz8Print weight column to stdout instead of saving to file.z--convergence-policyah  What to do with weights when balancing doesn't converge in max_iters. 'store_final': Store the final result, regardless of whether the iterations converge to the specified tolerance; 'store_nan': Store a vector of NaN values to indicate that the matrix failed to converge; 'discard': Store nothing and exit gracefully; 'error': Abort with non-zero exit status.)store_final	store_nandiscarderrorr   c                 t	   t        t              }t        |       \  }}|rt        j                  |d      5 }||   }||d   vr1t        j                  | d| d       t        j                  d       n0t        j                  | d| d       t        j                  d       d	d	d	       |	r|
rt        j                  d
      t        j                  |d      5 }||   }||d   v rB|s@|s8t        d| ddz   t        j                         t        j                  d       n|d   |= d	d	d	       |j                  d|  d       t        |       }| dd	l}t        |      5 }t!        j"                  |d|j%                         j'                  |j)                  d            rdnd	g dg ddt*        i      }d	d	d	       |j-                         d	d	 j/                  dd      }|j0                  }g }j3                         D ]X  \  }}t5        |||j6                  |j8                  |j:                  f      } |j=                  | j>                  j@                         Z tC        jD                  |      }nd	}|5tG        |tI        tC        jJ                  ||jL                  z                    }	 |dkD  rtO        |      }!|!jP                  }"ntR        }"tU        jV                  |||	|
|||||||dd|"      \  }#}$|dkD  r!jY                          	 tC        jZ                  |$d         s|j]                  d       |dk(  r|j]                  d       n|d k(  r%|j]                  d!       tB        j^                  |#d	d	 nW|d"k(  r'|j]                  d#       t        j                  d       n+|d$k(  r&|j]                  d#       t        j                  d       |r8t!        j`                  |#      jc                  t        jd                  ddd%d&'       y	t        j                  |d      5 }||   }d(d)d*}% |d   jf                  |fd+|#i|% |d   |   jh                  jk                  |$       d	d	d	       y	# 1 sw Y   xY w# 1 sw Y   QxY w# 1 sw Y   xY w# |dkD  r!jY                          w w xY w# 1 sw Y   y	xY w),z
    Out-of-core matrix balancing.

    Matrix must be symmetric. See the help for various filtering options to
    mask out poorly mapped bins.

    COOL_PATH : Path to a COOL file.

    rbinsz: No 'z' column found.r	   z::z is balanced.r   Nz8Provide at most one of --cis-only and --trans-only flagszr+'z' column already exists. z Use --force option to overwrite.)filezBalancing ""	i   )r   r	   r   )chromstartendr%   )sepheaderusecolsnamesdtypeT)observedF)	chunksizecis_only
trans_onlytolmin_nnz	min_count	blacklistmad_max	max_itersignore_diagsrescale_marginalsuse_lockmap	convergedz+Iteration limit reached without convergencer   z6Storing final result. Check log to assess convergence.r   zSaving weights as NaN.r   zDiscarding result and aborting.r    z%g)r)   indexna_repfloat_formatgzip   )compressioncompression_optsdata)6r   __name__r   h5pyFileclickechosysexit
UsageErrorprintstderrinfor   csvopenpdread_csvSniffer
has_headerreadstrr    groupby
chromsizesiterrowsr   r%   r&   r'   appendr=   valuesnpconcatenatemaxintceilbinsizer   imap_unorderedr:   r   iterative_correctioncloseallr   nanSeries	to_stringstdoutcreate_datasetattrsupdate)&r   nprocr.   r5   r2   r3   r4   r7   r1   r/   r0   r6   nameforcecheckrj   convergence_policyignore_distlogger	cool_path
group_pathh5grpclrrP   fbad_regionsbins_groupedrY   bad_bins_regresultpoolmap_biasstatsh5optss&                                         \/var/www/html/software/conda/envs/higlass/lib/python3.12/site-packages/cooler/cli/balance.pybalancer      sR   D !F,X6IzYYy#& 	"Z.C3v;&

i[tfODE

i[:,mDE	 JF
 	
 
9d	# &rn3v;vv6789
 K%& KK+hZq)*

C)_ 	++KKM44QVVD\Bq!/nK	 xxz!},,Wt,D^^
!**, 	1FAslJCIIsww8WXFOOFLL//0	1 >>(+<RWW[3;;5N-O)PQ19;D&&DD..!%"
e$ 19JJL66%$%BC.LLQR;.LL12ffDG9,LL:;HHQK7*LL:;HHQK
		$!!JJuE"4 	" 	
 YYy$' 	22Z.C%+CF&CK&&tA$A&AK##**51	2 	2O	 	& &&	 	` 19JJL ,	2 	2s@   A.Q,AQ9$AR/A	R AR.,Q69RRR+.R7)rJ   rH   rF   numpyr]   pandasrR   multiprocessr   r<   r   apir   utilr   r   r
   r   commandargumentrW   optionr`   PathfloatChoicer        r   <module>r      s   
        -  
k:
 
 
5 
 
K		 
 
	 
)	 
	 
 
4	  	9	 	Q	I 
	 	R	 	&	 	H 	?	 	C	 
 
F	GM2	 ; JM2r   