
    tf2                    b   d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	Z
ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ dgZ ee      Z G d de      Zd Zd Zd Zd Zd Zd Z d Z!d Z"d Z#d Z$dddddd dddddd e%ddd!d"	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d$d#Z&e&Z'y)%    )annotationsN)partial)add)Literal   )
get_logger)
MapFunctor)Cooler)	partitionsplit)madbalance_coolerc                      e Zd Zy)ConvergenceWarningN)__name__
__module____qualname__     X/var/www/html/software/conda/envs/higlass/lib/python3.12/site-packages/cooler/balance.pyr   r      s    r   r   c                8    t        j                  | d   d         S )Npixelscount)npcopy)chunks    r   _initr      s    775?7+,,r   c                    d||dk7  <   |S )Nr   r   r   )r   datas     r   	_binarizer       s    DOKr   c                \    |d   }t        j                  |d   |d   z
        | k  }d||<   |S )Nr   bin1_idbin2_idr   )r   abs)n_diagsr   r   r   masks        r   _zero_diagsr'   "   s;    8_F66&#fY&7787BDDJKr   c                L    | d   d   }| d   }||d      ||d      k7  }d||<   |S Nbinschromr   r"   r#   r   r   r   r   	chrom_idsr   r&   s        r   _zero_transr.   )   E    fg&I8_FVI&'9VI5F+GGDDJKr   c                L    | d   d   }| d   }||d      ||d      k(  }d||<   |S r)   r   r,   s        r   	_zero_cisr1   1   r/   r   c                8    |d   }| |d      | |d      z  |z  }|S )Nr   r"   r#   r   )vecr   r   r   s       r   _timesouterproductr4   9   s3    8_Fvi !Cy(9$::TADKr   c                    t        | d   d         }| d   }t        j                  |d   ||      t        j                  |d   ||      z   }|S )Nr*   r+   r   r"   )weights	minlengthr#   )lenr   bincount)r   r   nr   margs        r   _marginalizer<   ?   sY    E&M'"#A8_F;;vi($!Dr{{y41H D Kr   c
                   d}
t        |       }t        |      D ]  }t        ||||	      j                  t              j                  |      j                  t        |       j                  t              j                  t        t        j                  |            }||dk7     }t        |      s't        j                  }
t        j                  | d d  d} nl||j                         z  }d||dk(  <   | |z  } |j                         }t        j!                  d|        ||k  s n t#        j$                  dt&               j                         }
t        j                  | | dk(  <   |r| t        j(                  |
      z  } | |
fS )N      ?spansmapuse_lockr           r   variance is ,Iteration limit reached without convergence.)r8   ranger   preparer   piper4   r<   reducer   r   zerosnanmeanvarloggerinfowarningswarnr   sqrt)biasclrr@   filters	chunksizerA   tol	max_itersrescale_marginalsrB   scalen_bins_r;   nzmargrM   s                   r   _balance_genomewider^   H   sM    EYF9 
#Uh?WU^T']T$d+T,VC&)* 	 dai6{FFEffDGCfkkm#TQYjjll3%()91
4 	:<N	
 KKMEffDOr   c
                   |j                         d   d d  }
t        j                  t        |j                                     }|j	                  d      }|j	                  d      }t        j
                  t        |            }t        j                  |t        j                        }t        |       }t        ||d d |dd        D ]  \  }}}t        j                  |
|          ||   ||   }}t        t        |||            }d}t        j                  }t        |      D ]  }t        ||||	      j                  t               j#                  |      j#                  t$        |       j#                  t&              j)                  t*        t        j,                  |            }||| }||dk7     }t        |      s't        j                  }t        j                  | || d	} n|||j/                         z  }d||dk(  <   | ||xxx |z  ccc |j1                         }t        j                  d
|        ||k  s n" t3        j4                  d|
|    dt6               j/                         }| || }t        j                  ||dk(  <   |||<   |||<   |s| ||xxx t        j8                  |      z  ccc  | ||fS )Nnameindexes/chrom_offsetzindexes/bin1_offsetr   r>   r?   r   rC   rD   z/Iteration limit reached without convergence on .)chromsr   aranger8   
_load_dsetones	full_likerK   ziprN   rO   listr   rF   r   rG   r   rH   r4   r<   rI   r   rJ   rL   rM   rP   rQ   r   rR   )rS   rT   r@   rU   rV   rA   rW   rX   rY   rB   rd   r-   chrom_offsetsbin1_offsetsscales	variancesr[   cidlohiplophirZ   rM   r\   r;   r]   bs                               r   _balance_cisonlyru   }   sr    ZZ\&!!$F		#cjjl+,INN#9:M>>"78LWWS^$FVRVV,IYF9mCR&8-:KL .*RF3K #\"%5SYsC34ffy! 	AcC(Cg($/l#RXXf-.  2;D$!)_Fv; ffR&++-'DDOBK4K**,CKK,se,-Sy3	8 MMA&+aP"
 BKFF!q&	s	#BK2775>)K].*` ""r   c
           
        d}
t        |       }|j                  d      }dt        j                  t	        |d d |dd        D cg c]  \  }}d||z
  |z  z
  g||z
  z   c}}      z  }t        |      D ]!  }t        ||||	      j                  t              j                  |      j                  t              j                  t        | |z        j                  t              j                  t        t        j                  |            }||dk7     }t        |      s't        j                   }
t        j                   | d d  d} nl||j#                         z  }d||dk(  <   | |z  } |j%                         }t&        j)                  d|        ||k  s" n t+        j,                  d	t.               j#                         }
t        j                   | | dk(  <   |r| t        j0                  |
      z  } | |
fS c c}}w )
Nr>   ra   rb   r   r?   r   rC   rD   rE   )r8   rf   r   concatenateri   rF   r   rG   r   rH   r1   r4   r<   rI   r   rJ   rK   rL   rM   rN   rO   rP   rQ   r   rR   )rS   rT   r@   rU   rV   rA   rW   rX   rY   rB   rZ   r[   rk   rp   rq   cweightsr\   r;   r]   rM   s                       r   _balance_transonlyry      s    EYFNN#9:MR^^ mCR0-2CD	
B 27f$$&"r'2	
 H 9 
#Uh?WU^T']T)_T$dXo6T,VC&)* 	 dai6{FFEffDGCfkkm#TQYjjll3%()93
6 	:<N	
 KKMEffDOU	
s   G9F      
   Tgh㈵>   i weight)cis_only
trans_onlyignore_diagsmad_maxmin_nnz	min_count	blacklistrY   x0rW   rX   rV   rA   rB   store
store_namec                  t        | j                  d         }||}d|fg}n5t        j                  d||z   |      }t	        t        |dd |dd             }g }|r|j                  t               |r|j                  t        t        |             t        | j                  d         }|	|	}d|t        j                  |      <   nt        j                  |t              }|dkD  r|t        g|}t        | |||      j                  t               j#                  |      j#                  t$              j'                  t(        t        j*                  |            }d|||k  <   |}t        | |||      j                  t               j#                  |      j#                  t$              j'                  t(        t        j*                  |            }|rd|||k  <   |dkD  r| j-                  d	      }t        |dd |dd       D ]1  \  }}||| }|||xxx t        j.                  ||dkD           z  ccc 3 t        j0                  ||dkD           }t        j.                  |      }t3        |      } t        j4                  ||| z  z
        }!d|||!k  <   |d||<   |rt7        || |||||
|||
      \  }}"}#n3|rt9        || |||||
|||
      \  }}"}#nt;        || |||||
|||
      \  }}"}#|
||||||"|#|
k  |#d
d
}$|ri| j=                  d      5 }%||%d   v r|%d   |= ddd}& |%d   j>                  |fd|i|& |%d   |   j@                  jC                  |$       ddd       ||$fS ||$fS # 1 sw Y   ||$fS xY w)a  
    Iterative correction or matrix balancing of a sparse Hi-C contact map in
    Cooler HDF5 format.

    Parameters
    ----------
    clr : cooler.Cooler
        Cooler object
    cis_only : bool, optional
        Do iterative correction on intra-chromosomal data only.
        Inter-chromosomal data is ignored.
    trans_only : bool, optional
        Do iterative correction on inter-chromosomal data only.
        Intra-chromosomal data is ignored.
    ignore_diags : int or False, optional
        Drop elements occurring on the first ``ignore_diags`` diagonals of the
        matrix (including the main diagonal).
    chunksize : int or None, optional
        Split the contact matrix pixel records into equally sized chunks to
        save memory and/or parallelize. Set to ``None`` to use all the pixels
        at once.
    mad_max : int, optional
        Pre-processing bin-level filter. Drop bins whose log marginal sum is
        less than ``mad_max`` median absolute deviations below the median log
        marginal sum.
    min_nnz : int, optional
        Pre-processing bin-level filter. Drop bins with fewer nonzero elements
        than this value.
    min_count : int, optional
        Pre-processing bin-level filter. Drop bins with lower marginal sum than
        this value.
    blacklist : list or 1D array, optional
        An explicit list of IDs of bad bins to filter out when performing
        balancing.
    rescale_marginals : bool, optional
        Normalize the balancing weights such that the balanced matrix has rows
        / columns that sum to 1.0. The scale factor is stored in the ``stats``
        output dictionary.
    map : callable, optional
        Map function to dispatch the matrix chunks to workers.
        Default is the builtin ``map``, but alternatives include parallel map
        implementations from a multiprocessing pool.
    x0 : 1D array, optional
        Initial weight vector to use. Default is to start with ones(n_bins).
    tol : float, optional
        Convergence criterion is the variance of the marginal (row/col) sum
        vector.
    max_iters : int, optional
        Iteration limit.
    store : bool, optional
        Whether to store the results in the file when finished. Default is
        False.
    store_name : str, optional
        Name of the column of the bin table to save to. Default name is
        'weight'.

    Returns
    -------
    bias : 1D array, whose shape is the number of bins in ``h5``.
        Vector of bin bias weights to normalize the observed contact map.
        Dropped bins will be assigned the value NaN.
        N[i, j] = O[i, j] * bias[i] * bias[j]
    stats : dict
        Summary of parameters used to perform balancing and the average
        magnitude of the corrected matrix's marginal sum at convergence.

    nnzNr   rb   r   nbins)dtyper?   ra   F)
rW   r   r   r   r   r   rZ   	convergedrM   divisive_weightszr+r*   gzip   )compressioncompression_optsr   )"intrO   r   re   rj   ri   appendr.   r   r'   isnanrg   floatr    r   rG   r   rH   r<   rI   r   rJ   rf   medianlogr   expru   ry   r^   opencreate_datasetattrsupdate)'rT   r   r   r   r   r   r   r   rY   r   rW   rX   rV   rA   rB   r   r   r   r@   edgesbase_filtersr[   rS   rU   marg_nnzr;   offsetsrp   rq   c_marg	logNzMargmed_logNzMargdev_logNzMargcutoffrZ   rM   statsgrph5optss'                                          r   r   r     s   p chhuo
C	S
		!S9_i8SsU12Y/0 LK(GK>? '"#F	~ RXXd^wwvU+ {,|,#Uh?WU^T']T,VC&)* 	 $%X GcC(;		g	l		RXXf%	& 	 !"TI {..!78'#2,4 	9FB"R[FBK299VFQJ%788K	9 FF4q>*			),I-(??@TF] Y +
eS 
-
eS /
eS $3Y!E XXd^ 	8sS[(K
+%+CF&CK&&zGGGK
#))007	8 ;4;	8 ;s   AM((M4)$rT   r
   r   boolr   r   r   zint | Literal[False]r   r   r   r   r   r   r   z
str | NonerY   r   r   znp.ndarray | NonerW   r   rX   r   rV   r   rA   r	   rB   r   r   r   r   strreturnztuple[np.ndarray, dict])(
__future__r   rP   	functoolsr   operatorr   typingr   numpyr   _loggingr   _typingr	   apir
   parallelr   r   utilr   __all__r   rN   UserWarningr   r   r    r'   r.   r1   r4   r<   r^   ru   ry   rA   r   iterative_correctionr   r   r   <module>r      s   "          & 
	H		 	-
2jD#N;B )* " %V	V V 	V
 'V V V V V V 	V 
V V V 
V  !V" #V$ %V& 'Vr & r   