
    DUf                        d dl mZ d dlZd dlZd dlZddlmZ ddlmZ ddlm	Z	 ddl
mZmZ dd	lmZ  ej        ej        
            ej                     ej        dded           ej        dde eed                     ej        ddd ej        dd          dd           ej        ddedd           ej        dddde            ej        d!d"ed#d           ej        d$d%edd           ej        d&d'ed(d           ej        d)d*ed+d           ej        d,d-ed.d           ej        d/d0ed1d           ej        d2d3d4dd5           ej        d6d7d8ed9          d:                                                                                                                                                                         ZdS );    )partialN   )cli   )api)make_cooler_view)read_viewframe_from_fileread_expected_from_file)validate_csv)level	cool_path	COOL_PATH)metavartypenargsexpected_pathEXPECTED_PATHzbalanced.avg)default_column)r   r   callbackz--viewz	--regionsa  Path to a BED file with the definition of viewframe (regions) used in the calculation of EXPECTED_PATH. Dot-calling will be performed for these regions independently e.g. chromosome arms. Note that '--regions' is the deprecated name of the option. Use '--view' instead. F)existsdir_okayT)helpr   defaultshow_defaultz--clr-weight-namez+Use cooler balancing weight with this name.weightz-pz--nproczQNumber of processes to split the work between. [default: 1, i.e. no process pool])r   r   r   z--max-loci-separationzLimit loci separation for dot-calling, i.e., do not call dots for loci that are further than max_loci_separation basepair apart. 2-20MB is reasonable and would capture most of CTCF-dots.i z--max-nans-tolerateda&  Maximum number of NaNs tolerated in a footprint of every used filter. Must be controlled with caution, as large max-nans-tolerated, might lead to pixels scored in the padding area of the tiles to "penetrate" to the list of scored pixels for the statistical testing. [max-nans-tolerated <= 2*w ]z--tile-sizezlTile size for the Hi-C heatmap tiling. Typically on order of several mega-bases, and <= max_loci_separation.i[ z--num-lambda-binsz}Number of log-spaced bins to divide your adjusted expected between. Same as HiCCUPS_W1_MAX_INDX (40) in the original HiCCUPS.-   z--fdrzZFalse discovery rate (FDR) to control in the multiple hypothesis testing BH-FDR procedure.g{Gz?z--clustering-radiusz}Radius for clustering dots that have been called too close to each other.Typically on order of 40 kilo-bases, and >= binsize.iX  z-vz	--verbosezEnable verbose output)r   is_flagr   z-oz--outputzDSpecify output file name to store called dots in a BEDPE-like format)r   r   requiredc                    t          j        |           }|\  }}|t          |          }nt          ||d          }t	          |d|g||          }t
          j                            |||||d||||	|
d||          }|r|                    |dddd	
           dS t          |                    |dddd	
                     dS )a  
    Call dots on a Hi-C heatmap that are not larger than max_loci_separation.

    COOL_PATH : The paths to a .cool file with a balanced Hi-C map.

    EXPECTED_PATH : The paths to a tsv-like file with expected signal,
    including a header. Use the '::' syntax to specify a column name.

    Analysis will be performed for chromosomes referred to in EXPECTED_PATH, and
    therefore these chromosomes must be a subset of chromosomes referred to in
    COOL_PATH. Also chromosomes refered to in EXPECTED_PATH must be non-trivial,
    i.e., contain not-NaN signal. Thus, make sure to prune your EXPECTED_PATH
    before applying this script.

    COOL_PATH and EXPECTED_PATH must be binned at the same resolution.

    EXPECTED_PATH must contain at least the following columns for cis contacts:
    'region1/2', 'dist', 'n_valid', value_name. value_name is controlled using
    options. Header must be present in a file.

    NT)check_sortingcis)contact_typeexpected_value_colsverify_viewverify_cooler)expected_value_colclr_weight_nameview_dfkernelsmax_loci_separationmax_nans_toleratedn_lambda_binslambda_bin_fdrclustering_radiuscluster_filtering	tile_sizenproc	Fnan)sepheaderindexna_rep)
coolerCoolerr   r	   r
   r   	dotfinderdotsto_csvprint)r   r   viewr'   r1   r*   r+   r0   num_lambda_binsfdrr.   verboseoutputclrr&   r(   expecteddot_calls_dfs                     O/var/www/html/software/conda/lib/python3.11/site-packages/cooltools/cli/dots.pyr;   r;      s'   P -	
"
"C(5%M% |"3''*4DIII&/0  H =%%-'/-%+ &  L$  
FTuUUUUU 	DU5    	
 	
 	
 	
 	
    )	functoolsr   r8   loggingclick r   r   
lib.commonr   lib.ior	   r
   utilr   basicConfigINFOcommandargumentstroptionPathintfloatr;    rG   rF   <module>rY      s                        ) ) ) ) ) ) F F F F F F F F        ', ' ' ' ' 	
	   	W\.AAA	   
Z 
55	1	1	1
 
 
 	6	   
*	   
A 
   
R 
	 	 	 
M	   
J	   
,	   
;	   +3T5   	O	  O
 O
      	 	   
 
   HO
 O
 O
rG   