
    DUf+                        d dl mZ d dlmZ d dlZd dlZd dlZ	d dl
Z
d dlZddlmZ d dlZddlmZ ddlmZmZmZ ddlmZmZ dd	lmZ  ej                     ej        d
de           ej        dde eed                     ej        dde eed                     ej        ddd ej        ddg          dd           ej        ddedd           ej        dd edd           ej        d!d"d#ed$d           ej        d%d&d'eefd(d)           ej        d*d+eefd(d           ej        d,d-ed.d           ej        d/d0dd12           ej        d3d4d5 ej         d6          d17           ej        d8d9d:d;           ej        d< ej        g d=          dd>?           ej        d@dA ej        dBdCg          dCd           ej        dDdEdFdG           ej        dHdIedJK           ej        dLdMedK           ej        dNdOP           ej        dQdRdSdd12          dT                                                                                                                                                                                                                                                             Z!dS )U    )partialN   )api   )validate_csv)make_cooler_viewmask_cooler_bad_binsalign_track_with_cooler)read_viewframe_from_fileread_expected_from_file)cli	cool_path	COOL_PATH)metavartype
track_path
TRACK_PATHE1)default_column)r   r   callbackexpected_pathEXPECTED_PATHzbalanced.avgz-tz--contact-typez!Type of the contacts to aggregatecistransT)helpr   defaultshow_defaultz
--min-distzMinimal distance between bins to consider, bp. If negative, removesthe first two diagonals of the data. Ignored with --contact-type trans.z
--max-distzgMaximal distance between bins to consider, bp. Ignored, if negative. Ignored with --contact-type trans.z-nz--n-binsz+Number of bins for digitizing track values.2   z--vrangevrangea7  Low and high values used for binning genome-wide track values, e.g. if `range`=(-0.05, 0.05), `n-bins` equidistant bins would be generated. Use to prevent extreme track values from exploding the bin range and to ensure consistent bins across several runs of `compute_saddle` command using different track files.)NN)r   r   r   nargsz--qrangea(  Low and high values used for quantile bins of genome-wide track values,e.g. if `qrange`=(0.02, 0.98) the lower bin would start at the 2nd percentile and the upper bin would end at the 98th percentile of the genome-wide signal. Use to prevent the extreme track values from exploding the bin range.z--clr-weight-namez$Use balancing weight with this name.weightz--strength/--no-strengthz6Compute and save compartment 'saddle strength' profileF)r   is_flagr   z--viewz	--regionsa  Path to a BED file containing genomic regions for which saddleplot will be calculated. Region names can be provided in a 4th column and should match regions and their names in expected. Note that '--regions' is the deprecated name of the option. Use '--view' instead. )exists)r   r   requiredz-oz--out-prefixzDump 'saddledata', 'binedges' and 'hist' arrays in a numpy-specific .npz container. Use numpy.load to load these arrays into a dict-like object. The digitized signal values are saved to a bedGraph-style TSV.)r   r%   z--fig)pngjpgsvgpdfpsepszGenerate a figure and save to a file of the specified format. If not specified - no image is generated. Repeat for multiple output formats.)r   multipler   z--scalezValue scale for the heatmaplinearlogz--cmapzName of matplotlib colormapcoolwarm)r   r   r   z--vminzLow value of the saddleplot colorbar. Note: value in original units irrespective of used scale, and therefore should be positive for both vmin and vmax.g      ?)r   r   r   z--vmaxz%High value of the saddleplot colorbarz--hist-colorz!Face color of histogram bar chart)r   z-vz	--verbosezEnable verbose outputc                 |   t          j        |           }|\  }}|\  }}ddd|g}dt          j        dt          j        dt          j        |t          j        i}t          j        |||d|          }t          |          }||}nt          ||d          }t          j        d |                                                    d          D                       }|g}t          |||||	          }|d
k     rd}n)t          t          j        ||j        z                      }|d
k    r*t          t          j        ||j        z                      } nd} |	r.t'          ||f|                                dd         |	f          }|d
         d}|d
         d}||t+          d          t-          ||||	dd          }t.          j                            |j        ddddf         |||d          \  }!}"t.          j                            |||!|ddd||	|d|| |          \  }#}$|#|$z  }%t7          |%|"|!|$          }&|
r/t.          j                            |#|$          }'|'dd         }'|'|&d<   t          j        |dz   fi |& |!                    |dz   dd           t?          |          r		 d
dl }(|(!                    d           d
dl"m#}) n?# tH          $ r2 tK          dtL          j'                   tM          j(        d           Y nw xY w|d}*n|(j)        j*        +                    |          }*tY          j-        |           d .                    |          z   }+||d!z   },n|},d"}-t.          j        /                    ||%|||||||*|+|,|,|-|#           |D ]!}.|)0                    |d$z   |.z   d%&            dS dS )'a  
    Calculate saddle statistics and generate saddle plots for an arbitrary
    signal track on the genomic bins of a contact matrix.

    COOL_PATH : The paths to a .cool file with a balanced Hi-C map. Use the
    '::' syntax to specify a group path in a multicooler file.

    TRACK_PATH : The path to bedGraph-like file with a binned compartment track
    (eigenvector), including a header. Use the '::' syntax to specify a column
    name.

    EXPECTED_PATH : The paths to a tsv-like file with expected signal,
    including a header. Use the '::' syntax to specify a column name.

    Analysis will be performed for chromosomes referred to in TRACK_PATH, and
    therefore these chromosomes must be a subset of chromosomes referred to in
    COOL_PATH and EXPECTED_PATH.

    COOL_PATH, TRACK_PATH and EXPECTED_PATH must be binned at the same
    resolution (expect for  EXPECTED_PATH in case of trans contact type).

    EXPECTED_PATH must contain at least the following columns for cis contacts:
    'chrom', 'diag', 'n_valid', value_name and the following columns for trans
    contacts: 'chrom1', 'chrom2', 'n_valid', value_name value_name is controlled
    using options. Header must be present in a file.

    chromstartendN)usecolsdtypecommentverboseT)check_sortingc                     g | ]H\  }}|j         j        d          t          j        |j                  t          j        |j                  fIS )r   )r1   ilocnpnanminr2   nanmaxr3   ).0igroups      Q/var/www/html/software/conda/lib/python3.11/site-packages/cooltools/cli/saddle.py
<listcomp>zsaddle.<locals>.<listcomp>   sU     	
 	
 	
5 [a ")EK"8"8")EI:N:NO	
 	
 	
    )contact_typeexpected_value_colsverify_viewverify_coolerr      r   z,only one of vrange or qrange can be suppliedF)view_dfclr_weight_namemask_clr_bad_binsdrop_track_na   z.d)r    qrangedigitized_suffixname)	r    rN   rI   rJ   expected_value_colview_name_colmin_diagmax_diagr7   )
saddledatabinedges	digitizedsaddlecountsr   saddle_strengthz.saddledumpz.digitized.tsv	)sepindexAggzInstall matplotlib to use )file)g?g?g?z ({})z
 quantilesz(contact frequency / expected))r    rN   scalevminvmaxcolortitlexlabelylabelclabelcmap.tight)bbox_inches)1coolerCoolerr;   str_int64float64pd
read_tabler   r   bioframemake_viewframereset_indexgroupbyr   intceilbinsizefloorr	   bins
ValueErrorr
   r   saddledigitizer:   dictrY   savezto_csvlen
matplotlibusematplotlib.pyplotpyplotImportErrorprintsysstderrexitcolorscolorConverterto_rgbopbasenameformat
saddleplotsavefig)/r   r   r   rD   min_distmax_distn_binsr    rN   rJ   strengthview
out_prefixfigr_   rg   r`   ra   
hist_colorr7   clrrQ   
track_nametrack_columnstrack_dtypetrackcooler_view_dfrI   track_view_dfexpected_summary_colsexpectedrS   rT   digitized_trackrV   SCrU   to_saveratiosmplpltrb   rc   track_labelrf   exts/                                                  rA   r|   r|      s   v -	
"
"C(5%M%'J
 guj9M 	rxBJ	K M  E &c**N | *4DIII +	
 	
!--//77@@	
 	
 	
 M 	 '!1  H !||rwx#+566771}}rx3; 67788 
$J#((**QQQ-!A
 
 ayay!3GHHH $'  E !$
 3 3
111bqb5 !4 ! !OX :'-   DAq  QJ!	  G  ,++Aq11"%+!" HZ-'337333:(88d%PPP 3xx -E	$$$$GGENNN+++++++ 	 	 	.SZ@@@@HQKKKKK	 EE J-44Z@@EI&&)E)EE$|3KK$K1
 	 	
 	
 	
"  	E 	ECKK
S(3.GKDDDD[-E -EX	E 	Es   2K 9LL)"	functoolsr   os.pathpathr   r   pandasrp   numpyr;   rk   rr    r   clickutilr   
lib.commonr   r	   r
   lib.ior   r   r   commandargumentstroptionChoicerv   floatPathr|    rC   rA   <module>r      s5               



                        X X X X X X X X X X F F F F F F F F       3   	W\$777	   	W\.AAA	   	,	ug&	'	'   
N	   
*	   	6	   
#
 
   
L
 
 
 
 	/	   	A	   
Z
 
4	 	 	 
 
 
 
    	???	@	@
	   	&	x'	(	(   0*SW   
? 
   :PQ   n#FGGG+3T5  aE aE  HG      
 
  
 
         RaE aE aErC   