
    DUf:                        d dl mZ ddlmZ ddlmZ d dlZddlmZ ddl	m
Z
  e
j                     ej        d	d
e eed                     ej        dded           ej        dddeddd           ej        ddd ej        ddg          ddd           ej        dddeddd           ej        d d!d"edd#d           ej        d$d%d& ej        g d'          d(dd)           ej        d*d+d, ej        g d'          d-dd)           ej        d.d/d0ed1          d2                                                                                                                         ZdS )3    )partial   )expected)read_expected_from_fileN   )validate_csv)cliexpected_pathEXPECTED_PATHzbalanced.sum)default_column)metavartypecallbackoutput_prefixOUTPUT_PREFIX)r   r   nargsz--bins-per-order-magnitudebins_per_order_magnitudezaHow many bins per order of magnitude. Default of 10 has a ratio of neighboring bins of about 1.25
   T)r   helpr   r   defaultshow_defaultz--bin-layout
bin_layoutaZ  'fixed' means that bins are exactly the same for different datasets, and only depend on bins_per_order_magnitude 'longest_regio' means that the last bin will end at size of the longest region. 
GOOD: the last bin will have as much data as possible. 
BAD: bin edges will end up different for different datasets, you can't divide them by each otherfixedlongest_regionz--min-nvalid
min_nvalida  For each region, throw out bins (log-spaced) that have less than min_nvalid valid pixels. This will ensure that each entree in Pc by region has at least n_valid valid pixels. Don't set it to zero, or it will introduce bugs. Setting it to 1 is OK, but not recommended.   z--min-count	min_countzIf counts are found in the data, then for each region, throw out bins (log-spaced) that have more than min_counts of counts.sum (raw Hi-C counts). This will ensure that each entree in P(s) by region has at least min_count raw Hi-C reads2   z--spread-funcsspread_funcsa	  A way to estimate the spread of the P(s) curves between regions. * 'minmax' - the minimum/maximum of by-region P(s)
* 'std' - weighted standard deviation of P(s) curves (may produce negative results)
 * 'logstd' (recommended) weighted standard deviation in logspace)minmaxstdlogstdr"   )r   r   r   r   r   r   z--spread-funcs-slopespread_funcs_slopezASame as spread-funcs, but for slope (derivative) ratehr than P(s)r!   z--resolution
resolutionzlData resolution in bp. If provided, additonal column of separation in bp (s_bp) will be added to the outputs)r   r   r   r   c	                    | \  } }	|	g}
d|
vr|
                     d           t          | d|
          }|	                    d          ^}}| d}t          j        ||	||||          \  }}}t          j        |||||          \  }}||d	         |z  |d
<   |d	         |z  |d
<   |                    | dddd           |                    | dddd           dS )a  
    Logarithmically bin expected values generated using compute_expected for cis data.

    This smoothes the data, resulting in clearer plots and more robust analysis results.
    Also calculates derivative after gaussian smoothing.
    For a very detailed escription, see
    https://github.com/open2c/cooltools/blob/51b95c3bed8d00a5f1f91370fc5192d9a7face7c/cooltools/expected.py#L988

    EXPECTED_PATH : The paths to a .tsv file with output of compute_expected.
    Must include a header. Use the '::' syntax to specify a summary column name.

    OUTPUT_PREFIX: Output file name prefix to store the logbinned expected
    (prefix.log.tsv) and derivative (prefix.der.tsv) in the tsv format."
    z	count.sumcis)contact_typeexpected_value_cols.z.avg)summary_namer   r   r   r   )Pc_namebinned_exp_sloper   r#   Nzdist.avgs_bpz.log.tsv	Fnan)sepindexna_repz.der.tsv)appendr   splitr   logbin_expectedcombine_binned_expectedto_csv)r
   r   r   r   r   r   r   r#   r$   exp_summary_nameexpected_summary_colscvdexp_summary_base_r+   lb_cvd	lb_slopeslb_distbins
lb_cvd_agglb_slopes_aggs                       Z/var/www/html/software/conda/lib/python3.11/site-packages/cooltools/cli/logbin_expected.pyr5   r5   
   s|   b '4#M#-.///$$[111
!1  C ,11#66q!'''G%-%=%!9& & &"FI{ !) @"!-! ! !J '
3j@
6 -j 9J Ff"""	     """	          )	functoolsr   apir   lib.ior   clickutilr    r	   commandargumentstroptionintChoicer5    rC   rB   <module>rQ      s               , , , , , ,              	W\.AAA	   s!LLL &
B	
	 	 	 
* 
w 01	2	2
   

 

   
 

   
G 
111	2	2
    	L	111	2	2
   
*	
  L L      	 	 ML  lL L LrC   