
    DUfF                        d dl Z d dlZd dlZd dlZd dlZd dlZd dl	m
Z
 d dlmZ ddlmZmZmZmZmZmZ ddlmZmZ ddlmZmZmZ ej                            d	          Z eed                    d k    rd d
lm Z  d\dZ! ej"                    d             Z#d Z$d Z%e$e% ej&        ddde ed          d          d                                     Z'e$e% ej&        dddedd           ej&        ddded           ej&        d d!d"ed#           ej&        d$d%d&dd'd(           ej&        d)d*d+edd          d,                                                                                     Z(e$e% ej&        dddedd           ej&        d$d%d&dd'd(           ej&        d)d*d+edd           ej&        d-d.e)d/d          d0                                                                         Z*e$e% ej&        d1d2d3ed4           ej&        d5d6d7ed4           ej&        ddded4           ej&        d d!d"ed4           ej&        d$d%d&dd'd(           ej&        d8e)ddd9:           ej&        d;d<d=dd'>           ej&        d? ej+        d@dAg          d@dBdC           ej&        dDdEe)dF           ej&        dddGedH           ej&        dIdJeK           ej&        dLdMedNd           ej&        dOdP ej,        dd'ddQ          K           ej&        dRdSdd'>           ej&        dTdUV           ej&        dWdXdd'dYZ          d[                                                                                                                                                                                                                         Z-dS )]    N)compose)Pool   )HDF5AggregatorPairixAggregatorTabixAggregatoraggregate_recordscreate_coolersanitize_records   )cli
get_logger)
parse_binsparse_field_paramparse_kv_list_param.)
get_handle#c                 D   g }|st          d          |                                }d\  }}t          | d          r| j        j        }| j        j        }n.t          | d          r| j        }| j        }nt          d           |d          }|                    |          rz |            }t          |t                    r|	                                }|
                    |                                            |d          }|                    |          z|| fS )a  Returns a header from the stream and an the reaminder of the stream
    with the actual data.
    Parameters
    ----------
    instream : a file object
        An input stream.
    comment_char : str
        The character prepended to header lines (use '@' when parsing sams,
        '#' when parsing pairsams).
    Returns
    -------
    header : list
        The header lines, stripped of terminal spaces and newline characters.
    remainder_stream : stream/file-like object
        Stream with the remaining lines.

    zPlease, provide a comment char!)NNbufferpeekz7Cannot find the peek() function of the provided stream!r   )
ValueErrorencodehasattrr   r   readline
startswith
isinstancebytesdecodeappendstrip)	instreamcomment_charheadercomment_byteread_fpeek_f
readline_fcurrent_peeklines	            M/var/www/html/software/conda/lib/python3.11/site-packages/cooler/cli/cload.py
get_headerr,      s1   $ F <:;;;&&((LNFFx"" T%_-

	6	"	" T&

RSSS6!99L

!
!,
/
/ 
! z||dE"" 	!;;==Ddjjll###vayy 
!
!,
/
/ 
! 8    c                      dS )z
    Create a cooler from genomic pairs and bins.

    Choose a subcommand based on the format of the input contact list.

    N r/   r-   r+   cloadr0   N   s	     	Dr-   c                    t                                           t          j        dt          d           t          j        dt          j        dd          d           t          j        dd	
           t          j        dd           t          j        dd          |                                                             S )NbinsBINS)typemetavar
pairs_pathT)exists
allow_dash
PAIRS_PATH	cool_path	COOL_PATH)r5   z
--metadataz+Path to JSON file containing user metadata.helpz
--assemblyz)Name of genome assembly (e.g. hg19, mm10))r0   commandclickargumentstrPathoptionfuncs    r+   register_subcommandrF   Z   s    ==??8vC888ENZt===$  
 A{K@@@EL$+X  (!L       	 	 	
 	
  r-   c                 D    | j                             d          | _         | S )Na  
    BINS : One of the following

        <TEXT:INTEGER> : 1. Path to a chromsizes file, 2. Bin size in bp

        <TEXT> : Path to BED file defining the genomic bin segmentation.

    PAIRS_PATH : Path to contacts (i.e. read pairs) file.

    COOL_PATH : Output COOL file path or URI.)__doc__formatrD   s    r+   add_arg_helprJ   q   s'    <&&		1 DL Kr-   z--chunksizez-czFControl the number of pixels handled by each worker process at a time.g    חAT)r=   r4   defaultshow_defaultc           	      N   t          |           \  }} |;t          |          5 }t          j        |          }ddd           n# 1 swxY w Y   t	          j        |d          5 }t          ||| |          }	t          || |	||d           ddd           dS # 1 swxY w Y   dS )z
    Bin a hiclib HDF5 contact list (frag) file.

    {}

    hiclib on BitBucket: <https://github.com/mirnylab/hiclib-legacy>.

    NrTmetadataassemblyordered)r   openjsonloadh5pyFiler   r
   )
r2   r6   r:   rP   rQ   	chunksize
chromsizesfh5pairsiterators
             r+   hiclibr]      s8   ( "$''J(^^ 	$qy||H	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 
:s	#	# 	
w!':tYGG	
 	
 	
 	
	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
s#   AA	A	%(BB!Bz--nprocz-pz.Number of processes to split the work between.   z--chrom2z-c2zchrom2 field number (one-based)   )r=   r4   rK   z--pos2z-p2zpos2 field number (one-based)   z--zero-basedz-0zPositions are zero-basedF)r=   is_flagrK   rL   z--max-splitz-szDivide the pairs from each chromosome into at most this many chunks. Smaller chromosomes will be split less frequently or not at all. Increase ths value if large chromosomes dominate the workload on multiple processors.c                 \   t          t                    }	t          |           \  }
} |;t          |          5 }t	          j        |          }ddd           n# 1 swxY w Y   	 t          }|dk    r/t          |          }|	                    d| d           |j	        }i }d|v r|d         dz
  |d<   d|v r|d         dz
  |d<   t          ||
| f|| |d	|}t          || |||d
           |dk    r|                                 dS dS # |dk    r|                                 w w xY w)z
    Bin a tabix-indexed contact list file.

    {}

    See also: 'cooler csort' to sort and index a contact list file

    Tabix manpage: <http://www.htslib.org/doc/tabix.html>.

    Nr   Using  coreschrom2C2pos2P2)mapis_one_basedn_chunksTrO   )r   __name__r   rS   rT   rU   ri   r   infoimapr   r
   close)r2   r6   r:   rP   rQ   nproc
zero_based	max_splitkwargsloggerrY   rZ   map_funcpooloptsr\   s                   r+   tabixrx      s   p !!F!$''J(^^ 	$qy||H	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$!199;;DKK....///yHv)A-DJV!+DJ"
 (.
 
 
 
 		
 	
 	
 	
 199JJLLLLL 95199JJLLLL s   AA A%BD D+z--block-charzICharacter separating contig names in the block names of the pairix index.|c	           	         t          t                    }	t          |           \  }
} |;t          |          5 }t	          j        |          }ddd           n# 1 swxY w Y   	 t          }|dk    r/t          |          }|	                    d| d           |j	        }t          ||
| || ||          }t          || |||d           |dk    r|                                 dS dS # |dk    r|                                 w w xY w)z
    Bin a pairix-indexed contact list file.

    {}

    See also: 'cooler csort' to sort and index a contact list file

    Pairix on GitHub: <https://github.com/4dn-dcic/pairix>.

    Nr   rc   rd   )ri   rj   rk   
block_charTrO   )r   rl   r   rS   rT   rU   ri   r   rm   rn   r   r
   ro   )r2   r6   r:   rP   rQ   rp   rq   rr   r{   rt   rY   rZ   ru   rv   r\   s                  r+   pairixr|   
  sq   t !!F!$''J(^^ 	$qy||H	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$199;;DKK....///yH#(.!
 
 
 		
 	
 	
 	
 199JJLLLLL 95199JJLLLL s   AA A%A(C+ +Dz--chrom1z-c1zchrom1 field number (one-based))r=   r4   requiredz--pos1z-p1zpos1 field number (one-based)z--comment-charz1Comment character that indicates lines to ignore.)r4   rK   rL   r=   z--no-symmetric-upperz-NztCreate a complete square matrix without implicit symmetry. This allows for distinct upper- and lower-triangle values)r=   ra   rK   z--input-copy-statusuniqueduplexa  Copy status of input data when using symmetric-upper storage. | `unique`: Incoming data comes from a unique half of a symmetric map, regardless of how the coordinates of a pair are ordered. `duplex`: Incoming data contains upper- and lower-triangle duplicates. All input records that map to the lower triangle will be discarded! | If you wish to treat lower- and upper-triangle input data as distinct, use the ``--no-symmetric-upper`` option. )r4   rK   r=   rL   z--fielda  Specify quantitative input fields to aggregate into value columns using the syntax ``--field <field-name>=<field-number>``. Optionally, append ``:`` followed by ``dtype=<dtype>`` to specify the data type (e.g. float), and/or ``agg=<agg>`` to specify an aggregation function different from sum (e.g. mean). Field numbers are 1-based. Passing 'count' as the target name will override the default behavior of storing pair counts. Repeat the ``--field`` option for each additional field.)r=   r4   multiplezSize in number of lines/records of data chunks to read and process from the input stream at a time. These chunks will be saved as temporary partial coolers and then merged.i z
--mergebufzmTotal number of pixel records to buffer per epoch of merging data. Defaults to the same value as `chunksize`.)r=   r4   z--max-mergez3Maximum number of chunks to merge in a single pass.   z
--temp-dirzaCreate temporary files in a specified directory. Pass ``-`` to use the platform default temp dir.)r7   	file_okaydir_okayr8   z--no-delete-tempz,Do not delete temporary files when finished.z--storage-optionszOptions to modify the data filter pipeline. Provide as a comma-separated list of key-value pairs of the form 'k1=v1,k2=v2,...'. See http://docs.h5py.org/en/stable/high/dataset.html#filter-pipeline for more details.r<   z--appendz-az_Pass this flag to append the output cooler to an existing file instead of overwriting the file.)ra   rK   r=   c           
      2  ( t          |           \  }} ||
}| }d}|r|dk    rd}n|dk    rd}|;t          |          5 }t          j        |          }ddd           n# 1 swxY w Y   g d}t          t
          j        t          t
          j        d}i (dD ]2}||         dk    rt          j        d|	          ||         d
z
  (|<   3g }i }i }t          |	          r|	D ]}t          |          \  }}}}|$||
|dv r|||<   &t          j        d|	          ||vr|                    |           ||vr|                    |           |(|<   |
|||<   |||<   ||||<   d||<   d|vr|                    d           |Ht          |          } | D ]5}!t          | |!         t                    rt          | |!                   | |!<   6nd} i }|dk    rt           j        }"nlt%          t&          d                   d
k    r7t%          t&          d
                   dk     rt)          |dd          d         }"nt)          |dd          j        }"t-          |"          \  }#}"t/          j        |"fd(fd|D             ||d|
d|}$t3          | dd| |dd          }%t5          |dd          }&t7          |&|%          }'t9          || t;          |'|$          fi d|d|d|d|d |d!|d"|d#| d$dd%dd&dd'dd(|d)| d*dd+|rd,nd- dS ).z
    Bin any text file or stream of pairs.

    Pairs data need not be sorted. Accepts compressed files.
    To pipe input from stdin, set PAIRS_PATH to '-'.

    {}

    Nr~   reflectr   drop)chrom1pos1re   rg   r   zField numbers start at 1)
param_hintr   >   countbin1_idbin2_idzA field number is required.sumr   -r   rN   infer)modecompression	c                      g | ]
}|         S r/   r/   ).0nameinput_field_numberss     r+   
<listcomp>zpairs.<locals>.<listcomp>r  s    IIIt$T*IIIr-   T)sepusecolsnamesdtyper\   rX   pairs)schemadecode_chromsrj   tril_actionsortvalidateF)aggr   r   columnsdtypesrP   rQ   mergebuf	max_mergetemp_dirdelete_tempboundscheck	triucheckdupcheckensure_sortedsymmetric_upperh5optsrR   r   aw)r   rS   rT   rU   rA   npint64r?   BadParameterlenr   r    r   r   listtuplesysstdinint_pandas_versionr   handler,   pdread_csvr   r	   r   r
   ri   ))r2   r6   r:   rP   rQ   rq   r#   input_copy_statusno_symmetric_upperfieldrX   r   r   no_delete_tempr   storage_optionsr    rs   rY   r   r   rZ   input_field_namesinput_field_dtypesr   output_field_namesoutput_field_dtypesaggregationsargcolnumr   r   r   keyf_in_readersanitize	aggregatepipeliner   s)                                           @r+   r   r   i  s   p "$''J,,OK !((#KK(** K(^^ 	$qy||H	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$   	  4 5 5$<1$%?DQQQQ$*4L1$4D!! L
5zz "+ !	+ !	+C'8'='=$D&% ~K) ???05'-,5#    ,,,!((...---"))$///(.% +0"4(,1#D)%(T""%*T"" (((!!'*** "$_55 	1 	1C&+t,, 1#F3K00s	1  FSy	_Q	 	 A	%	%#oa.@*A*AA*E*E*3GDDDQG *3GDDDKGAt[	IIII7HIII 	 	 	 	F  #^  H "l$UKKKIy(++HHf   #"	
 #"    )  '&& E %  e  (!" v#$ %& #SS'   s   A""A&)A&)r   ).r   r?   rV   numpyr   pandasr   
simplejsonrT   cytoolzr   multiprocessr   creater   r   r   r	   r
   r    r   r   _utilr   r   r   __version__splitr   r   pandas.io.commonr   r,   groupr0   rF   rJ   rC   r]   rx   rA   r|   ChoicerB   r   r/   r-   r+   <module>r      sb   



                                                  E E E E E E E E E E.&&s++3qQ++++++/ / / /d 	 	 	  .    	Q	CJJ  
 
   
8 	9	   =CQR   e9Q   	#   
 

 
 
= =
 
      F=@ 	9	   	#   
 

 
 
 
	  7 7 
 
    J7t =CRV   e9d   =CRV   e9d   	#   		<   
@   	x*	+	+
:    
? 
  & 
' 
   
1		   	>	   
%	454D	Q	Q	Q	   	7	   
   
'  v v       '           vv v vr-   