
    DUf+                        d dl Z d dlZd dlZd dlZd dlZddlm	Z	m
Z
mZmZmZ ddlmZmZ ddlmZmZmZ  ej                     ej        dd	           ej        d
 ej        dd          d           ej        dd	           ej        ddd ej        ddg          d           ej        dd           ej        dd           ej        dded           ej        ddd d!"           ej        d#dd d$"           ej        d%ed&dd'(           ej        d)d*d+dd ,           ej        d- ej        d.d/g          d.d0d1           ej        d2d3d4ed56           ej        d7d8e9           ej        d:d; ej        dd dd<          9           ej        d=d>ed?d@           ej        dAdBdd ,           ej        dCdD           ej        dEdFdd dG"          dH                                                                                                                                                                                                                                                 ZdS )I    N   )	BIN_DTYPECOUNT_DTYPEcreate_from_unorderedsanitize_pixelssanitize_records   )cli
get_logger)
parse_binsparse_field_paramparse_kv_list_param	bins_path	BINS_PATH)metavarpixels_pathT)exists
allow_dashPIXELS_PATH)typer   	cool_path	COOL_PATHz--formatz-fz'coo' refers to a tab-delimited sparse triplet file (bin1, bin2, count). 'bg2' refers to a 2D bedGraph-like file (chrom1, start1, end1, chrom2, start2, end2, count).coobg2)helpr   requiredz
--metadataz+Path to JSON file containing user metadata.)r   z
--assemblyz)Name of genome assembly (e.g. hg19, mm10)z--fielda  Add supplemental value fields or override default field numbers for the specified format. Specify quantitative input fields to aggregate into value columns using the syntax ``--field <field-name>=<field-number>``. Optionally, append ``:`` followed by ``dtype=<dtype>`` to specify the data type (e.g. float). Field numbers are 1-based. Repeat the ``--field`` option for each additional field.)r   r   multiplez--count-as-floatFz{Store the 'count' column as floating point values instead of as integers. Can also be specified using the `--field` option.)is_flagdefaultr   z--one-basedzWPass this flag if the bin IDs listed in a COO file are one-based instead of zero-based.z--comment-char#z1Comment character that indicates lines to ignore.)r   r   show_defaultr   z--no-symmetric-upperz-NztCreate a complete square matrix without implicit symmetry. This allows for distinct upper- and lower-triangle values)r   r   r   z--input-copy-statusuniqueduplexa  Copy status of input data when using symmetric-upper storage. | `unique`: Incoming data comes from a unique half of a symmetric matrix, regardless of how element coordinates are ordered. Execution will be aborted if duplicates are detected. `duplex`: Incoming data contains upper- and lower-triangle duplicates. All lower-triangle input elements will be discarded! | If you wish to treat lower- and upper-triangle input data as distinct, use the ``--no-symmetric-upper`` option instead. )r   r   r   r!   z--chunksizez-czSize in number of lines/records of data chunks to read and process from the input stream at a time. These chunks will be saved as temporary partial coolers and then merged.i -1)r   r   r   z
--mergebufzgTotal number of records to buffer per epoch of merging data. Defaults to the same value as `chunksize`.)r   r   z
--temp-dirzaCreate temporary files in a specified directory. Pass ``-`` to use the platform default temp dir.)r   	file_okaydir_okayr   z--max-mergez3Maximum number of chunks to merge in a single pass.   )r   r   r   r!   z--no-delete-tempz,Do not delete temporary files when finished.z--storage-optionszOptions to modify the data filter pipeline. Provide as a comma-separated list of key-value pairs of the form 'k1=v1,k2=v2,...'. See http://docs.h5py.org/en/stable/high/dataset.html#filter-pipeline for more details.z--appendz-az_Pass this flag to append the output cooler to an existing file instead of overwriting the file.c                   ( t          t                    }t          |           \  }}||}| }d}|r|
dk    rd}n|
dk    rd}|;t          |          5 }t	          j        |          }ddd           n# 1 swxY w Y   ddg}t          t          t          d}|d	k    rg d
}t          t          t          t          t          t          |d         d}|
                    dd          |
                    dd          |
                    dd          |
                    dd          |
                    dd          |
                    dd          dd(t          |d	||d          }n9|dk    r3ddg}t          t          |d         d}dddd(t          |||d          }t          |          r|D ]}t          |d          \  } }!}"}#|!_| d v r|"|"|| <   &| dk    r7|"5|                    d           |                    d           |"|| <   |"|| <   ct!          j        d!|"          | |vr|                    |            | |vr|                    |            |!(| <   |"
|"|| <   |"|| <   n*|                    d           |                    d           d|v r |rt$          j        |d<   t$          j        |d<   |Ht)          |          }$|$D ]5}%t+          |$|%         t,                    rt/          |$|%                   |$|%<   6nd}$|d#k    rt0          j        }&n|}&t5          j        |&d$(fd%|D             |||	d|&          }'|                    d'(            |                    d(|            |                    d)|            t;          ||t=          ||'          |||||||| d|rdnd||$|rd*nd+,           dS )-a  
    Create a cooler from a pre-binned matrix.

    BINS_PATH : One of the following

        <TEXT:INTEGER> : 1. Path to a chromsizes file, 2. Bin size in bp

        <TEXT> : Path to BED file defining the genomic bin segmentation.

    PIXELS_PATH : Text file containing nonzero pixel values. May be gzipped.
    Pass '-' to use stdin.

    COOL_PATH : Output COOL file path or URI.

    **Notes**

    Two input format options (tab-delimited).
    Input pixel file may be compressed.

    COO: COO-rdinate sparse matrix format (a.k.a. ijv triple).
    3 columns: "bin1_id, bin2_id, count",

    BG2: 2D version of the bedGraph format.
    7 columns: "chrom1, start1, end1, chrom2, start2, end2, count"

    **Examples**

    cooler load -f bg2 <chrom.sizes>:<binsize> in.bg2.gz out.cool

    Nr"   reflectr#   dropbin1_idbin2_id)r*   r+   countr   )chrom1start1end1chrom2start2end2r,   )r-   r.   r/   r0   r1   r2   r,   r-   r   r.   r	   r/   r   r0      r1      r2         T)schemais_one_basedtril_actionsortr   )r8   r9   r:   F)includes_agg>   r*   r+   zA field number is required.)
param_hint-	c                      g | ]
}|         S  r@   ).0nameinput_field_numberss     L/var/www/html/software/conda/lib/python3.11/site-packages/cooler/cli/load.py
<listcomp>zload.<locals>.<listcomp>M  s    IIIt$T*III    )sepusecolsnamesdtypecommentiterator	chunksizezfields: zdtypes: zsymmetric-upper: aw)columnsdtypesmetadataassemblymergebuf	max_mergetemp_dirdelete_tempensure_sorted	triuchecksymmetric_upperh5optsmode)r   __name__r   openjsonloadr   r   strintgetr   r   lenr   appendclickBadParameternpfloat64r   
isinstancelisttuplesysstdinpdread_csvinfor   map))r   r   r   formatrR   rS   fieldcount_as_float	one_basedcomment_charinput_copy_statusno_symmetric_upperrM   rT   rU   rV   no_delete_tempstorage_optionsre   kwargslogger
chromsizesbinsrZ   r9   foutput_field_namesoutput_field_dtypesinput_field_namesinput_field_dtypespipelineargrB   colnumrJ   _r[   keyf_inreaderrC   s)                                           @rD   r`   r`      s[   \ !!F!),,J,,OK !((#KK(** K (^^ 	$qy||H	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ $Y/  
 
 
 (1
 
 jj1--jj1--JJvq))jj1--jj1--JJvq))
 
 $"#
 
 
 
5
 (1
 

 +,AFF"yk
 
 

 5zz $+ 	2 	2C%6s%O%O%O"D&% ~111e6G05'-W__):%,,W555&--g666/4&t,05'-,5#    ,,,!((...---"))$///(.% +0"4(,1#D)=	2D 	  )))!!'***####&(j7#')zG$ "$_55 	1 	1C&+t,, 1#F3K00s	1  cy[IIII7HIII 	 	 	F KK0.00111
KK/-//000
KK5O55666Hf""&& *4$$u'#SS%     s   A66A:=A:)rm   rf   numpyrh   pandasro   
simplejsonr_   creater   r   r   r   r    r
   r   _utilr   r   r   commandargumentPathoptionChoicera   rb   r`   r@   rF   rD   <module>r      s|   



                                   E E E E E E E E E E [111

$4@@@-   [111
; 
uen	%	%	 	 	 l!NOOOl!LMMM
? 
   
B	   
	   		<   
@   	x*	+	+
B    
' 
   
(		   
%	454D	Q	Q	Q	   	>	   	7	   
   
'  _ _              NM PO	 	 21  21 t_ _ _rF   