
    tf+                        d dl Z d dlZd dlZd dlZd dlZddlm	Z	m
Z
mZmZmZ ddlmZmZ ddlmZmZmZ  ej*                          ej,                  dd	       ej,                  d
 ej.                  dd      d       ej,                  dd	       ej0                  ddd ej2                  ddg      d       ej0                  dd       ej0                  dd       ej0                  dded       ej0                  ddd d!"       ej0                  d#dd d$"       ej0                  d%ed&dd'(       ej0                  d)d*d+dd ,       ej0                  d- ej2                  d.d/g      d.d0d1       ej0                  d2d3d4ed56       ej0                  d7d8e9       ej0                  d:d; ej.                  dd dd<      9       ej0                  d=d>ed?d@       ej0                  dAdBdd ,       ej0                  dCdD       ej0                  dEdFdd dG"      dH                                                                                                                                             Zy)I    N   )	BIN_DTYPECOUNT_DTYPEcreate_from_unorderedsanitize_pixelssanitize_records   )cli
get_logger)
parse_binsparse_field_paramparse_kv_list_param	bins_path	BINS_PATH)metavarpixels_pathT)exists
allow_dashPIXELS_PATH)typer   	cool_path	COOL_PATHz--formatz-fz'coo' refers to a tab-delimited sparse triplet file (bin1, bin2, count). 'bg2' refers to a 2D bedGraph-like file (chrom1, start1, end1, chrom2, start2, end2, count).coobg2)helpr   requiredz
--metadataz+Path to JSON file containing user metadata.)r   z
--assemblyz)Name of genome assembly (e.g. hg19, mm10)z--fielda  Add supplemental value fields or override default field numbers for the specified format. Specify quantitative input fields to aggregate into value columns using the syntax ``--field <field-name>=<field-number>``. Optionally, append ``:`` followed by ``dtype=<dtype>`` to specify the data type (e.g. float). Field numbers are 1-based. Repeat the ``--field`` option for each additional field.)r   r   multiplez--count-as-floatFz{Store the 'count' column as floating point values instead of as integers. Can also be specified using the `--field` option.)is_flagdefaultr   z--one-basedzWPass this flag if the bin IDs listed in a COO file are one-based instead of zero-based.z--comment-char#z1Comment character that indicates lines to ignore.)r   r   show_defaultr   z--no-symmetric-upperz-NztCreate a complete square matrix without implicit symmetry. This allows for distinct upper- and lower-triangle values)r   r   r   z--input-copy-statusuniqueduplexa  Copy status of input data when using symmetric-upper storage. | `unique`: Incoming data comes from a unique half of a symmetric matrix, regardless of how element coordinates are ordered. Execution will be aborted if duplicates are detected. `duplex`: Incoming data contains upper- and lower-triangle duplicates. All lower-triangle input elements will be discarded! | If you wish to treat lower- and upper-triangle input data as distinct, use the ``--no-symmetric-upper`` option instead. )r   r   r   r!   z--chunksizez-czSize in number of lines/records of data chunks to read and process from the input stream at a time. These chunks will be saved as temporary partial coolers and then merged.i -1)r   r   r   z
--mergebufzgTotal number of records to buffer per epoch of merging data. Defaults to the same value as `chunksize`.)r   r   z
--temp-dirzaCreate temporary files in a specified directory. Pass ``-`` to use the platform default temp dir.)r   	file_okaydir_okayr   z--max-mergez3Maximum number of chunks to merge in a single pass.   )r   r   r   r!   z--no-delete-tempz,Do not delete temporary files when finished.z--storage-optionszOptions to modify the data filter pipeline. Provide as a comma-separated list of key-value pairs of the form 'k1=v1,k2=v2,...'. See http://docs.h5py.org/en/stable/high/dataset.html#filter-pipeline for more details.z--appendz-az_Pass this flag to append the output cooler to an existing file instead of overwriting the file.c                    t        t              }t        |       \  }}||}| }d}|r|
dk(  rd}n|
dk(  rd}|)t        |      5 }t	        j
                  |      }ddd       ddg}t        t        t        d}|d	k(  rg d
}t        t        t        t        t        t        |d   d}|j                  dd      |j                  dd      |j                  dd      |j                  dd      |j                  dd      |j                  dd      dd}t        |d	||d      }n/|dk(  r*ddg}t        t        |d   d}dddd}t        |||d      }t        |      r|D ]  } t        | d      \  }!}"}#}$|"W|!d v r|#|#||!<   #|!dk(  r/|#-j                  d       |j                  d       |#|!<   |#||!<   Wt!        j"                  d!| "      |!vr|j                  |!       |!|vr|j                  |!       |"|!<   |#|#|!<   |#||!<    n"j                  d       |j                  d       dv r(|r&t$        j&                  d<   t$        j&                  |d<   |8t)        |      }%|%D ]'  }&t+        |%|&   t,              st/        |%|&         |%|&<   ) nd}%|d#k(  rt0        j2                  }'n|}'t5        j6                  |'d$|D !cg c]  }!|!   	 c}!||	d|%      }(|j9                  d&        |j9                  d'|        |j9                  d(|        t;        ||t=        |(      |||||||| d|rdnd||%|rd)+       yd*+       y# 1 sw Y   xY wc c}!w ),a  
    Create a cooler from a pre-binned matrix.

    BINS_PATH : One of the following

        <TEXT:INTEGER> : 1. Path to a chromsizes file, 2. Bin size in bp

        <TEXT> : Path to BED file defining the genomic bin segmentation.

    PIXELS_PATH : Text file containing nonzero pixel values. May be gzipped.
    Pass '-' to use stdin.

    COOL_PATH : Output COOL file path or URI.

    **Notes**

    Two input format options (tab-delimited).
    Input pixel file may be compressed.

    COO: COO-rdinate sparse matrix format (a.k.a. ijv triple).
    3 columns: "bin1_id, bin2_id, count",

    BG2: 2D version of the bedGraph format.
    7 columns: "chrom1, start1, end1, chrom2, start2, end2, count"

    **Examples**

    cooler load -f bg2 <chrom.sizes>:<binsize> in.bg2.gz out.cool

    Nr"   reflectr#   dropbin1_idbin2_id)r*   r+   countr   )chrom1start1end1chrom2start2end2r,   )r-   r.   r/   r0   r1   r2   r,   r-   r   r.   r	   r/   r   r0      r1      r2         T)schemais_one_basedtril_actionsortr   )r8   r9   r:   F)includes_agg>   r*   r+   zA field number is required.)
param_hint-	)sepusecolsnamesdtypecommentiterator	chunksizezfields: zdtypes: zsymmetric-upper: aw)columnsdtypesmetadataassemblymergebuf	max_mergetemp_dirdelete_tempensure_sorted	triuchecksymmetric_upperh5optsmode)r   __name__r   openjsonloadr   r   strintgetr   r   lenr   appendclickBadParameternpfloat64r   
isinstancelisttuplesysstdinpdread_csvinfor   map))r   r   r   formatrJ   rK   fieldcount_as_float	one_basedcomment_charinput_copy_statusno_symmetric_upperrE   rL   rM   rN   no_delete_tempstorage_optionsr]   kwargslogger
chromsizesbinsrR   r9   foutput_field_namesoutput_field_dtypesinput_field_namesinput_field_dtypesinput_field_numberspipelineargnamecolnumrB   _rS   keyf_inreaders)                                            Y/var/www/html/software/conda/envs/higlass/lib/python3.12/site-packages/cooler/cli/load.pyrX   rX      s!   \ !F!),J,,OK(#K(* K (^ 	$qyy|H	$ $Y/ 
 (1
 jj1-jj1-JJvq)jj1-jj1-JJvq)
 $"#
 
5
 (1

 +,AF"yk

 5z 	2C%6s%O"D&% ~11e6G05'-W_):%,,W5&--g6/4&t,05'-,,5#  ,,!((.--"))$/(.% +0"4(,1#D)=	2D 	  )!!'*##&(jj7#')zzG$ "$_5 	1C&+t,#F3K0s	1  cyy[[7HIt$T*I 	F KK(./01
KK(-./0
KK#O#456Hf""&& *$u'S%$ !$%S	$ 	$z Js   MMM)re   r^   numpyr`   pandasrg   
simplejsonrW   creater   r   r   r   r    r
   r   _utilr   r   r   commandargumentPathoptionChoicerY   rZ   rX        r   <module>r      s   
       E E [1


$4@- [1
; 
uen	%	 l!NOl!LM
? 
 
B	 
	 		< 
@ 	x*	+
B  
' 
 
(		 
%	454D	Q	 	>	 	7	 
 
'_ N P	 2 2 t_r   