o
    Nrf+                     @   s  d dl Z d dlZd dlZd dlZd dlZddlm	Z	m
Z
mZmZmZ ddlmZmZ ddlmZmZmZ e ejddd	ejd
ejdddddejddd	ejdddeddgddejdddejdddejddeddejddd d!d"ejd#dd d$d"ejd%ed&dd'd(ejd)d*d+dd d,ejd-ed.d/gd.d0dd1ejd2d3d4ed5d6ejd7d8ed9ejd:d;ejdd ddd<d9ejd=d>ed?dd@ejdAdBdd d,ejdCdDdejdEdFdd dGd"dHdI ZdS )J    N   )	BIN_DTYPECOUNT_DTYPEcreate_from_unorderedsanitize_pixelssanitize_records   )cli
get_logger)
parse_binsparse_field_paramparse_kv_list_param	bins_pathZ	BINS_PATH)metavarpixels_pathT)exists
allow_dashZPIXELS_PATH)typer   	cool_pathZ	COOL_PATHz--formatz-fz'coo' refers to a tab-delimited sparse triplet file (bin1, bin2, count). 'bg2' refers to a 2D bedGraph-like file (chrom1, start1, end1, chrom2, start2, end2, count).coobg2)helpr   requiredz
--metadataz+Path to JSON file containing user metadata.)r   z
--assemblyz)Name of genome assembly (e.g. hg19, mm10)z--fielda  Add supplemental value fields or override default field numbers for the specified format. Specify quantitative input fields to aggregate into value columns using the syntax ``--field <field-name>=<field-number>``. Optionally, append ``:`` followed by ``dtype=<dtype>`` to specify the data type (e.g. float). Field numbers are 1-based. Repeat the ``--field`` option for each additional field.)r   r   Zmultiplez--count-as-floatFz{Store the 'count' column as floating point values instead of as integers. Can also be specified using the `--field` option.)is_flagdefaultr   z--one-basedzWPass this flag if the bin IDs listed in a COO file are one-based instead of zero-based.z--comment-char#z1Comment character that indicates lines to ignore.)r   r   show_defaultr   z--no-symmetric-upperz-NztCreate a complete square matrix without implicit symmetry. This allows for distinct upper- and lower-triangle values)r   r   r   z--input-copy-statusuniqueduplexa  Copy status of input data when using symmetric-upper storage. | `unique`: Incoming data comes from a unique half of a symmetric matrix, regardless of how element coordinates are ordered. Execution will be aborted if duplicates are detected. `duplex`: Incoming data contains upper- and lower-triangle duplicates. All lower-triangle input elements will be discarded! | If you wish to treat lower- and upper-triangle input data as distinct, use the ``--no-symmetric-upper`` option instead. )r   r   r   r   z--chunksizez-czSize in number of lines/records of data chunks to read and process from the input stream at a time. These chunks will be saved as temporary partial coolers and then merged.i -1)r   r   r   z
--mergebufzgTotal number of records to buffer per epoch of merging data. Defaults to the same value as `chunksize`.)r   r   z
--temp-dirzaCreate temporary files in a specified directory. Pass ``-`` to use the platform default temp dir.)r   Z	file_okayZdir_okayr   z--max-mergez3Maximum number of chunks to merge in a single pass.   )r   r   r   r   z--no-delete-tempz,Do not delete temporary files when finished.z--storage-optionszOptions to modify the data filter pipeline. Provide as a comma-separated list of key-value pairs of the form 'k1=v1,k2=v2,...'. See http://docs.h5py.org/en/stable/high/dataset.html#filter-pipeline for more details.z--appendz-az_Pass this flag to append the output cooler to an existing file instead of overwriting the file.c           (         s\  t t}t| \}}|du r|}| }d}|r$|
dkrd}n|
dkr$d}|durAt|}t|}W d   n1 s<w   Y  ddg}tttd}|d	krg d
}tt	t	tt	t	|d d}|
dd|
dd|
dd|
dd|
dd|
dddd t|d	||dd}n|dkrddg}t	t	|d d}dddd t|||dd}t|r|D ]c}t|dd\} }!}"}#|!du r| d v r|"dur|"|| < q| dkr|"dur|d |d |"|| < |"|| < qtjd!|d"| |vr||  | |vr||  |! | < |"dur|"|| < |"|| < qn
|d |d d|v r1|r1tj|d< tj|d< |durQt|}$|$D ]}%t|$|% trNt|$|% |$|%< q<nd}$|d#kr\tj}&n|}&tj|&d$ fd%d&|D |||	d|d'}'|d(   |d)|  |d*|  t||t||'|||||||| d|rdnd||$|rd+nd,d- dS ).a  
    Create a cooler from a pre-binned matrix.

    BINS_PATH : One of the following

        <TEXT:INTEGER> : 1. Path to a chromsizes file, 2. Bin size in bp

        <TEXT> : Path to BED file defining the genomic bin segmentation.

    PIXELS_PATH : Text file containing nonzero pixel values. May be gzipped.
    Pass '-' to use stdin.

    COOL_PATH : Output COOL file path or URI.

    **Notes**

    Two input format options (tab-delimited).
    Input pixel file may be compressed.

    COO: COO-rdinate sparse matrix format (a.k.a. ijv triple).
    3 columns: "bin1_id, bin2_id, count",

    BG2: 2D version of the bedGraph format.
    7 columns: "chrom1, start1, end1, chrom2, start2, end2, count"

    **Examples**

    cooler load -f bg2 <chrom.sizes>:<binsize> in.bg2.gz out.cool

    Nr   Zreflectr   Zdropbin1_idbin2_id)r    r!   countr   )chrom1start1end1chrom2start2end2r"   )r#   r$   r%   r&   r'   r(   r"   r#   r   r$   r   r%   r   r&      r'      r(         T)Zschemais_one_basedtril_actionsortr   )r-   r.   r/   F)Zincludes_agg>   r    r!   zA field number is required.)Z
param_hint-	c                    s   g | ]} | qS  r2   ).0nameZinput_field_numbersr2   X/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/cooler/cli/load.py
<listcomp>M  s    zload.<locals>.<listcomp>)sepZusecolsnamesdtypecommentiterator	chunksizezfields: zdtypes: zsymmetric-upper: aw)columnsZdtypesmetadataassemblymergebuf	max_mergetemp_dirZdelete_tempZensure_sortedZ	triuchecksymmetric_upperh5optsmode)r
   __name__r   openjsonloadr   r   strintgetr   r   lenr   appendclickZBadParameternpZfloat64r   
isinstancelisttuplesysstdinpdZread_csvinfor   map)(r   r   r   formatrA   rB   fieldZcount_as_floatZ	one_basedZcomment_charZinput_copy_statusZno_symmetric_upperr=   rC   rD   rE   Zno_delete_tempZstorage_optionsrQ   kwargsloggerZ
chromsizesZbinsrF   r.   fZoutput_field_namesZoutput_field_dtypesZinput_field_namesZinput_field_dtypesZpipelineargr4   Zcolnumr:   _rG   keyZf_inreaderr2   r5   r6   rL      s   /








	







"





rL   )rW   rR   numpyrS   ZpandasrY   Z
simplejsonrK   creater   r   r   r   r    r	   r
   Z_utilr   r   r   commandargumentPathoptionZChoicerM   rN   rL   r2   r2   r2   r6   <module>   s    
	4