o
    Nrft/                     @   s|   d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dl	Z	d dl
Z
d dlZeeZ	d
ddZ				ddd	ZdS )    )print_functionNc                 C   s  g }| D ]}	t |	d dkr|t|	dg7 }q|t|	dg7 }qtj}
|}g }d}d}|r5|d   d}td| tt	| D ]\}}|d 
dsT|d 
d	rUqB|||\}}}}t|t|| k r~td
| |tjgt|| t|  7 }|| dkrtd| td |dur||krt||| ||t| < d}d}g }|}|| }||k r||
gt| g7 }|d7 }||k s	 ||krd|||}t|t|| }||k r||g7 }|d7 }||k st||kr7zt||| ||t| < W n$ ty, } ztd|tjd tdtjd W Y d}~ dS d}~ww g }|}td|| qBt||| ||t| < dS )z9
    Convert an epilogos bedfile to multivec format.
       z.gzrtrr   Nzbase_resolution:Zbrowsertrackz-Lines contain fewer columns than expected: %szDThe start coordinate is not a multiple of the resolution in line: %sz
The expected position location does not match the observed location at entry {}:{}-{}
This is probably because the bedfile is not sorted. Please sort and try again.
            zError:filez-Probably need to set the --num-rows parameterzdumping batch:)opsplitextgzipopennpnanreadlineprint	enumeratezip
startswithlenloggerwarningerrorsysexitarrayformat
ValueErrormathceil	TypeErrorstderr)Zinput_filenamesZf_outZ!bedline_to_chrom_start_end_vectorZbase_resolution
has_header
chunk_sizeZnum_rows	row_infosfilesZinput_filenameZ
FILL_VALUEZbatch_lengthbatchZ
curr_indexZbatch_start_indexZ
prev_chrom_lineschromstartendZvectorZdata_start_indexmessageZdata_end_indexex r-   Y/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/clodius/multivec.pybedfile_to_multivec   s   
 

"r/   r      /tmp/my_file.multiresc              	   C   s  |}t |rt| t|d}|d ||d jd< |d |d |}	|d t|	 t	| \}
}t
j|
dd}d| jv rJ| jd }|d	ur[|d t|	 jd| |d t|	 d |d t|	 d
 |d t|	 d jdt|
f|j|dd |d t|	 d jdt|
f|dd |d jdt|
f|j|dd |d jdt|
f|dd t	|
|D ]n\}}|| vrtd|tjd q|d t|	 d
 jt|| | jdd d}d}|d t|	 d
 | }tt|t|}|t|k r,| | |||  |||| < |tt|t| | | 7 }|t|k sqt|}tt|||  td }|	}t|D ] }|d }	|d t|	 |d	urj|d t|	 jd| |d t|	 d |d t|	 d
 |d t|	 d jdt|
f|j|dd |d t|	 d jdt|
f|dd t	|
|D ]\}}||d t| d
 vrqd}|d t| d
 | }d}tt|t|}t|j}t|d d |d< t|}|d t|	 d
 j||dd |t|k rd|d t| d
 | |||  }t|d dkr/t
||d gf}|d7 }||}	 ||d t|	 d
 | t|d t|d |d  < |tt|t|| 7 }|t|k sq|	}qH|S )a  
    Create a multires file containing the array data
    aggregated at multiple resolutions.

    Parameters
    ----------
    array_data: {'chrom_key': np.array, }
        The array data to aggregate organized by chromosome
    chromsizes: [('chrom_key', size),...]
    agg: lambda
        The function that will aggregate the data. Should
        take an array as input and create another array of
        roughly half the length
    starting_resolution: int (default 1)
        The starting resolution of the input data
    tile_size: int
        The tile size that we want higlass to use. This should
        depend on the size of the data but when in doubt, just use
        256.
    winfoz	tile-sizeresolutionschromsS)dtyper#   Nvaluesnamer   )shaper7   datacompressionlength)r:   r;   r<   zMissing chrom {} in input filer   )r<   g     j@r      r   )r	   existsosremoveh5pyFileZcreate_groupattrsstrr   r   r   createZcreate_datasetr   r7   r   r   r   r    r:   intminsumr   r   lograngelisttupleZconcatenate)Z
array_dataZ
chromsizesZaggZstarting_resolutionZ	tile_sizeZoutput_filer#   filenamefZcurr_resolutionr5   lengthsZchrom_arrayr(   r=   Zstandard_chunk_sizer)   Z
chrom_datar"   total_lengthZmax_zoomZprev_resolutioniZ	new_shapeZold_dataZnew_datar-   r-   r.   create_multivec_multires   s   











#rT   )N)r   r0   r1   N)
__future__r   r   loggingr   rA   Zos.pathpathr	   r   rC   numpyr   	getLogger__name__r   r/   rT   r-   r-   r-   r.   <module>   s$    

 