
    tft/                         d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dl	Z	d dl
Z
d dlZ ej                  e      Z	 ddZ	 	 	 	 ddZy)    )print_functionNc                    g }| D ]H  }	t        j                  |	      d   dk(  r|t        j                  |	d      gz  }9|t        |	d      gz  }J t        j
                  }
|}g }d}d}|r|d   j                          d}t        d|       t        t        |       D ]  \  }}|d   j                  d      s|d   j                  d	      r0 |||      \  }}}}t        |      t        |      |z  k  rEt        j                  d
|       |t        j
                  gt        |      |z  t        |      z
  z  z  }||z  dk7  r+t        j                  d|       t        j                   d       |2||k7  r-t	        j"                  |      ||   ||t        |      z    d}d}g }|}||z  }||k  r||
gt        |      z  gz  }|dz  }||k  r	 ||k7  rdj%                  |||      }t'        |      t)        j*                  ||z        }||k  r||gz  }|dz  }||k  rt        |      |k\  s	 t	        j"                  |      ||   ||t        |      z    g }|}t        d||        t	        j"                  |      |   ||t        |      z    y# t,        $ rB}t        d|t        j.                         t        dt        j.                         Y d}~ yd}~ww xY w)z9
    Convert an epilogos bedfile to multivec format.
       z.gzrtrr   Nzbase_resolution:browsertrackz-Lines contain fewer columns than expected: %szDThe start coordinate is not a multiple of the resolution in line: %sz
The expected position location does not match the observed location at entry {}:{}-{}
This is probably because the bedfile is not sorted. Please sort and try again.
            zError:filez-Probably need to set the --num-rows parameterzdumping batch:)opsplitextgzipopennpnanreadlineprint	enumeratezip
startswithlenloggerwarningerrorsysexitarrayformat
ValueErrormathceil	TypeErrorstderr)input_filenamesf_out!bedline_to_chrom_start_end_vectorbase_resolution
has_header
chunk_sizenum_rows	row_infosfilesinput_filename
FILL_VALUEbatch_lengthbatch
curr_indexbatch_start_index
prev_chrom_lineschromstartendvectordata_start_indexmessagedata_end_indexexs                             Z/var/www/html/software/conda/envs/higlass/lib/python3.12/site-packages/clodius/multivec.pybedfile_to_multivecr?      s2    E) 1;;~&q)U2dii566Ed>3/00E	1 J LE JaJ	
o.c5k* X>5 8y)U1X-@-@-I$EeY$W!uc6v;Uh..NNJERrvvh#e*x"7#f+"EFFF?"a'LLV HHQK!ez&9  *!$5E
$B
 J !E

 !O3 ++zlS[011E!OJ ++	 )) uc  W%% 3#89>)fXE!OJ >) u:% HHUO e%(9CJ(F E *"E+<=qX>t HJxxPUE%L"%6U%CD  h4ECJJWs   -'J	K7KKc                 :   |}t        j                  |      rt        j                  |       t	        j
                  |d      }|j                  d       ||d   j                  d<   |j                  d       |j                  d       |}	|d   j                  t        |	             t        | \  }
}t        j                  |
d      }d| j                  v r| j                  d   }|+|d   t        |	         j                  j                  d|       |d   t        |	         j                  d       |d   t        |	         j                  d	       |d   t        |	         d   j                  d
t        |
      f|j                  |d       |d   t        |	         d   j                  dt        |
      f|d       |d   j                  d
t        |
      f|j                  |d       |d   j                  dt        |
      f|d       t        |
|      D ]   \  }}|| vr+t!        dj#                  |      t$        j&                         6|d   t        |	         d	   j                  t        |      | |   j(                  d       d}d}|d   t        |	         d	   |   }t+        t-        |t        |                  }|t        |      k  s| |   |||z    ||||z    |t+        t-        |t        | |         |z
              z  }|t        |      k  rG t/        |      }t1        j2                  t1        j4                  |||z  z        t1        j4                  d      z        }|	}t7        |      D ]  }|dz  }	|d   j                  t        |	             |+|d   t        |	         j                  j                  d|       |d   t        |	         j                  d       |d   t        |	         j                  d	       |d   t        |	         d   j                  d
t        |
      f|j                  |d       |d   t        |	         d   j                  dt        |
      f|d       t        |
|      D ]  \  }}||d   t        |         d	   vrd}|d   t        |         d	   |   }d}t+        t-        |t        |                  }t9        |j(                        }t1        j2                  |d   dz        |d<   t;        |      }|d   t        |	         d	   j                  ||d       |t        |      k  s|d   t        |         d	   |   |||z    }t        |      dz  dk7  r t        j<                  ||d   gf      }|dz  } ||      }	 ||d   t        |	         d	   |   t+        |dz        t+        |dz  |dz  z          |t+        t-        |t        |      |z
              z  }|t        |      k  r |	} |S )a  
    Create a multires file containing the array data
    aggregated at multiple resolutions.

    Parameters
    ----------
    array_data: {'chrom_key': np.array, }
        The array data to aggregate organized by chromosome
    chromsizes: [('chrom_key', size),...]
    agg: lambda
        The function that will aggregate the data. Should
        take an array as input and create another array of
        roughly half the length
    starting_resolution: int (default 1)
        The starting resolution of the input data
    tile_size: int
        The tile size that we want higlass to use. This should
        depend on the size of the data but when in doubt, just use
        256.
    winfoz	tile-sizeresolutionschromsS)dtyper+   valuesnamer   )shaperF   datacompressionlength)rI   rJ   rK   zMissing chrom {} in input filer
   )rK   g     j@r      r   )r   existsosremoveh5pyFilecreate_groupattrsstrr   r   r   createcreate_datasetr   rF   r   r   r   r#   rI   intminsumr    r!   lograngelisttupleconcatenate)
array_data
chromsizesaggstarting_resolution	tile_sizeoutput_filer+   filenamefcurr_resolutionrD   lengthschrom_arrayr6   rL   standard_chunk_sizer7   
chrom_datar)   total_lengthmax_zoomprev_resolutioni	new_shapeold_datanew_datas                             r>   create_multivec_multiresru      s   : H 
yy
		( 			(C A NN6#,AfIOOK NN=!NN8 *O
 m!!#o"67:&OFG((6-K j&&&$$[1	 	-_-.44;;KSmS)*77AmS)*77AmS)*84CC6{n D  mS)*84CCVW& D  hK6{n   hKVW&  
 VW- Sv
"299%@szzR 	
-_-.x8GGJ
5)//V 	H 	
 "}%c/&:;HEeL
0#j/BC
 c*o%5?5F
*6Juuz12 S0#j6G2H52PQRRE c*o%!S4 w<Lyy-@!@ABTXXa[PH &O8_ P* *A-	-%%c/&:;  mS1288??YW	-_-.;;HE	-_-.;;HE	-_-.x8GGv;.## 	H 	
 	
-_-.x8GGS[Nf 	H 	
 !1 7	PME6Am,S-AB8LL
 E=)#o*>?I%PJ"%S!4c*oFGJZ--.I99Yq\A%56IaLi(ImS128<KKyf L  #j/)]+C,@A(KEREJ. x=1$)  "~~x(2,.HIH!OJ x=  - _!56x@G	NSZ!^)C%D S!4c*o6MNOOC #j/)-7	Pr *aP*b H    )N)r   i   z/tmp/my_file.multiresN)
__future__r   r   loggingr    rP   os.pathpathr   r   rR   numpyr   	getLogger__name__r   r?   ru    rv   r>   <module>r      sS    %    	  
  			8	$ @WN 'Orv   