
    tf                        d dl mZmZ d dlZd dlZd dlZd dlZd dlZd dl	m
Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dl m!Z" d dl#Z#ddl$m%Z% ddl&m'Z'm(Z(  e%jR                         d        Z*g fdZ+	 dd
Z,	 ddZ-	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ.d Z/d Z0d Z1e*je                          ejf                  dd       ejh                  dddd       ejh                  d d!d" ejj                   ejl                               d#$       ejh                  d%dd&       ejh                  d'd(dd)       ejh                  d*d+d,d-.       ejh                  d/d0d.       ejh                  d1d2d.       ejh                  d3d4d.       ejh                  d5d6d.       ejh                  d7d8d	.       ejh                  d9d: ejj                  d;d<g      d;$       ejh                  d=d>e7d$       ejh                  d?d: ejj                  d@dAg      d@$       ejh                  dBdCdDE       ejh                  dFdGdDE       ejh                  dHdId.       ejh                  dJdKdLdM.      dN                                                                                                                                      Z8e*je                          ejf                  dd       ejh                  ddddO       ejh                  d d!d"P       ejh                  dQdRP       ejh                  d7d8d	.       ejh                  d%dd&       ejh                  dSde9dTU       ejh                  d'ddV       ejh                  dWde7X       ejh                  dHdId.       ejh                  dYdZe9d $      d[                                                                                     Z:e*je                          ejf                  dd       ejh                  ddd\e7d$       ejh                  d d!d"e7dD]       ejh                  dQd^dRe7ddD_       ejh                  d7d8d	dD`       ejh                  dSdadbe9ddD_       ejh                  d'dcdVe9ddD_       ejh                  d%d+d&e7ddD_       ejh                  dHdIe7ddD_       ejh                  dddee9ddD_       ejh                  dfdge9ddD_       ejh                  dhdie9ddD_       ejh                  djdke9ddD_       ejh                  dldme9ddD_       ejh                  dndoe9ddD_       ejh                  dpdqe9ddD_       ejh                  drdse9ddD_       ejh                  dtdudDdvw      dx                                                                                                                                      Z;e*je                          ejf                  dd       ejh                  ddddy       ejh                  d!d ddz       ejh                  dcd'd{dV       ejh                  d+dHdd|       ejh                  d}d{d~       ejh                  d9d: ejj                  d;dg      d;$       ejh                  ddd.      d                                                                Ze*je                          ejf                  dd       ejh                  ddddy       ejh                  dadSde9X       ejh                  dcd'd{dV       ejh                  dKdddV      d                                           Z<y)    )divisionprint_functionN   )cli)get_tile_pos_from_lng_lattransactionc                       y)zW
    Aggregate a data file so that it stores the data at multiple
    resolutions.
    N r
       _/var/www/html/software/conda/envs/higlass/lib/python3.12/site-packages/clodius/cli/aggregate.py	aggregater      s     	r   c                     | j                  d       | j                  d|||dj                  |      dj                  t        t        |            |||dj                  |
      |	f
       | j	                          y )NaQ  
        CREATE TABLE tileset_info
        (
            zoom_step INT,
            max_length INT,
            assembly text,
            chrom_names text,
            chrom_sizes text,
            tile_size REAL,
            max_zoom INT,
            max_width REAL,
            header text,
            version text
        )
        z5INSERT INTO tileset_info VALUES (?,?,?,?,?,?,?,?,?,?)	)executejoinmapstrcommit)cursor	zoom_step
max_lengthassemblychrom_nameschrom_sizes	tile_sizemax_zoom	max_widthversionheaders              r   store_meta_datar    '   sy     NN	$ NN?IIk"IIc#{+,IIf	
  MMOr   d   Fc                     d }t         ||        ||      z         }|j                         }|rt        |d       }nt        |d       }|d | S )Nc                 6    | D cg c]	  }|d   |f c}S c c}w )Nr
   )entrieses     r   extract_keyz0reduce_values_by_importance.<locals>.extract_keyc   s    $+,q2
,,,s   c                     t        | d         S Nfloatxs    r   <lambda>z-reduce_values_by_importance.<locals>.<lambda>j   s    qu r   keyc                      t        | d          S r)   r+   r-   s    r   r/   z-reduce_values_by_importance.<locals>.<lambda>l   s    %", r   )dictvaluessorted)entry1entry2max_entries_per_tilereverse_importancer'   by_uidcombined_by_uidcombined_entriess           r   reduce_values_by_importancer=   `   s[    - +f%F(;;<FmmoO!/7MN!/7NO1122r   c           	         t        j                  | d      }|t        j                  |       d   dz   }t	        j
                  ||      \  }	}
}|dk(  r	 |dk(  rd }nd }t        d	|       |Bt        |d      5 }|D cg c]!  }|j                         j                  d
      # }}ddd       nd}t        d       t        j                  |t        |
|      d ||||       yc c}w # 1 sw Y   BxY w)aE  
    Aggregate a multivec file.

    This is a file containing nxn data that is aggregated along only one axis.
    This data should be in an HDF5 file where each dataset is named for a
    chromosome and contains a 'resolutions' group containing values for the
    base level resolution.

    Example: f['chr1']['reslutions']['1000'] = [[1,2,3],[4,5,6]]

    The resulting data will be organized by resolution and chromosome.

    Example: f_out['chr1']['resolutions']['5000']=[[1000,2000,3000],[4000,5000,6000]]

    Aggregation is currently done by summing adjacent values.
    rNr   z.multires.mv5maxtotal	logsumexpc                     | j                   j                  | j                  d   ddf      }t        j                  |d      j                   S Nr   r*      )axis)TreshapeshapesmrA   )r.   as     r   aggz_multivec.<locals>.agg   s:    QWWQZQ/0A<<*,,,r   c                     | j                   j                  | j                  d   ddf      j                  d      j                   S rC   )rF   rG   rH   sumr-   s    r   rK   z_multivec.<locals>.agg   s8    33;;
B2377Q7?AAAr   zagg:utf8z
row_infos:c                     t        j                  | j                  j                  | j                  d   ddf      d      j                  S rC   )npnansumrF   rG   rH   r-   s    r   r/   z_multivec.<locals>.<lambda>   s3    biiQWWQZQ,? @qIKK r   )
chromsizesrK   starting_resolutionr   output_file	row_infos)h5pyFileopsplitextcchload_chromsizesprintopenstripencodecmvcreate_multivec_multireszip)filepathrT   r   r   chromsizes_filenamerS   methodrow_infos_filenamef_in
chrom_infor   r   rK   flinerU   s                   r   	_multivecrk   q   s   4 99Xs#Dkk(+A.@-0-@-@X.*Zk
 	-	B 
&#%$c* 	DaABC,,V4CIC	D 	D 		,	"  {K0K/ D	D 	Ds   :C+?&C&%C+&C++C4random   rD                 順 c                 
  	
,-. d}dkD  rt        d|        | dk(  rt        j                  }n4| j                  d      rt	        j
                  | d      }nt        | d      }|Z| }| j                  d      r"t        j                  j                  |      d   }t        j                  j                  |      d   dz   }t        j                  |      rt        j                  |       t        j                  |      \  ,}}	,
f	d	}g }|r|j                          n||j                         j                         }	 |j!                         }t#        |
dz
            t#        |dz
            t#        |dz
            t#        |dz
             ||      g}||D cg c]  }|j                          c}D cg c]  }|s ||       c}z  }| |D cg c]  }|d   |k(  s	|d   |k(  s| }}dkD  rt        dt)        |       d       ,j*                  dz   }t#        t-        j.                  t-        j0                  ||z        t-        j0                  d      z              }t3        j4                  t6        j8                  d        t3        j:                  |d       }t=        |d||||||d|z  z  |
       |j?                         } | jA                  d       | jA                  d       | jA                  dt#        |dz         d       | jA                  d       | jA                  d       d}!d-tC        jD                  d       }"tG        |d       }g }#g }$-fd}%tI        |      D ]  \  }&}d}!|!|k  rk|d||!z
  z  z  .tK        tM        .fd|d   d   |d    d   g            }'tK        tM        .fd!|d   d   |d    d   g            }(d"})tO        |'d   |(d   dz         D ]2  }*|)s n.tO        |'d   |(d   dz         D ]  }+|"|!   |*   |+   |kD  sd#}) 2 4 |)rtO        |'d   |(d   dz         D ]/  }*tO        |'d   |(d   dz         D ]  }+|"|!   |*   |+xx   dz  cc<    1 |#jQ                  -|!|d$   |d   d   |d   d   |d    d   |d    d   |d%   |d&   |d'   f
       |$jQ                  -|d   d   |d   d   |d    d   |d    d   f       -dz  -n|!dz  }!|!|k  rkt)        |#      |k\  s |%|| |#|$         |%|| |#|$       | jS                          y # t$        $ r d
j'                  |      }t%        |      w xY wc c}w c c}w c c}w )(Nr   r   zBEDPEDB Version -.gzrtr?   z.bedpedbc                 "  	 | j                         }i }	 |	dz
     }|
dz
     }j                  |   }j                  |   }|t        |dz
           z   |t        |dz
           z   g|d<   |t        |dz
           z   |t        |dz
           z   g|d<   t        j                         |d<   |d   d   t        |dz
           z
  |d<   t        |      |d<   t        |      |d	<   .t        |d   d   |d   d   z
  |d   d   |d   d   z
        |d
<   n:dk(  rt        j                         |d
<   nt        |t              dz
           |d
<   | |d<   |S # t        $ r+ dj	                  |	dz
     |
dz
           }t        |      w xY w)Nr   xsyszERROR converting chromosome position to genome position. Please make sure you've specified the correct assembly using the --assembly option or a chromsizes file using the . --chromsizes-filename option.Current assembly: {}, chromosomes: {},{}uidr   	chrOffsetchrom1chrom2
importancerl   fields)splitcum_chrom_lengthsintKeyErrorformatslugidnicer   maxrl   r,   )rj   partsdr}   r~   chrom1_offsetchrom2_offset	error_strr   chr1_colchr2_colrh   	from1_col	from2_colimportance_columnto1_colto2_cols           r   line_to_dictz_bedpe.<locals>.line_to_dict   s   

	(8a<(F8a<(F&88@M&88@M E)a-$8 99E'A+$6 77AdG
 E)a-$8 99E'A+$6 77AdG  ;;=%4c%	A*>&??+&k(&k($!!D'!*qwqz"91T71:$PQ
;RSAlO(*$mmoAlO $E#.?*@1*D$EFAlO(9  
	(; <B6eHqL15A3F<  I&'
	(s   BE 4FzCouldn't convert one of the bedpe coordinates to an integer. If the input file contains a header, make sure to indicate that with the --has-header option. Line: {}r}   r~   zFound z entriesrD   c                     t        |       S Nr   vals    r   r/   z_bedpe.<locals>.<lambda>:  
    3s8 r   )isolation_level)r   r   r   r   r   r   r   r   zPRAGMA synchronous = OFF;zPRAGMA journal_mode = OFF;zPRAGMA cache_size =   ;a5  
        CREATE TABLE intervals
        (
            id int PRIMARY KEY,
            zoomLevel int,
            importance real,
            fromX int,
            toX int,
            fromY int,
            toY int,
            chrOffset int,
            uid text,
            fields text
        )
        z
        CREATE VIRTUAL TABLE position_index USING rtree(
            id,
            rFromX, rToX,
            rFromY, rToY
        )
        c                  .    t        j                  d       S )Nc                  4    t        j                  t              S r   coldefaultdictr   r
   r   r   r/   z*_bedpe.<locals>.<lambda>.<locals>.<lambda>r      #//RUBV r   r   r   r
   r   r   r/   z_bedpe.<locals>.<lambda>r      #//:V*W r   c                     | d    S Nr   r
   r-   s    r   r/   z_bedpe.<locals>.<lambda>t      Q|_,< r   r0   c                     dkD  rt        d d       t        |       5  |j                  d|       |j                  d|       d d d        |j                          |j                          y # 1 sw Y   *xY w)Nr   zInsert batch ()2INSERT INTO intervals VALUES (?,?,?,?,?,?,?,?,?,?)-INSERT INTO position_index VALUES (?,?,?,?,?))r\   r   executemanyclear)conncinterval_insertsposition_index_insertscounterverboses       r   batch_insertz_bedpe.<locals>.batch_inserty  sx    Q;N7)1-. 	MMDFV MM?AW		 	 $$&	 	s   %A//A8c                      t        | z        S r   r   r.   
tile_widths    r   r/   z_bedpe.<locals>.<lambda>  s    c!j.1 r   ry   rz   c                      t        | z        S r   r   r   s    r   r/   z_bedpe.<locals>.<lambda>  s    Q^)< r   TFr   r|   r{   r   )*r\   sysstdinendswithgzipr]   ospathrY   rX   existsremoverZ   r[   readliner^   r   r   
ValueErrorr   lentotal_lengthmathceillogsqlite3register_adapterrP   int64connectr    r   r   r   r   r5   	enumeratelistr   rangeappendclose)/rc   rT   r   r   
has_headermax_per_tiler   
chromosomerd   r   r   r   r   r   r   r   sqlite_cache_sizesqlite_batch_sizer   BED2DDB_VERSIONri   r   r   r   r%   
first_liner   r   rj   r   assembly_sizer   r   	curr_zoomtile_countsr   r   r   	entry_num	tile_fromtile_toempty_tilesijrh   r   r   s/     ``     ``````   `                         @@@r   _bedper      s   * O{  1233II			5	!IIh%3U#''**;7:Kgg&&{3A6C	yy
		++.+>+>X,(J[- -^ G	

ZZ\'')
	($$&Ei!m$%gk"#i!m$%gk"#  
+,/K

/KTttT"TTG
!H+";q{j?XA
 
 {s7|nH-. ++a/M499TXXmi&?@488A;NOPH RXX';<??;=D 	 a8m+  	AII)*II*+II$S):T)A%B$C1EFII	$ II	 IG//"WXKW"<=G' "'* 8L	1	8#"Q8i+?%@@J1AdGAJ$
3KLI 3<qwqz1T7ST:>VWXGK
 9Q<a8 "y|WQZ!^< A"9-a03lB&+		 y|WQZ!^< :A"9Q<a@ :#I.q1!494:: !''!,$
$
$
$
+%( '--agaj!D'!*agaj!D'!*M 1NIe 8#h  $55q"24JKq8Lt q*,BCGGI
O  	(GGMvH  Y''	( 0LT
s+   AT7 /U U%
U%(U*>U*7&Uc                 v
  
*+, d}|| dz   }n|}t        j                  |      rt        j                  |       | j	                  d      rdd l} |j                  | d      }nt        | d      }	 t        j                  |	|      \  *}}t        j                  d      +*
+fd
}g }t        d|       |r0|j                         }|j!                         j#                  |      }n|j                         j!                         }|j!                         j#                  |      }	 | ||      gz  }t)        t*        t-        t/        dt1        |j!                         j#                  |            dz                     }|D ].  }|j!                         j#                  |      }	 | ||      gz  }0 ||D cg c]  }|d   |k(  s| }}|}*j2                  dz   }	 t5        t7        j8                  t7        j:                  ||z        t7        j:                  d      z              }	 dd l} |j>                  t@        jB                  d        t        d|d|        |jD                  |      }tG        |d|||||||d|z  z  ||       i ,g }|D ]  }|d   }|,|<   ||d   |d   |fgz  } |}|jI                         }|jK                  d       |jK                  d       d}d}|} |||k  r|} tM        |,fd      }!t        d|       tO        jP                  t4              }"|!D ]I  }#|| k  r>|d||z
  z  z  }|#d   }$d}%|$|#d   k  rDt7        jR                  |$|z        }&djU                  ||&      }'	 |"|'   |k\  rd }%n|$|z  }$|$|#d   k  rD|%rS|#d   }$|$|#d   k  rFt7        jR                  |$|z        }&djU                  ||&      }'|"|'xx   dz  cc<   	 |$|z  }$|$|#d   k  rF|%r},|#d!      }(d"})|jK                  |)|||(d#   |(d   |(d   |(d$   |(d   |(d%   |(d&   f	       |d'z  dk(  rt        d(||(d   |(d   z
         d)})|jK                  |)||||(d   |(d   f       |dz  }n|dz  }|| k  r>d}L |jW                          y# t        $ r? |	t        d|t        j                         Y y t        d	|	t        j                         Y y w xY w# t$        $ r% t        d|d    dt        j                         Y y t&        $ r t        d|       Y w xY w# t&        $ r t        d|       Y w xY wc c}w )*Nrn   z.beddbrv   r   rw   r?   zAssembly not found:filezChromsizes filename not found:c           
      t   	 t        | d         }t        | d         }| d   }j                         }n;dk(  r||z
  }n0dk(  rj                         }nt	        | t              dz
           }||k  r t        d| t        j                         ||}}t        |       d	kD  r| d	   }nd
}
j                  |   |z   z   }
j                  |   |z   z   }||z
  }||t        j                         ||dj                  |       |t        |      d}	|	S # t        $ r t        dj                  |             w xY w)zw
        Convert a bed file line to a numpy array which can later
        be used as an entry in an h5py file.
        r   rD   z$Error parsing the position, line: {}r   sizerl   zWARNING: stop < start:r   rn    r   )startPosendPosr{   namer|   r   r   r   )r   r   r   rl   r,   r\   r   stderrr   r   r   r   r   r   )rj   startstopchromr   bedline_namegenome_start
genome_end
pos_offsetr   rh   r   offsetrands             r   line_to_np_arrayz"_bedfile.<locals>.line_to_np_array  sX   
	RQLEtAw<D Q$J&(J(*JtC(9$:Q$>?@J%<*DszzB4Et9q=7LL!33E:UBVK11%84?&H
!E)
$ ;;= #iio$e*	
 Q  	RCJJ4PQQ	Rs   D $D7z
delimiter:zUnable to find zz in the list of chromosome sizes. Please make sure the correct assembly or chromsizes filename is passed in as a parameterzInvalid line:r   r   rD   c                     t        |       S r   r   r   s    r   r/   z_bedfile.<locals>.<lambda>e  r   r   zoutput_file:zheader:)	r   r   r   r   r   r   r   r   r   r{   r   r   a&  
        CREATE TABLE intervals
        (
            id int PRIMARY KEY,
            zoomLevel int,
            importance real,
            startPos int,
            endPos int,
            chrOffset int,
            uid text,
            name text,
            fields text
        )
        z
        CREATE VIRTUAL TABLE position_index USING rtree(
            id,
            rStartZoomLevel, rEndZoomLevel, rStartPos, rEndPos
        )
        c                     | d      d    S )Nr*   r   r
   )r.   uid_to_entrys    r   r/   z_bedfile.<locals>.<lambda>  s    ,qu"5l"C!C r   r0   zmax_per_tile:Tz{}.{}Fr*   z0INSERT INTO intervals VALUES (?,?,?,?,?,?,?,?,?)r   r|   r   r   r   zcounter:r   ),rX   r   r   r   r   r   r]   rZ   r[   FileNotFoundErrorr\   r   r   rl   Randomr   r^   r   r   
IndexErrorr   r   r   r   r   r   r   r   r   r   r   r   rP   r   r   r    r   r   r5   r   r   floorr   r   )-rc   rT   r   r   r   r   r   r   	delimiterrd   r   BEDDB_VERSIONr   bed_filer   r   r   dsetrj   r   
line_partsr   r   r   r   r   	intervalsr{   r   r   r   r   max_viewable_zoomsorted_intervalsr   intervalcurr_posspace_available	curr_tiletile_idvalueexec_statementrh   r   r   s-      `      `                               @@@r   _bedfiler    s    M)!	yy
		+499Xt,#&141D1D2
.[+ ==D0d D	,	"  "##I.  "((*ZZ\''	2
	)%j122D S$uQDJJL,>,>y,I(JQ(NOPQ )ZZ\''	2
	)%j122D) Aa1\?j#@AA I++a/M 499TXXmi&?@488A;NOPH GRXX';<	.+y&97??;'D 	 a8m+ LI  9hSq}ak3788	9
 JAII	" II	 IG 8 3$C 
/<(//#&K$ W,,"Q8i+?%@@J{H"O Xa[( JJx*'<=	!..I> w'<7&+OJ& Xa[(" #A;!, $

8j+@ AI%nnY	BG(A-(
 
*H% !,( $Xb\2 "T		"  !l+j)hk*efh
  T>Q&*guXzAR/RS!P		"iE*4EuXW 1NIg ,,j 	oWp 	KKM}  &'

C
  02ECJJ P  	!*Q- 1. . ZZ	  	)/4(	)  	)/4(	) BsN   2R 'S $T8T6T6'S0SS+T?TTT32T3c           
        
*+,-./01 |t        j                  |       d   dz   }t        d|       t        j                  |      rt	        j
                  |       t        j                  |d      }|\t        j                  |      }t        j                  |      D cg c]  }|j                  d       }}t        j                  |      }n[t        j                  |      }t        j                  |      D cg c]  }|j                  d       }}t        j                  |      }|j                   *t        d*       |	}	|	d
z  z  
g ,g /d}g 0g g+g g.*d|z  z  |	kD  rt#        j$                  *d|z  z        },|j'                  dt)        |      z   |fd	d
      gz  ,/|j'                  dt)        |      z   |fd	d
      gz  /+g gz  +.g gz  .0dgz  0|z  }*d|z  z  |	kD  r|j'                  ddd	      }t        d|       |j*                  d<   *|j*                  d<   ||j*                  d<   ||j*                  d<   ||j*                  d<   ||j*                  d<   |	|j*                  d<   t#        j$                  t#        j,                  |j*                  d   |	z        t#        j,                  d      z        x|j*                  d<   -|	d-z  z  |j*                  d<   d|j*                  d<   t        d|j*                  d          t        d|j*                  d          t        d|j*                  d          t        d
       t        d|j*                  d          t/        j.                         1| dk(  rt0        j2                  }n5| j5                  d      rdd l} |j8                  | d       }nt9        | d!      }d}*
+,-./01f
d"}g }g }|r|j;                          d}|D ]  } | j=                         j?                         }!|j@                  |!|d#z
        tC        |!|d#z
           z   }"|"|z
  d#kD  r7|tD        jF                  g|"|z
  d#z
  z  z  }|d#g|"|z
  d#z
  z  z  }||"|z
  d#z
  z  }|!|d#z
     |k(  rd#nd}#|d$k(  r0|!|d#z
     |k(  sdtI        |!|d#z
           z  ntD        jF                  }$n,|!|d#z
     |k(  stI        |!|d#z
           ntD        jF                  }$|$gtC        |!|d#z
           tC        |!|d#z
           z
  z  }%|#gtC        |!|d#z
           tC        |!|d#z
           z
  z  }&|r|%|$gz  }%|&|#gz  }&||%z  }||&z  }|"tK        |%      z   |j*                  d<   |tK        |%      z  }tK        |      
kD  st        d%tK        |      
       t        d&|         ||d 
 |d 
        |
d  }|
d  }tK        |      
kD  rJ  |||       	 tK        +|         
tE        jL                  +|   d 
       }'tE        jL                  .|   d 
       }(	 0|   })|',|   |)|)
z    |(/|   |)|)
z    +|d#z   xx   tO        tQ        jR                  |'dz              z  cc<   .|d#z   xx   tO        tQ        jR                  |(dz              z  cc<   +|   
d  +|<   .|   
d  .|<   0|xx   
z  cc<   |d#z  }|z  -k\  ry c c}w c c}w )'Nr   z.hitilezoutput file:wzutf-8zassembly_size:rD   values_ri   r   )dtypecompressionnan_values_meta)r   )r  z	assembly:z	zoom-stepz
max-lengthr   zchrom-nameszchrom-sizeszchrom-orderz	tile-sizezmax-zoomz	max-widthzmax-positionzassembly size (max-length)z	max_zoom:zchunk-size:ru   rv   rw   r?   c           
      F  
 d}
dxx   | z  cc<   dxx   |z  cc<   t        j                          z
  }|   dz   t              z  }t        dj                  |   dz   ||||z  |z
               t	        
|         	k\  rt        dt	        
|                t        j                  
|   d 	       }t        j                  |   d 	       }	 t        d|          |   }||   ||	z    ||   ||	z    
|dz   xx   t        t        j                  |dz              z  cc<   |dz   xx   t        t        j                  |dz              z  cc<   
|   	d  
|<   |   	d  |<   |xx   	z  cc<   |dz  }|z  k\  ry t	        
|         	k\  ry y )Nr   r   z?position: {} progress: {:.2f} elapsed: {:.2f} remaining: {:.2f}zlen(data_buffers[curr_zoom])zpositions[curr_zoom]:rD   )
timer,   r\   r   r   rP   arrayr   ctr   )buffers_to_addnan_buffers_to_addr   	curr_timepercent_progress
curr_chunknan_curr_chunkr  r   
chunk_sizedata_buffersdsetsr   nan_data_buffers	nan_dsets	positionst1r   s           r   add_values_to_data_buffersz-_bedgraph.<locals>.add_values_to_data_buffers  s   	Q>)11IIK"$	%i014m8LL  &)$q( -.:	!	
 ,y)*j80#l96M2NO,y"9+:"FGJXX&6y&A+:&NON
 )9Y+?@ +HAKE)X:(=>ESIi Hz,AB Q'4Zi8, ' Y]+t^Q)^<0 + '39&=jk&JL#*:9*Ejk*RY' i J. NI9$0M ,y)*j8r   r   exp2zlen(values):zline:)*rX   rY   r\   r   r   r   rV   rW   ncget_chrominfo_from_fileget_chromorder_from_filer_   get_chromsizes_from_fileget_chrominfoget_chromorderget_chromsizesr   r   r   create_datasetr   attrsr   r  r   r   r   r   r]   r   r^   r   r   r   rP   nanr,   r   r  r   r  r   )2rc   rT   r   	chrom_colfrom_pos_col
to_pos_col	value_colr   r   r   r   re   	nan_value	transform	count_nanclosed_intervalrd   r   ri   rh   rJ   chrom_orderr   zdset_lengthr   r   r   r'  r4   
nan_valuescurr_genome_posrj   r   start_genome_pos	nan_countr  values_to_addnan_counts_to_addr  r  r  r   r!  r"  r   r#  r$  r%  r&  s2             `      `                        @@@@@@@@r   	_bedgraphrD    s   ( kk(+A.:	.+& 
yy
		+		+s#A &//0CD
')'B'BCV'W
"#AHHW
 
 112EF%%h/
242C2CH2MNQqxx(NN''1++M	
M*IQ*_,JEI 	
AI4Lt
!q&
 9
,iiQ 67CF"[N#6  
 	

 	A&cv  
 	
	 	RD aS		Y# !q&
 9
,( 	
S1A	+x $AGGK)AGGL"AGGJ(AGGM(AGGM(AGGM$AGGK%)YY&23dhhqkA& AGGJ( %qH}4AGGKAGGN	
&(=>	+qww{+,	+qwwz*+	-$	-/0	B 3IIU#		(D)AXs#AI8 8t FJ	

 O  ;1 

""$%77i!m8LMPS,"#Q
 
 o-1rvvh"2_"Dq"HIIF1#!1O!Ca!GHHJ//AAEEO y1}-:A	  Y]+y8 U5Q/00VV  Y]+y8 eIM*+VV  j1n%&U<!3C-D)EE
 'Kj1n%&U<!3C-D)EE
 eW$M), 	-''
"2S5G"G3}--&kJ&.#f+z:'4 &vkz':J{
<STJK(F#JK0J &kJ&m;1z vz2 i01
XXl95kzBC
"29"=kz"JK	 Y'=GiHz$9:AO	)X:(=> 	Y]#tBLLQ)^,T'UU#Q'4LLi8,
 	
' #/y"9*+"FY&6y&A*+&N#
 	)
*Q	 y H,C y
 Os   "[>[c                    | dk(  rt         j                  }n4| j                  d      rt        j                  | d      }nt	        | d      }|| dz   }n|}t        j                  |      rt        j                  |       t        j                  |      }g }d }|d   D ]  }		 |	d   d	   }
	 |	d   d
   }	  ||	d   d   |
      \  }}}}}|j                  |||||
xs ||xs t        j                         t        j                  |	d         t        j                  |	d         d        t!        j"                  t$        j&                  d        t!        j(                  |      }|j+                  d       |j+                  dd|dddddf       |j-                          |j/                         }|j+                  d       |j+                  d       d}d}t1        j2                  d       }t5        |d       }d}|D ]  }d}||k  st7        |d   |d   |      }t7        |d   |d    |      }d!}t9        t;        |d         t;        |d         dz         D ]D  }|s n@t9        t;        |d         t;        |d         dz         D ]  }||   |   |   |kD  sd"} D F |rt9        t;        |d         t;        |d         dz         D ]A  }t9        t;        |d         t;        |d         dz         D ]  }||   |   |xx   dz  cc<    C |j+                  d#|||d$   |d   |d   |d    |d   |d
   |d   |d   f
       |j-                          |j+                  d%||d   |d   |d    |d   f       |j-                          |dz  }|dz  }||k  r |j=                          y #  d }
Y xY w#  d }Y xY w# t        $ r  w xY w)&Nru   rv   rw   r?   z.gjdbc                 f   t         j                  }t         j                   }t         j                  }t         j                   }d}t        |       }	 | D ]  }t        |      D ]t  \  }	}
t	        ||
d         }t        ||
d         }t	        ||
d         }t        ||
d         }|rE|	dz   |z  }|||	   d   ||   d   z  z  }|||   d   ||	   d   z  z  }v t        |      dz  } 	 ||||t        |      dz  fS # t        $ r+ | d   }| d   }| d   }| d   }t        j                         }Y Fw xY w)Ng        r   r   g       @)	r   infr   r   minr   abs	TypeErrorrl   )coordsno_area_compminXmaxXminYmaxYareancoord_groupr   coordr   s               r   getRectz_geojson.<locals>.getRectF  sd   xxyxxyK	#% ' )+ 6 FHAutU1X.DtU1X.DtU1X.DtU1X.D'UaKAq 1KN14E EEAq 1KN14E EEF 4y3', T4s4y366  	#!9D!9D!9D!9D ==?D	#s   AC< &AC< <1D0/D0features
propertiesrQ  r{   geometrycoordinates)minLngmaxLngminLatmaxLatr   r{   rX  rW  c                     t        |       S r   r   r   s    r   r/   z_geojson.<locals>.<lambda>  r   r   z
        CREATE TABLE tileset_info
        (
            zoom_step INT,
            tile_size INT,
            max_zoom INT,
            min_x INT,
            max_x INT,
            min_y INT,
            max_y INT
        )
        z/INSERT INTO tileset_info VALUES (?,?,?,?,?,?,?)r      iL   iZ   aA  
        CREATE TABLE intervals
        (
            id int PRIMARY KEY,
            zoomLevel int,
            importance real,
            minLng int,
            maxLng int,
            minLat int,
            maxLat int,
            uid text,
            geometry text,
            properties text
        )
        z
        CREATE VIRTUAL TABLE position_index USING rtree(
            id,
            rMinLng, rMaxLng,
            rMinLat, rMaxLat
        )
        r   c                  .    t        j                  d       S )Nc                  4    t        j                  t              S r   r   r
   r   r   r/   z,_geojson.<locals>.<lambda>.<locals>.<lambda>  r   r   r   r
   r   r   r/   z_geojson.<locals>.<lambda>  r   r   c                     | d    S r   r
   r-   s    r   r/   z_geojson.<locals>.<lambda>  r   r   r0   rZ  r]  r[  r\  TFr   r   r   )r   r   r   r   r]   rX   r   r   r   jsonloadr   r   r   dumps	Exceptionr   r   rP   r   r   r   r   r   r   r   r5   r   r   r   r   )rc   rT   r   r   r   ri   geojsonr%   rU  featurerQ  r{   rZ  r[  r\  r]  _arear   r   r   r   r   r   r   r   r   r   r   s                               r   _geojsonrl  2  sB   3II			5	!IIh%3(!	yy
		+iilGG7B :& 	<(0D
	,'.C
	4;
#M2D51FFFFE NN$$$$"&-%/&++- $

7:+> ?"&**W\-B"C	#B RXX';<??;'D 	LL	 	LL9	
Ir4c2. 	KKM
 	AII	$ II	 IG//"WXKW"<=GG =	8#1!H+q{IVI/(Qx[)TGK 3y|,c'!*o.AB "s9Q<0#gaj/A2EF A"9-a03lB&+		 s9Q<0#gaj/A2EF :A"3y|#4c'!*o6IJ :#I.q1!494:: 		H!,((((%*, 		C  ((((
 1NIu 8#=~ GGI{	D	C$  		s%   "N*+N44A5N>*N14N;>O	rc   FILEPATH)metavarz--output-filez-ozuThe default output file name to use. If this isn't specified, clodius will replace the current extension with .hitile)defaulthelpz
--assemblyz-az6The genome assembly that this file was created againsthg19)rp  typero  z--chromosomezPOnly extract values for a particular chromosome. Use all chromosomes if not set.z--tile-sizez-tz^The number of data points in each tile. Used to determine the number of zoom levels to create.z--chunk-sizez-cz]How many values to aggregate at once. Specified as a power of two multiplier of the tile size   )rp  ro  z--chromosome-colz>The column number (1-based) which contains the chromosome namez--from-pos-colz@The column number (1-based) which contains the starting positionz--to-pos-colz>The column number (1-based) which contains the ending positionz--value-colzDThe column number (1-based) which contains the actual value positionz--has-header/--no-headerz2Does this file have a header that we should ignorez--methodz:The method used to aggregate values (e.g. sum, average...)rM   averagez--nan-valuez The string to use as a NaN valuez--transformnoner(  z--count-nanz1Simply count the number of nan values in the fileT)rp  is_flagz--closed-intervalz(Treat the to column as a closed intervalz--chromsizes-filenamez,A file containing chromosome sizes and orderz--zoom-stepz-zz5The number of intermediate aggregation levels to omit   c                 <    t        | |||||||||	|
|||||||       y r   )rD  )rc   rT   r   chromosome_colr4  r5  r6  r   r   r   r   re   r7  r8  r9  r:  rd   r   s                     r   bedgraphrz  
  sC    l %r   z{The default output file name to use. If this isn't specified, clodius will replace the current extension with .multires.bed)rp  z--importance-columnzThe column (1-based) containing information about how important that row is. If it's absent, then use the length of the region. If the value is equal to `random`, then a random value will be used for the importance (effectively leading to random sampling)z--max-per-tilez/The maximum number of entries to store per tile)ro  rr  rp  zoThe number of nucleotides that the highest resolution tiles should span. This determines the maximum zoom levelz--delimiter)ro  rr  z--offsetz3Apply an offset to all the coordinates in this filec                 .    t        | |||||||||	|
       y r   )r  )rc   rT   r   r   r   r   r   r   r   rd   r   s              r   bedfiler|    s.    H r   zvThe default output file name to use. If this isn't specified, clodius will replace the current extension with .bedpedb)rp  rr  show_defaultz-i)rp  rr  ro  r}  )rp  ro  r}  z-mz1The maximum number of entries to include per tilez-sz
--chr1-colz*The column containing the first chromosomez
--chr2-colz+The column containing the second chromosomez--from1-colz.The column containing the first start positionz--from2-colz/The column containing the second start positionz	--to1-colz,The column containing the first end positionz	--to2-colz-The column containing the second end positionz--sqlite-batch-sizezThe number of entries inserted into SQLite at once. The higher the faster the aggregation gets but more memory will be requiredz--sqlite-cache-sizezhThe SQLite cache size in MB. The higher the faster the aggregation gets but more memory will be requiredz-vz	--verbosezIncrease log statements)countrp  c                 ~    t        | fi d|d|d|d|d|d|d|d|d	|	d
|
d|d|d|d|d|d|d| y)zAggregate bedpe filesrT   r   r   r   r   r   r   rd   r   r   r   r   r   r   r   r   r   N)r   )rc   rT   r   r   r   r   r   r   rd   r   r   r   r   r   r   r   r   r   s                     r   bedper    s    h   ,	
  "   0        ,!" ,#$ %r   zsThe default output file name to use. If this isn't specified, clodius will replace the current extension with .gjdbz~The assembly that this data comes from. This parameter is unnecessary and/or overwritten if --chromsizes-filename is specified   z.The file containnig chromosome sizes and orderz--starting-resolutionz<The resolution that the starting data is at (e.g. 1, 10, 20)rA   z--row-infos-filenamez<A file containing the names of the rows in the multivec filec           
      (    t        | |||||||       y)zAggregate a multivec fileN)rk   )rc   rT   r   r   rd   rS   re   rf   s           r   multivecr    s%    r 	r      z
--max-zoomr_  c                 "    t        | ||||       y)zAggregate a geojson fileN)rl  )rc   rT   r   r   r   s        r   ri  ri    s    6 X{L)XFr   )r!   Fr   )NNrl   Fr!   rm   NNr   rD   rn   ro   rp   rq   Nrr   rs   r   )=
__future__r   r   collectionsr   r   re  r   r   os.pathr   rX   rl   r   r   r  rV   numpyrP   clickclodius.arrayr  r  clodius.chromosomeschromosomesrZ   clodius.multivecr  r`   negspy.coordinatesrY  r)  
scipy.miscmiscrI   r   r   r   utilsr   r   groupr   r    r=   rk   r   r  rD  rl  commandargumentoptionChoiceavailable_chromsizesr   rz  r   r|  r  ri  r
   r   r   <module>r     sQ	   /     	    
      !      9 	 	& 1	t BG32 AL 'Qh@L
^F	Up 
J/
	 	A	-b--/	0 
& 
	 
  	L
 	N
 	L
 	R
 	=
 	E	ui(	)	 :d 	E	vv&	'	 	<
 HRV 	7
 	C	' 0 D'T 
J/
	 $U 
G 	=
 
& 		:	 
: mT4	7
 	>		 5 0 n8 
J/
 
 	A	 
G 

 	=	 	<	 
:	 
&	 	7	 	5	 	6	 	9	 	:	 	7	 	8	 
I	 
I	 dKt2KL( M
 0 ~(V 
J/
	 
K	 
:	 	9	 	G
 	E	uk*	+	 	G
 0 \. 
J/
	 d$bs;
:	 
:	G < 0 2Gr   