o
    Nrf                     @   s  d dl mZmZ d dlZd dlZd dlZd dlZd dlZd dl	m
Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dl m!Z" d dl#Z#ddl$m%Z% ddl&m'Z'm(Z( e%) dd Z*g fdd	Z+	dddZ,	dddZ-					
													 dddZ.dd Z/dd Z0dd  Z1e*2 ej3d!d"d#ej4d$d%dd&d'ej4d(d)d*e5e6 d+d,ej4d-dd.d'ej4d/d0dd1d'ej4d2d3d4d5d6ej4d7d8dd6ej4d9d:dd6ej4d;d<dd6ej4d=d>dd6ej4d?d@dd6ej4dAdBe5dCdDgdCd,ej4dEdFe7dd,ej4dGdBe5dHdIgdHd,ej4dJdKdLdMej4dNdOdLdMej4dPdQdd6ej4dRdSdTdUd6dVdW Z8e*2 ej3d!d"d#ej4d$d%ddXd'ej4d(d)d*dYej4dZd[dYej4d?d@dd6ej4d-dd.d'ej4d\d
e9d]d^ej4d/dd_d'ej4d`de7daej4dPdQdd6ej4dbdce9d d,ddde Z:e*2 ej3d!d"d#ej4d$d%dfe7dd,ej4d(d)d*e7dLdgej4dZdhd[e7ddLdiej4d?d@ddLdjej4d\dkdle9d
dLdiej4d/dmd_e9ddLdiej4d-d3d.e7ddLdiej4dPdQe7ddLdiej4dndoe9ddLdiej4dpdqe9ddLdiej4drdse9ddLdiej4dtdue9ddLdiej4dvdwe9ddLdiej4dxdye9ddLdiej4dzd{e9ddLdiej4d|d}e9ddLdiej4d~ddLdddd Z;e*2 ej3d!d"d#ej4d%d$ddd'ej4d)d(ddd'ej4dmd/dd_d'ej4d3dPddd'ej4dddd'ej4dAdBe5dCdgdCd,ej4dddd6dd Ze*2 ej3d!d"d#ej4d%d$ddd'ej4dkd\de9daej4dmd/dd_d'ej4dSddd_d'dd Z<dS )    )divisionprint_functionN   )cli)get_tile_pos_from_lng_lattransactionc                   C   s   dS )zW
    Aggregate a data file so that it stores the data at multiple
    resolutions.
    N r   r   r   ^/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/clodius/cli/aggregate.py	aggregate   s   r
   c                 C   sN   |  d |  d|||d|dtt||||d|
|	f
 |   d S )NaQ  
        CREATE TABLE tileset_info
        (
            zoom_step INT,
            max_length INT,
            assembly text,
            chrom_names text,
            chrom_sizes text,
            tile_size REAL,
            max_zoom INT,
            max_width REAL,
            header text,
            version text
        )
        z5INSERT INTO tileset_info VALUES (?,?,?,?,?,?,?,?,?,?)	)executejoinmapstrcommit)cursor	zoom_step
max_lengthassemblychrom_nameschrom_sizes	tile_sizemax_zoom	max_widthversionheaderr   r   r	   store_meta_data'   s&   r   d   Fc                 C   sV   dd }t || || }| }|rt|dd d}nt|dd d}|d | S )Nc                 S   s   dd | D S )Nc                 S   s   g | ]}|d  |fqS )r   ).0er   r   r	   
<listcomp>d       zDreduce_values_by_importance.<locals>.extract_key.<locals>.<listcomp>r   )entriesr   r   r	   extract_keyc   s   z0reduce_values_by_importance.<locals>.extract_keyc                 S   s   t | d S Nfloatxr   r   r	   <lambda>j       z-reduce_values_by_importance.<locals>.<lambda>keyc                 S   s   t | d  S r%   r'   r)   r   r   r	   r+   l       )dictvaluessorted)Zentry1Zentry2Zmax_entries_per_tileZreverse_importancer$   Zby_uidZcombined_by_uidZcombined_entriesr   r   r	   reduce_values_by_importance`   s   r3   c              	   C   s   t | d}|du rt| d d }t||\}	}
}|dkr!	 |dkr*dd }nd	d }td
| |durTt|d}dd |D }W d   n1 sNw   Y  nd}td| tj	|t
|
|dd ||||d dS )aE  
    Aggregate a multivec file.

    This is a file containing nxn data that is aggregated along only one axis.
    This data should be in an HDF5 file where each dataset is named for a
    chromosome and contains a 'resolutions' group containing values for the
    base level resolution.

    Example: f['chr1']['reslutions']['1000'] = [[1,2,3],[4,5,6]]

    The resulting data will be organized by resolution and chromosome.

    Example: f_out['chr1']['resolutions']['5000']=[[1000,2000,3000],[4000,5000,6000]]

    Aggregation is currently done by summing adjacent values.
    rNr   z.multires.mv5Zmaxtotal	logsumexpc                 S   s(   | j | jd ddf}tj|ddj S Nr   r&      )Zaxis)Treshapeshapesmr5   )r*   ar   r   r	   agg   s   z_multivec.<locals>.aggc                 S   s"   | j | jd ddfjddj S r6   )r8   r9   r:   sumr)   r   r   r	   r=      s   "zagg:c                 S   s   g | ]	}|  d qS )utf8)stripencoder   liner   r   r	   r!      s    z_multivec.<locals>.<listcomp>z
row_infos:c                 S   s$   t j| j| jd ddfddjS r6   )npZnansumr8   r9   r:   r)   r   r   r	   r+      s   $ z_multivec.<locals>.<lambda>)Z
chromsizesr=   starting_resolutionr   output_file	row_infos)h5pyFileopsplitextcchload_chromsizesprintopencmvZcreate_multivec_multireszip)filepathrF   r   r   chromsizes_filenamerE   methodrow_infos_filenameZf_in
chrom_infor   r   r=   frG   r   r   r	   	_multivecq   s8   




rX   random   r7                 順 c           *         s  d}dkrt d|  | dkrtj}n| dr!t| d}nt| d}|d u rC| }| dr9tj|d }tj|d d }t	
|rMt| t| \}} f	d	d
	g }|rm|  nB|  }z&| }t|d   t|d   t|d   t|d   W n ty   d|}t|w 	|g}|	fdddd |D D 7 }d uṙfdd|D }dkrt dt| d jd }ttt|| td }ttjdd  tj|d d}t|d| |||||d|  |d
 | }| d | d | dt|d  d | d | d d}dt!"dd }t#|dd d }g } g }!fd!d"}"t$|D ]\}#}$d}||krF|d||   
t%t&
fd#d|$d$ d |$d% d g}%t%t&
fd&d|$d$ d |$d% d g}&d'}'t'|%d |&d d D ]&}(|'s n t'|%d |&d d D ]})|| |( |) |krd(}' nqq|'r=t'|%d |&d d D ]}(t'|%d |&d d D ]})|| |( |)  d7  < qq| (||$d) |$d$ d |$d$ d |$d% d |$d% d |$d* |$d+ |$d, f
 |!(|$d$ d |$d$ d |$d% d |$d% d f d7 n	|d7 }||ksdt| |krT|"||| |! qY|"||| |! |)  d S )-Nr   r   zBEDPEDB Version -.gzrtr4   z.bedpedbc              
      s  |   }i }zD|d  }|d  }j| }j| }|t|d   |t|d   g|d< |t|d   |t|d   g|d< W n tyd   d |d  |d  }t|w t |d< |d d t|d   |d< t||d< t||d	< d u rt|d d |d d  |d d |d d  |d
< ndkrt		 |d
< nt
|td  |d
< | |d< |S )Nr   xsyszERROR converting chromosome position to genome position. Please make sure you've specified the correct assembly using the --assembly option or a chromsizes file using the . --chromsizes-filename option.Current assembly: {}, chromosomes: {},{}uidr   	chrOffsetchrom1chrom2
importancerY   fields)splitcum_chrom_lengthsintKeyErrorformatslugidnicer   maxrY   r(   )rC   partsdrh   ri   Zchrom1_offsetZchrom2_offset	error_str)	r   chr1_colchr2_colrV   	from1_col	from2_colimportance_columnto1_colto2_colr   r	   line_to_dict   s@   

	 8z_bedpe.<locals>.line_to_dictzCouldn't convert one of the bedpe coordinates to an integer. If the input file contains a header, make sure to indicate that with the --has-header option. Line: {}c                    s   g | ]}|r |qS r   r   rB   )r~   r   r	   r!   *  r"   z_bedpe.<locals>.<listcomp>c                 S   s   g | ]}|  qS r   )r@   rB   r   r   r	   r!   *  s    c                    s(   g | ]}|d   ks|d  kr|qS )rh   ri   r   r   ru   
chromosomer   r	   r!   -  s    "zFound z entriesr7   c                 S      t | S Nrn   valr   r   r	   r+   :      z_bedpe.<locals>.<lambda>)Zisolation_level)r   r   r   r   r   r   r   r   zPRAGMA synchronous = OFF;zPRAGMA journal_mode = OFF;zPRAGMA cache_size =   ;a5  
        CREATE TABLE intervals
        (
            id int PRIMARY KEY,
            zoomLevel int,
            importance real,
            fromX int,
            toX int,
            fromY int,
            toY int,
            chrOffset int,
            uid text,
            fields text
        )
        z
        CREATE VIRTUAL TABLE position_index USING rtree(
            id,
            rFromX, rToX,
            rFromY, rToY
        )
        c                   S      t dd S )Nc                   S   
   t tS r   coldefaultdictrn   r   r   r   r	   r+   r     
 z*_bedpe.<locals>.<lambda>.<locals>.<lambda>r   r   r   r   r   r	   r+   r  r/   c                 S   
   | d  S Nrj   r   r)   r   r   r	   r+   t  r   r-   c                    sl   dkrt d  d t|  |d| |d| W d    n1 s'w   Y  |  |  d S )Nr   zInsert batch ()2INSERT INTO intervals VALUES (?,?,?,?,?,?,?,?,?,?)-INSERT INTO position_index VALUES (?,?,?,?,?))rN   r   Zexecutemanyclear)conncinterval_insertsposition_index_inserts)counterverboser   r	   batch_inserty  s   
z_bedpe.<locals>.batch_insertc                       t |   S r   r   r)   
tile_widthr   r	   r+     r,   rd   re   c                    r   r   r   r)   r   r   r	   r+     r,   TFrj   rg   rf   rk   )*rN   sysstdinendswithgziprO   ospathrK   rJ   existsremoverL   rM   readliner@   rl   rn   
ValueErrorrp   lentotal_lengthmathceillogsqlite3register_adapterrD   int64connectr   r   r   r   r   r2   	enumeratelistr   rangeappendclose)*rR   rF   r   r{   
has_headermax_per_tiler   r   rS   rw   ry   r|   rx   rz   r}   r   sqlite_cache_sizesqlite_batch_sizer   ZBED2DDB_VERSIONrW   r   r   r#   
first_linert   rv   assembly_sizer   r   	curr_zoomtile_countsr   r   r   Z	entry_numru   	tile_fromtile_toempty_tilesijr   )r   rw   rx   rV   r   r   ry   rz   r{   r~   r   r|   r}   r   r	   _bedpe   s   





/

	 

"




$*



,
4r   c           *         s  d}|d u r| d }n|}t |rt| | dr'dd l}|| d}nt| d}zt|	|\ }}W n! t	yX   |	d u rMt
d|tjd Y d S t
d	|	tjd Y d S w td fd
d}g }t
d| |r|| }| |}nN|  }| |}z	|||g7 }W n$ ty   t
d|d  dtjd Y d S  ty   t
d| Y nw ttttdt| |d }|D ] }| |}z	|||g7 }W q ty   t
d| Y qw d urfdd|D }|} jd }	 ttt|| td }	 dd l}|t j!dd  t
d|d| |"|}t#|d|||||||d|  ||d i g }|D ]}|d }||< ||d |d |fg7 }qF|}|$ }|%d |%d d}d}|} |d ur||k r|} t&|fddd}!t
d | t'(t}"|!D ]}#|| kr^|d||   }|#d }$d!}%|$|#d k rt)|$| }&d"*||&}'	 |"|' |krd#}%n|$|7 }$|$|#d k s|%r|#d }$|$|#d k rt)|$| }&d"*||&}'|"|'  d7  < 	 |$|7 }$|$|#d k s|%rU|#d$  }(d%})|%|)|||(d& |(d |(d |(d' |(d |(d( |(d) f	 |d* dkr?t
d+||(d |(d   d,})|%|)||||(d |(d f |d7 }n	|d7 }|| ksd}q|+  d!S )-Nr[   z.beddbrb   r   rc   r4   zAssembly not found:filezChromsizes filename not found:c           
   	      s  zt | d }t | d }W n ty   td| w | d }du r* }ndkr3|| }ndkr< }n
t| t d  }||k rWtd| tjd	 ||}}t| d
krb| d
 }nd} j	| |  } j	| |  }|| }||t
 ||d| |t|d}	|	S )zw
        Convert a bed file line to a numpy array which can later
        be used as an entry in an h5py file.
        r   r7   z$Error parsing the position, line: {}r   NsizerY   zWARNING: stop < start:r   r[    r   )startPosendPosrf   namerg   rk   rj   r   )rn   r   rp   rY   r(   rN   r   stderrr   rm   rq   rr   r   r   )
rC   startstopZchromrj   Zbedline_nameZgenome_startZ
genome_end
pos_offsetrt   )rV   r{   offsetrandr   r	   line_to_np_array  sB   




z"_bedfile.<locals>.line_to_np_arrayz
delimiter:zUnable to find zz in the list of chromosome sizes. Please make sure the correct assembly or chromsizes filename is passed in as a parameterzInvalid line:r   c                    s   g | ]
}|d   kr|qS r   r   r   r   r   r	   r!   G  s    z_bedfile.<locals>.<listcomp>r7   c                 S   r   r   r   r   r   r   r	   r+   e  r   z_bedfile.<locals>.<lambda>zoutput_file:zheader:)	r   r   r   r   r   r   r   r   r   rf   r   r   a&  
        CREATE TABLE intervals
        (
            id int PRIMARY KEY,
            zoomLevel int,
            importance real,
            startPos int,
            endPos int,
            chrOffset int,
            uid text,
            name text,
            fields text
        )
        z
        CREATE VIRTUAL TABLE position_index USING rtree(
            id,
            rStartZoomLevel, rEndZoomLevel, rStartPos, rEndPos
        )
        c                    s    | d  d  S )Nr&   rj   r   r)   )uid_to_entryr   r	   r+     s    r-   zmax_per_tile:Tz{}.{}Fr&   z0INSERT INTO intervals VALUES (?,?,?,?,?,?,?,?,?)rj   rg   r   rk   r   zcounter:r   ),rJ   r   r   r   r   r   rO   rL   rM   FileNotFoundErrorrN   r   r   rY   Randomr   r@   rl   ro   
IndexErrorr   r   r   r   r   r   rn   r   r   r   r   r   rD   r   r   r   r   r   r2   r   r   floorrp   r   )*rR   rF   r   r{   r   r   r   r   	delimiterrS   r   ZBEDDB_VERSIONr   Zbed_filer   r   r   ZdsetrC   r   Z
line_partsr   r   r   r   Z	intervalsru   rf   r   r   r   r   Zmax_viewable_zoomZsorted_intervalsr   intervalcurr_posZspace_availableZ	curr_tileZtile_idvalueZexec_statementr   )rV   r   r{   r   r   r   r	   _bedfile  s0  





	2
&
"

	



Ur   c           )   
      s  |d u rt | d d }td| t |rt| t|d}|d ur;t	|}dd t
|D }t|}nt|}dd t|D }t|}|j td  |	}	|	d	  g g d}g g gg g d	|  |	krt d	|  }|jd
t| |fdddg7 |jdt| |fdddg7 g g7 g g7 dg7 |	7 } d	|  |	ksu|jdddd}td| 	|jd<  |jd< ||jd< ||jd< ||jd< ||jd< |	|jd< tt|jd |	 td	  |jd< |	d	  |jd< d|jd< td|jd  td|jd  td|jd  td td|jd  t | d krBtj}n| d!rSdd l}|| d"}nt| d#}d} 	f
d$d%}g }g }|rt|  d}|D ]}|  } |j| |d&   t | |d&   }!|!| d&kr|t!j"g|!| d&  7 }|d&g|!| d&  7 }||!| d& 7 }| |d&  |krd&nd}"|d'kr| |d&  |ksd	t#| |d&   nt!j"}#n| |d&  |kst#| |d&  nt!j"}#|#gt | |d&  t | |d&    }$|"gt | |d&  t | |d&    }%|r'|$|#g7 }$|%|"g7 }%||$7 }||%7 }|!t$|$ |jd< |t$|$7 }t$|krrtd(t$| td)| ||d  |d   |d  }|d  }t$|ksEqx||| 	 t$| t!%| d  }&t!%| d  }'	 | }(|&| |(|( < |'| |(|( < |d&   t&t'(|&d		 7  < |d&   t&t'(|'d		 7  < | d  |< | d  |< |  7  < |d&7 }|	 krd S qz)*Nr   z.hitilezoutput file:wc                 S      g | ]}| d qS zutf-8rA   r   r<   r   r   r	   r!   0  s    
z_bedgraph.<locals>.<listcomp>c                 S   r   r   r   r   r   r   r	   r!   6  s    zassembly_size:r7   Zvalues_rW   r   )dtypecompressionZnan_values_meta)r   )r   z	assembly:z	zoom-stepz
max-lengthr   zchrom-nameszchrom-sizeszchrom-orderz	tile-sizezmax-zoomz	max-widthzmax-positionzassembly size (max-length)z	max_zoom:zchunk-size:ra   rb   rc   r4   c              	      s  d}d  | 7  < d  |7  < t    }| d t  }td| d |||| |  t| krtdt|  t| d  }t| d  }	 td|  | }|| || < || || < |d   tt	|d	 7  < |d   tt	|d	 7  < | d  |< | d  |< |  7  < |d7 }|	 krd S t| ks<d S d S )Nr   r   z?position: {} progress: {:.2f} elapsed: {:.2f} remaining: {:.2f}zlen(data_buffers[curr_zoom])zpositions[curr_zoom]:r7   )
timer(   rN   rp   r   rD   arrayr   ctr
   )Zbuffers_to_addZnan_buffers_to_addr   Z	curr_timeZpercent_progress
curr_chunknan_curr_chunkr   
r   
chunk_sizeZdata_buffersZdsetsr   Znan_data_buffersZ	nan_dsetsZ	positionst1r   r   r	   add_values_to_data_buffers  sH   


z-_bedgraph.<locals>.add_values_to_data_buffersr   exp2zlen(values):zline:))rJ   rK   rN   r   r   r   rH   rI   ncZget_chrominfo_from_fileZget_chromorder_from_fileZget_chromsizes_from_fileZget_chrominfoZget_chromorderZget_chromsizesr   r   r   Zcreate_datasetr   attrsr   r   r   r   r   r   rO   r   r@   rl   rm   rn   rD   nanr(   r   r   r   r   r
   ))rR   rF   r   Z	chrom_colfrom_pos_col
to_pos_col	value_colr   r   r   r   rT   	nan_value	transform	count_nanclosed_intervalrS   r   rW   rV   Zchrom_orderr   zZdset_lengthru   r   r   r   r1   Z
nan_valuesZcurr_genome_posrC   rt   Zstart_genome_posZ	nan_countr   Zvalues_to_addZnan_counts_to_addr   r   r   r   r   r	   	_bedgraph  s  





















:





$r   c                 C   st  | dkrt j}n| drt| d}nt| d}|d u r"| d }n|}t|r.t| t	
|}g }dd }|d D ]V}	z|	d	 d
 }
W n   d }
Y z|	d	 d }W n   d }Y z.||	d d |
\}}}}}||||||
pu||pzt t	|	d t	|	d	 d W q= ty    w ttjdd  t|}|d |dd|dddddf |  | }|d |d d}d}tdd }t|dd d}d}|D ]}d}||krt|d |d  |}t|d! |d" |}d#}tt|d t|d d D ]*}|s n$tt|d t|d d D ]}|| | | |kr2d$} nq!q|rtt|d t|d d D ]"}tt|d t|d d D ]}|| | |  d7  < qYqH|d%|||d& |d |d! |d" |d  |d |d |d	 f
 |  |d'||d |d! |d" |d  f |  |d7 }n|d7 }||ksq|  d S )(Nra   rb   rc   r4   z.gjdbc                 S   s&  t j}t j }t j}t j }d}t| }zV| D ]Q}t|D ]D\}	}
t||
d }t||
d }t||
d }t||
d }|sa|	d | }|||	 d || d  7 }||| d ||	 d  8 }qt|d }qW n ty   | d }| d }| d }| d }t }Y nw ||||t|d fS )Ng        r   r   g       @)	r   infr   r   minrs   abs	TypeErrorrY   )ZcoordsZno_area_compZminXZmaxXZminYZmaxYareanZcoord_groupr   Zcoordr   r   r   r	   getRectF  s8   
z_geojson.<locals>.getRectfeatures
propertiesr  rf   geometrycoordinates)minLngmaxLngminLatmaxLatrj   rf   r  r  c                 S   r   r   r   r   r   r   r	   r+     r   z_geojson.<locals>.<lambda>z
        CREATE TABLE tileset_info
        (
            zoom_step INT,
            tile_size INT,
            max_zoom INT,
            min_x INT,
            max_x INT,
            min_y INT,
            max_y INT
        )
        z/INSERT INTO tileset_info VALUES (?,?,?,?,?,?,?)r      iL   iZ   aA  
        CREATE TABLE intervals
        (
            id int PRIMARY KEY,
            zoomLevel int,
            importance real,
            minLng int,
            maxLng int,
            minLat int,
            maxLat int,
            uid text,
            geometry text,
            properties text
        )
        z
        CREATE VIRTUAL TABLE position_index USING rtree(
            id,
            rMinLng, rMaxLng,
            rMinLat, rMaxLat
        )
        r   c                   S   r   )Nc                   S   r   r   r   r   r   r   r	   r+     r   z,_geojson.<locals>.<lambda>.<locals>.<lambda>r   r   r   r   r	   r+     r/   c                 S   r   r   r   r)   r   r   r	   r+     r   r-   r  r  r	  r
  TFr   rj   r   )r   r   r   r   rO   rJ   r   r   r   jsonloadr   rq   rr   dumps	Exceptionr   r   rD   r   r   r   r   r   r   r   r2   r   r   rn   r   )rR   rF   r   r   r   rW   geojsonr#   r  featurer  rf   r  r	  r
  r  Z_arear   r   r   r   r   ru   r   r   r   r   r   r   r   r	   _geojson2  s   





!



""""<r  rR   ZFILEPATH)metavarz--output-filez-ozuThe default output file name to use. If this isn't specified, clodius will replace the current extension with .hitile)defaulthelpz
--assemblyz-az6The genome assembly that this file was created againstZhg19)r  typer  z--chromosomezPOnly extract values for a particular chromosome. Use all chromosomes if not set.z--tile-sizez-tz^The number of data points in each tile. Used to determine the number of zoom levels to create.z--chunk-sizez-cz]How many values to aggregate at once. Specified as a power of two multiplier of the tile size   )r  r  z--chromosome-colz>The column number (1-based) which contains the chromosome namez--from-pos-colz@The column number (1-based) which contains the starting positionz--to-pos-colz>The column number (1-based) which contains the ending positionz--value-colzDThe column number (1-based) which contains the actual value positionz--has-header/--no-headerz2Does this file have a header that we should ignorez--methodz:The method used to aggregate values (e.g. sum, average...)r>   Zaveragez--nan-valuez The string to use as a NaN valuez--transformnoner   z--count-nanz1Simply count the number of nan values in the fileT)r  Zis_flagz--closed-intervalz(Treat the to column as a closed intervalz--chromsizes-filenamez,A file containing chromosome sizes and orderz--zoom-stepz-zz5The number of intermediate aggregation levels to omit   c                 C   s.   t | |||||||||	|
||||||| d S r   )r   )rR   rF   r   Zchromosome_colr   r   r   r   r   r   r   rT   r   r   r   r   rS   r   r   r   r	   bedgraph
  s(   vr  z{The default output file name to use. If this isn't specified, clodius will replace the current extension with .multires.bed)r  z--importance-columnzThe column (1-based) containing information about how important that row is. If it's absent, then use the length of the region. If the value is equal to `random`, then a random value will be used for the importance (effectively leading to random sampling)z--max-per-tilez/The maximum number of entries to store per tile)r  r  r  zoThe number of nucleotides that the highest resolution tiles should span. This determines the maximum zoom levelz--delimiter)r  r  z--offsetz3Apply an offset to all the coordinates in this filec                 C   s    t | |||||||||	|
 d S r   )r   )rR   rF   r   r{   r   r   r   r   r   rS   r   r   r   r	   bedfile  s   Dr  zvThe default output file name to use. If this isn't specified, clodius will replace the current extension with .bedpedb)r  r  show_defaultz-i)r  r  r  r  )r  r  r  z-mz1The maximum number of entries to include per tilez-sz
--chr1-colz*The column containing the first chromosomez
--chr2-colz+The column containing the second chromosomez--from1-colz.The column containing the first start positionz--from2-colz/The column containing the second start positionz	--to1-colz,The column containing the first end positionz	--to2-colz-The column containing the second end positionz--sqlite-batch-sizezThe number of entries inserted into SQLite at once. The higher the faster the aggregation gets but more memory will be requiredz--sqlite-cache-sizezhThe SQLite cache size in MB. The higher the faster the aggregation gets but more memory will be requiredz-vz	--verbosezIncrease log statements)countr  c                 C   sv   t | fi d|d|d|d|d|d|d|d|d	|	d
|
d|d|d|d|d|d|d| dS )zAggregate bedpe filesrF   r   r{   r   r   r   r   rS   rw   ry   r|   rx   rz   r}   r   r   r   N)r   )rR   rF   r   r{   r   r   r   r   rS   rw   ry   r|   rx   rz   r}   r   r   r   r   r   r	   bedpe  sL    	
r!  zsThe default output file name to use. If this isn't specified, clodius will replace the current extension with .gjdbz~The assembly that this data comes from. This parameter is unnecessary and/or overwritten if --chromsizes-filename is specified   z.The file containnig chromosome sizes and orderz--starting-resolutionz<The resolution that the starting data is at (e.g. 1, 10, 20)r5   z--row-infos-filenamez<A file containing the names of the rows in the multivec filec              	   C   s   t | ||||||| dS )zAggregate a multivec fileN)rX   )rR   rF   r   r   rS   rE   rT   rU   r   r   r	   multivec  s   9r#     z
--max-zoomr  c                 C   s   t | |||| dS )zAggregate a geojson fileN)r  )rR   rF   r   r   r   r   r   r	   r    s   r  )r   Fr   )NNrY   Fr   rZ   NNr   r7   r[   r\   r]   r^   Nr_   r`   r   )=
__future__r   r   collectionsr   r   r  r   r   Zos.pathr   rJ   rY   r   r   r   rH   numpyrD   ZclickZclodius.arrayr   r   Zclodius.chromosomesZchromosomesrL   Zclodius.multivecr#  rP   Znegspy.coordinatesr  r   Z
scipy.miscmiscr;   rq   r   r   utilsr   r   groupr
   r   r3   rX   r   r   r   r  commandargumentoptionZChoiceZavailable_chromsizesr   r  rn   r  r!  r  r   r   r   r	   <module>   s   

:

F
    H  % Y.* 			.+