o
    Nrf)                     @   s   d dl Zd dlmZ d dlZd dlmZmZ d dl	Z
d dlmZmZ d dlmZ d%ddZdd	 Zd
d Zdd Zdd Zd&ddZG dd deZG dd deZdd Zdd ZG dd deZdd Zdd  Zd!d" Zd#d$ ZdS )'    N)ListOptional)	BaseModel	validator)load_chromsizes   c              
      s   g }t |  fdddD ]a}|d}ttt|dd }d}|D ]B}|D ]9}|d}	ttt|	dd   }
d}t||
D ]\}}tt|t| dkrSd	}qA|s_||g7 }d	} nq&|rd nq"|sm||gg7 }q|S )
a  
    Partition a set of tile ids into sets of adjacent tiles. For example,
    if we're requesting a set of four tiles that form a rectangle, then
    those four tiles will become one set of adjacent tiles. Non-contiguous
    tiles are not grouped together.

    Parameters
    ----------
    tile_ids: [str,...]
        A list of tile_ids (e.g. xyx.0.0.1) identifying the tiles
        to be retrieved
    dimension: int
        The dimensionality of the tiles

    Returns
    -------
    tile_lists: [tile_ids, tile_ids]
        A list of tile lists, all of which have tiles that
        are within 1 position of another tile in the list
    c                    s    dd |  ddd   D S )Nc                 S      g | ]}t |qS  int).0pr	   r	   \/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/clodius/tiles/utils.py
<listcomp>$       zApartition_by_adjacent_tiles.<locals>.<lambda>.<locals>.<listcomp>.r   split)x	dimensionr	   r   <lambda>$   s     z-partition_by_adjacent_tiles.<locals>.<lambda>keyr   r      F   T)sortedr   listmapr   zipabs)tile_idsr   Ztile_id_liststile_idtile_id_partstile_positionaddedZtile_id_listZ
ct_tile_idZct_tile_id_partsZct_tile_positionZ	far_apartp1p2r	   r   r   partition_by_adjacent_tiles   s8   



r(   c                 C   s   t | \}}| dks| dkrdS | dks!| dkr#dS | dkr+dS | d	kr3d
S | dkr;dS | dkrCdS d S )Nz.bwz.bigwigbigwigz.mcoolz.coolcoolerz.htimetime-interval-jsonz.hitilehitilez.beddbbeddbz.mv5Zmultivec)opsplitextlower)filename_extr	   r	   r   infer_filetypeJ   s   r4   c                 C   s@   | dkrdS | dkrdS | dkrdS | dkrdS | dkrd	S d S )
Nr*   matrixr)   Zvectorr+   ztime-intervalr,   r-   Zbedliker	   )filetyper	   r	   r   infer_datatype]   s   r7   c                 C   sh   g }| D ]-}| d}t|dk rtdtt|d |d |d g\}}}||||||fg7 }q|S )Nr   r   zNot enough tile info presentr   r      )r   len
IndexErrorr   r   )r!   tiles_functiontile_valuesr"   partszr   yr	   r	   r   tiles_wrapper_2dj   s   
"r@   c                    s   g }t | }|D ]]}t|d dd }|d dd  dd |D }tdd |D }tdd |D }tdd |D }	td	d |D }
||||	|| d |
|	 d d
}| fdd|D 7 }q|S )aF  
    Bundle adjacent tile requests so that they can be
    processed concurrently. This is helpful for function
    that require scanning a dataset. It's faster to filter
    a large region and then break it down into individual
    tiles than to go over the entire dataset and filter
    individual tiles multiple times.
    r   r   r   c                 S   s(   g | ]}d d | ddd D qS )c                 S   r   r	   r
   )r   r   r	   r	   r   r      r   z7bundled_tiles_wrapper_2d.<locals>.<listcomp>.<listcomp>r   r   r   r   r   tr	   r	   r   r      s   ( z,bundled_tiles_wrapper_2d.<locals>.<listcomp>c                 S      g | ]}|d  qS r   r	   rA   r	   r	   r   r      r   c                 S   rC   rD   r	   rA   r	   r	   r   r      r   c                 S   rC   r   r	   rA   r	   r	   r   r      r   c                 S   rC   rE   r	   rA   r	   r	   r   r      r   )widthheightc              
      s,   g | ]\}}d   dtt||fqS )z{}.{}r   )formatjoinr   str)r   r$   dataZ
tileset_idr	   r   r          )r(   r   r   minmax)r!   r;   r<   Zpartitioned_tile_listsZ
tile_group
zoom_levelZtile_positionsZminxZmaxxZminyZmaxytfr	   rL   r   bundled_tiles_wrapper_2dz   s"   	
rR   r   c                 C   s   | d }| d }t |d |d  |d |d  }|d|  }	|d ||	  }
|d || |	  }|d ||	  }|d || |	  }|
|||gS )a  
    Get the coordinate boundaries for the given tile.

    Parameters:
    -----------
    tsinfo: { min_pos: [], max_pos [] }
        Tileset info containing the bounds of the dataset
    z: int
        The zoom level
    x: int
        The x position
    y: int
        The y position
    width: int
        Return bounds for a region encompassing multiple tiles
    height: int
        Return bounds for a region encompassing multiple tiles
    min_posmax_posr   r   r   )rO   )tsinfor>   r   r?   rF   rG   rS   rT   	max_width
tile_widthZfrom_xZto_xZfrom_yZto_yr	   r	   r   tile_bounds   s   "rX   c                   @   sV   e Zd ZU eed< eed< ee ed< ee ed< eddd Zeddd Zd	S )
TilesetInfoZmax_zoomrV   rT   rS   c                 C      |dk rt dt|S )2Check to make sure the zoom level is 0 or greater.r   z1The zoom level must be greater than or equal to 0
ValueErrorr   clsvr	   r	   r   max_zoom_zero_or_greater      z$TilesetInfo.max_zoom_zero_or_greaterc                 C   s   |dkrt dt|S )z2Check to make sure the max_width is greater than 0r   z$The max_width must be greater than 0r\   r^   r	   r	   r   max_width_greater_than_zero   rb   z'TilesetInfo.max_width_greater_than_zeroN)	__name__
__module____qualname__r   __annotations__r   r   ra   rc   r	   r	   r	   r   rY      s   
 
rY   c                   @   sV   e Zd ZU eed< ee ed< ee ed< ee ed< ee ed< eddd ZdS )	TileInfozoompositionrF   startendc                 C   rZ   )r[   r   z%The zoom level must be greater than 0r\   r^   r	   r	   r   zoom_zero_or_greater   rb   zTileInfo.zoom_zero_or_greaterN)	rd   re   rf   r   rg   r   r   r   rm   r	   r	   r	   r   rh      s   
 rh   c                    s   |  dd  d}ttt|dd }t|d jdt|d    fddt|dd  D } fd	dt|dd  D }t|dd   ||d
S )N|r   r   r   r8   r   c                    s,   g | ]\}}| j d     j|  qS r   rV   rS   r   ipos)rU   rP   r	   r   r      rM   z!parse_tile_id.<locals>.<listcomp>c                    s0   g | ]\}}|j d    j|    qS ro   rp   rq   rW   rU   rP   r	   r   r      s    )ri   rj   rF   rk   rl   )r   r   r   r   rV   	enumeraterh   )r"   rU   r#   r$   ZstartsZendsr	   rt   r   parse_tile_id   s"   
rv   c           
      c   s    t jdt | f }t j|||gddd \}}|||  }|||  }|}t||D ]}	|	|| |	 fV  d}q,|t|t|fV  dS )a0  
    Convert absolute coordinates to genomic coordinates

    Parameters:
    -----------
    chromsizes: [[chrom, size],...]
        A list of chromosome sizes associated with this tileset
    start_pos: int
        The absolute start coordinate
    end_pos: int
        The absolute end coordinate
    r   right)Zsider   N)npZr_ZcumsumZsearchsortedranger   )

chromsizes	start_posend_posZabs_chrom_offsetsZcid_loZcid_hiZ
rel_pos_loZ
rel_pos_hirk   cidr	   r	   r   abs2genomic   s   r~   c                   @   s.   e Zd ZU eed< eed< eed< eed< dS )ChromosomeIntervalr}   namerk   rl   N)rd   re   rf   r   rg   rJ   r	   r	   r	   r   r     s
   
 r   c              	   c   s\    t | \}}}t|||D ]\}}}zt||| ||dV  W q ty+   Y  dS w dS )zConvert an absolute genomic range to sections of genomic ranges.

    E.g. (1000,2000) => [('chr1', 1000, 1500), ('chr2', 1500, 2000)]
    )r}   r   rk   rl   N)r   r~   r   r:   )Zchromsizes_filenamerk   rl   Z
chrom_infoZchrom_namesZchrom_sizesr}   r	   r	   r   abs2genome_fn  s   r   c                 C   s&   t t| | }tt t |S )au  
    Depth of quad tree necessary to tesselate the concatenated genome with quad
    tiles such that linear dimension of the tiles is a preset multiple of the
    genomic resolution.

    Parameters:
    -----------
    chromsizes: pandas.Series
        A series representation of the chromosome sizes
    tile_size_bp: int
        The size of each tile in the tileset
    )rx   ceilsumr   log2)rz   Ztile_size_bpZmin_tile_coverr	   r	   r   get_quadtree_depth0  s   r   c                 C   s   |  ddkr#| d}| ddkr!|d}t|d |d S dS | ddkr,dS tdtj}tdd || D }tdd ||D }d	D ]}|| v rX dS ||  v ra dS qMz||k rjW dS ||krqW dS W dS  ty}   Y dS w )
Nr2   r   r   z(\d+)c                 S   $   g | ]}|r|  rt|n|qS r	   isdigitr   r   ar	   r	   r   r   S     $ znatcmp.<locals>.<listcomp>c                 S   r   r	   r   r   r	   r	   r   r   T  r   )mr?   r   )	findr   natcmprecompileUtupler0   	TypeError)r   r?   Zx_partsZy_partsZ	_NS_REGEXr   r	   r	   r   r   A  s4   

r   c                 C   s   t | ttdS )z3
    Sort an iterable by natural genomic order
    r   )r   ft
cmp_to_keyr   )iterabler	   r	   r   	natsortedi  s   r   ro   )r   r   )	functoolsr   Zos.pathpathr.   r   typingr   r   numpyrx   Zpydanticr   r   Zclodius.chromosomesr   r(   r4   r7   r@   rR   rX   rY   rh   rv   r~   r   r   r   r   r   r	   r	   r	   r   <module>   s,    
>
%"(