o
    Nrf                     @   s   d dl mZ d dlZd dlZd dlZddlmZm	Z	 e
eZdZdd Zddd	Zd
d Z	dddZi ddfddZdd ZdS )    )FastaN   )	natsortedget_quadtree_depthi   c                    s`   t | dd t fdd  D }t  }W d    n1 s$w   Y  t|| S )NFZone_based_attributesc                 3   s"    | ]}|t  j| fV  qd S N)lenrecords).0seqfa \/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/clodius/tiles/fasta.py	<genexpr>   s     z!get_chromsizes.<locals>.<genexpr>)r   dictkeysr   pdSeries)fapath
chromsizesZchromosomesr   r   r   get_chromsizes   s
   r   c                 C   s|   |du rt | }g }| D ]\}}||t|gg7 }qn	|}dd |D }t|t}dgt|gtd|  t||d}|S )a'  
    Get the tileset info for a FASTA file

    Parameters
    ----------
    fapath: string
        The path to the FASTA file from which to retrieve data
    chromsizes: [[chrom, size],...]
        A list of chromosome sizes associated with this tileset.
        Typically passed in to specify in what order data from
        the FASTA should be returned.

    Returns
    -------
    tileset_info: {'min_pos': [],
                    'max_pos': [],
                    'tile_size': 1024,
                    'max_zoom': 7
                    }
    Nc                 S      g | ]}t |d  qS r   intr
   cr   r   r   
<listcomp>0       z tileset_info.<locals>.<listcomp>r      )Zmin_posZmax_pos	max_width	tile_sizemax_zoomr   )r   itemsr   r   	TILE_SIZEsum)r   r   Zchromsizes_listchromsizer#   tileset_infor   r   r   r)      s"   

r)   c           
      c   s    t jdt | f }t j|||gddd \}}|||  }|||  }|}t||D ]}	|	|| |	 fV  d}q,|||fV  dS )a  
    Convert absolute genomic sizes to genomic

    Parameters:
    -----------
    chromsizes: [1000,...]
        An array of the lengths of the chromosomes
    start_pos: int
        The starting genomic position
    end_pos: int
        The ending genomic position
    r   right)Zsider   N)npZr_ZcumsumZsearchsortedrange)
r   	start_posend_posZabs_chrom_offsetsZcid_loZcid_hiZ
rel_pos_loZ
rel_pos_histartcidr   r   r   abs2genomic=   s   r1   c                    sr   |d u rt | }|  tt|||}t| dd fdd|D }W d    n1 s/w   Y  d|S )NFr   c                    s(   g | ]\}}} |  || j qS r   )r   )r
   r0   r/   endZchrom_namesr   r   r   r   `   s    z"get_fasta_tile.<locals>.<listcomp> )r   r   listr1   r   join)r   
zoom_levelr-   r.   r   Zcids_starts_endsZarraysr   r3   r   get_fasta_tileW   s   
r8   c                 C   sN  g }|D ]}| ddd }| dd }| d}	ttt|	dd }
tdd |D }|rGd	d |D }d
d |D }tj||d}nd}d|v rQ|d }||v rZ|| }nd}|
d }|
d }|du rlt| }t|t	}t	d||   }|r||kr|dd| ifg  S || }|| }t
| ||||}||d|ifg7 }q|S )ai  
    Generate tiles from a FASTA file.

    Parameters
    ----------
    fapath: str
        The filepath of the FASTA file
    tile_ids: [str,...]
        A list of tile_ids (e.g. xyx.0.0) identifying the tiles
        to be retrieved
    chromsizes_map: {uid: []}
        A set of chromsizes listings corresponding to the parameters of the
        tile_ids. To be used if a chromsizes id is passed in with the tile id
        with the `|cos:id` tag in the tile id
    chromsizes: [[chrom, size],...]
        A 2d array containing chromosome names and sizes. Overrides the
        chromsizes in chromsizes_map
    max_tile_width: int
        How wide can each tile be before we return no data. This
        can be used to limit the amount of data returned.
    Returns
    -------
    tile_list: [(tile_id, tile_data),...]
        A list of tile_id, tile_data tuples
    |r   Nr   .   c                 S   s   g | ]}| d qS ):)split)r
   or   r   r   r      s    ztiles.<locals>.<listcomp>c                 S   s   g | ]}|d  qS )r   r   r   r   r   r   r      s    c                 S   r   r   r   r   r   r   r   r      r   )indexcosr    errorz1Tile too large, no data returned. Max tile size: sequence)r=   r5   mapr   r   r   r   r   r   r%   r8   )r   Ztile_idsZchromsizes_mapr   Zmax_tile_widthZgenerated_tilesZtile_idZtile_option_partsZtile_no_optionsZtile_id_partsZtile_positionZtile_optionsZ
chromnamesZchromlengthsZchromsizes_to_useZchromsizes_idr7   Ztile_pos	max_depthr"   r-   r.   Ztiler   r   r   tilesf   sD   



rE   c              
   C   sd   zt | }g }| D ]\}}|||g q|W S  ty1 } zt| td|d}~ww )a*  
    Get a list of chromosome sizes from this [presumably] fasta
    file.

    Parameters:
    -----------
    filename: string
        The filename of the fasta file

    Returns
    -------
    chromsizes: [(name:string, size:int), ...]
        An ordered list of chromosome names and sizes
    z-Error loading chromsizes from bigwig file: {}N)r   r$   append	ExceptionloggerrA   format)filenameZchrom_seriesdatar'   r(   exr   r   r   r      s   
r   r   )Zpyfaidxr   numpyr+   Zpandasr   loggingutilsr   r   	getLogger__name__rH   r%   r   r)   r1   r8   rE   r   r   r   r   r   <module>   s    

*
J