o
    Nrf                     @   sf   d dl Zd dlZd dlZd dlmZ dd Zdd Zdd ZdddZ	dd Z
dd Z	dddZdS )    N)abs2genomicc              
   C   sH  t | d}t| }td|dd \}}}}}d}g }t|D ]m}td|||d  d }|d7 }tt}	t|D ]4}
td|||d  \}}|d7 }|d	 d }d
d	|  d}t|||||  |	|< ||7 }q@td|||d  d }|dd|  7 }||	g7 }q#|W  d   S 1 sw   Y  dS )zLoad a reduced version of a bai index so that we can
    go through it and get a sense of how much data will be
    retrieved by a query.rbz<4cIN   z<I   r   z<II   <Q)	open	bytearrayreadstructunpackrangecoldefaultdictlist)index_filenamefb_n_refcindecesin_binbinsjbin_non_chunkbytes_to_read
unpack_strn_intv r"   \/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/clodius/tiles/tabix.pyload_bai_index   s*   

$r$   c                 C   s  t | d}t| }td|dd \}}}}}}}}}}	}
}d}dd ||||  dD }||7 }g }t|D ]m}td|||d	  d
 }|d	7 }t	t
}t|D ]4}td|||d  \}}|d7 }|d d }dd|  d}t|||||  ||< ||7 }q\td|||d	  d
 }|d	d|  7 }||g7 }q?tt||W  d   S 1 sw   Y  dS )zLoad a reduced version of a tabix index so that we can
    go through it and get a sense of how much data will be
    retrieved by a query.r   z<4ciiiiiiiiN$   c                 S   s   g | ]}| d qS )ascii)decode).0nr"   r"   r#   
<listcomp>@   s    z load_tbi_idx.<locals>.<listcomp>    z<ir   r   z<Iir   r   r   r   )gzipr	   r
   r   r   r   splitr   r   r   r   dictzip)r   r   r   r   r   formatZcol_seqZcol_begZcol_endmetaskipZl_nmr   namesr   r   r   r   r   r   r   r   r    r!   r"   r"   r#   load_tbi_idx)   sH    

$r4   c                 c   s.    t dt| |D ]}| |||  V  q	dS )z)Yield successive n-sized chunks from lst.r   N)r   len)lstr)   r   r"   r"   r#   chunksZ   s   r7         c                 c   s    | |} }d||d>  | }}t |d D ]4}|| |?  |||?  }}|| d }	t ||d D ]	}
|
V  |	d7 }	q3|d|d> | > 7 }|d8 }qdS )a  
    generate key of bins which may overlap the given region,
    check out https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3042176/
    and https://samtools.github.io/hts-specs/tabix.pdf
    for more information.
    Parameters
    ----------
    begin: int
        chromosome position begin
    end: int
        chromosome position end
    n_lvls: int, optional
        cluster level, for tabix, set to 5
    min_shift: int, optional
        minimum shift, for tabix, set to 14
    Returns
    -------
    generator
    r         N)r   )beginendZn_lvlsZ	min_shifttslr   er)   kr"   r"   r#   reg2bins`   s   


rC   c                 C   s   d}t t||D ]:}d|  krdkrCn q	| | rCd}t| | dD ]}||d d? |d d?  7 }||d d? |d d?  7 }q$q	|S )Nr   iI  iH  r   r:      )r   rC   r7   )ixstartr=   
total_sizebinZbin_sizechunkr"   r"   r#   est_query_size_ix   s   rJ   c                 C   s    || vrdS | | }t |||S )Nr   )rJ   )indexnamerF   r=   rE   r"   r"   r#   est_query_size   s   rM   c
                 C   s:  |	d u rd}	|d d|  }
|r|
|krddiS d}||d  d|  }|d |d  d|  }t t|||}g }|r]|D ]\}}}|t|krJq>|j| }|t||t|t|7 }q>d}||krjdd	| iS |D ]\}}}|t|krxql|j| }||t|t|t|7 }qlt||	krdd
t| iS |S )Ni   	max_widthr   errorzTile too wider   r:   i@B zTile too large zToo many values in tile )r   r   r5   rK   rM   intstr)filenamer   Z
chromsizesZtsinfozxZmax_tile_widthZ	tbx_indexZfetcherZmax_resultsZ
tile_widthZ
query_size	start_posend_posZcids_starts_endsZret_valsZcidrF   r=   ZchromZMAX_QUERY_SIZEr"   r"   r#   single_indexed_tile   s6   

rW   )r8   r9   )N)collectionsr   r,   r   Zclodius.tiles.bigwigr   r$   r4   r7   rC   rJ   rM   rW   r"   r"   r"   r#   <module>   s    !1
 