
    tfZ2                         d dl Z d dlZd dlZd dlZd dlmc mZ d dl	m
Z
mZ d dlmZ d Z	 ddZd Zd Zd Z	 	 	 	 dd	Zdd
Z	 	 	 	 ddZy)    N)est_query_size_ixload_bai_index)abs2genomicc                 \   g }d}| j                   }| j                  }| j                  | j                  z   }|D ]  }|d   t        j                  k(  r#|j                  ||z   d|d   f       ||d   z  }=|d   t        j                  k(  r|j                  ||z   d|d   f       n|d   t        j                  k(  r#|j                  ||z   d|d   f       ||d   z  }|d   t        j                  k(  r#|j                  ||z   d|d   f       ||d   z  }|d   t        j                  k(  s|d   t        j                  k(  s||d   z  } t        |      r|d   }|d   }|d   t        j                  k(  r|j                  ||d   z
  d|d   f       |d   t        j                  k(  r|j                  ||d   z
  d	|d   f       |d   t        j                  k(  r|j                  ||d   z
  d|d   f       |d   t        j                  k(  r|j                  |d	|d   f       |S )
Nr   X   IDNSH)cigartuplesposquery_lengthpysamCDIFFappendCINSCDEL	CREF_SKIPCEQUALCMATCHlen
CSOFT_CLIP
CHARD_CLIP)	readsubscurr_posr   	readstartreadendctuplefirst_ctuplelast_ctuples	            [/var/www/html/software/conda/envs/higlass/lib/python3.12/site-packages/clodius/tiles/bam.pyget_cigar_substitutionsr&      s*   DH""KIhh***G "!9#KKX-sF1I>?q	!HAY%**$KKX-sF1I>?AY%**$KKX-sF1I>?q	!HAY%//)KKX-sF1I>?q	!HAY%,,&&)u||*Cq	!H" ;"1~!"o?e...KK\!_4c<?KL?e...KK\!_4c<?KLq>U---KK;q>13AGHq>U---KK#{1~67K    c                    |,g }|j                         D ]  \  }}||t        |      ggz  } nt        j                  | j                        }	t        j                  | j
                        }
t        j                  |	      }	t        t        |	|
D cg c]  }t        |       c}            }|D cg c]  }|d   	 }
}t        j                  dt        j                  |
      f   }i dg dg dg dg dg d	g d
g dg dg dg dg dg dg dg dg dg }ddd}t        |      }d}t        |
||      D ]  \  }}}|t        ||   ||      z  } d}||kD  rddiS t        |
||      D ]  \  }}}t        ||         }|t        |      k\  r%||   d    }| j!                  |||      }|D ]D  }|j"                  r	 d}|j$                  r|j&                  rd}|j(                  rd}|j*                  |z   }|dxx   |gz  cc<   |dxx   t        |j,                  |z         gz  cc<   |dxx   t        |j.                  |z         gz  cc<   |dxx   |j0                  gz  cc<   |d	xx   |gz  cc<   |d
xx   |j2                  gz  cc<   |dxx   |j4                  gz  cc<   |j6                  dkD  }|rt9        ||      }nd}|s|j:                  r	 |j=                  dd      D cg c]F  }||d   cxk  r|k  r6n n3|d    .|d    j?                         r|d   |d   |j:                  |d      fH }}|rtC        |||       |dxx   |gz  cc<   n|dxx   g z  cc<   n|dxx   |gz  cc<   |dxx   tE        |      gz  cc<   tG        |jH                        }|dxx   |jK                  d!d      gz  cc<   |dxx   ||jL                     gz  cc<   	 |dxx   |jO                  d"      gz  cc<   G  |S c c}w c c}w c c}w # t@        $ r g }Y w xY w#   xY w# tP        $ r |dxx   dgz  cc<   Y w xY w)#a  
    Sample reads from the specified region, assuming that the chromosomes
    are ordered in some fashion. Returns an list of pysam reads

    Parameters:
    -----------
    samfile: pysam.AlignmentFile
        A pysam entry into an indexed bam file
    start_pos: int
        The start position of the sampled region
    end_pos: int
        The end position of the sampled region
    chromsize: pandas.Series
        A listing of chromosome sizes. If not provided, the chromosome
        list will be extracted from the the bam file header
    cache:
        An object that implements the `get`, `set` and `exists` methods
        for caching data

    Returns
    -------
    reads: [read1, read2...]
        The list of in the sampled regions
    Nr   r   idfromtomdchrName	chrOffsetcigarm1Fromm1Tom2Fromm2Tomapqztags.HPstrandvariantscigars-+)TFg    NAerrorz,Tile encompasses too much data: {total_size} _1_2i@  T)with_seqmatches_only   HPMD))itemsintnparray
referenceslengthsctbw	natsortedlistzipr_cumsumr   r   r   r   fetchis_unmapped	is_pairedis_read1is_read2
query_namereference_startreference_endreference_namecigarstringr4   r   get_cached_variantsquery_sequenceget_aligned_pairsislower
ValueErrorset_cached_variantsr&   dicttagsget
is_reverseget_tagKeyError)samfile	start_posend_pos
chromsizesindex_filenamecachechromsizes_listchromsizerG   rH   lengthrabs_chrom_offsetsresultsstrandsidx
total_sizecidstartendMAX_SIZE
chr_offsetseq_namereadsr   	id_suffixread_id	use_cacher6   r`   s                                  r%   
load_readsr   4   s5   < %++- 	4KE4D	 233O	4 XXg001
((7??+ ^^J/
s:'/RF/RST,-qt-G-a7!334b 	b 	b	
 	2 	R 	 	" 	 	" 	 	 	2 	" 	B  	"!G& %G

(CJ 'w	7C >UC'C%==
> HHGHH&w	7C _UC*3/0
#o&&%c*1-.hs3 W	D*:	>>}}$(	}}$(	//I5'*C(<(<z(I$J#KK#d&8&8:&E"F!GG	"t':':&;;"$4$ T%5%5$66 DII;.
 !--5	25'BH#H  *** *.)?)?-1 *@ *"($% $)AaD#7C#7$%aD$4$%aDLLN "#1qtT-@-@1-F G(H ( %/wI
+z9+
+r1+J'H:5'!&=d&C%DD!DII	"txxa'8&99"!gdoo&>%??!$,,t"4!55iW	_B NO 0S-D(  * *')H*"  "%sc   O=2PDPP+AP6P8BPP#PPPPPP #P>=P>c                 |    d| }| r5| j                  |      r$t        j                  | j                  |            S y)zTry to get variants from a read we've seen before.

    This is useful for ONT reads where there's many variants
    per read and retrieving them takes a while.
    	variants.N)existsjsonloadsra   )rj   r}   cache_ids      r%   rY   rY      s9     7)$Hh'zz%))H-..r'   c                 ^    d| }| r&| j                  |t        j                  |             yy)z$Save a set of variants to the cache.r   N)setr   dumps)rj   r}   r6   r   s       r%   r^   r^      s-    7)$H		(DJJx01 r'   c                    |Hg }|j                         D ]  \  }}||t        |      ggz  } t        |D cg c]  }|d   	 c}      }nt        | j                        }t	        j
                  | j                        }t	        j
                  | j                        }t        t        ||            }	t        j                  |      }|D 
cg c]  }
|	|
   	 }}
t        t        ||D cg c]  }t        |       c}            }d}t        j                  t        j                  ||z        t        j                  d      z        }d}dg|g|d|z  z  ||||d}|S c c}w c c}
w c c}w )at  
    Get the tileset info for a bam file

    Parameters
    ----------
    tileset: tilesets.models.Tileset object
        The tileset that the tile ids should be retrieved from

    Returns
    -------
    tileset_info: {'min_pos': [],
                    'max_pos': [],
                    'tile_size': 1024,
                    'max_zoom': 7
                    }
    r      r@   i r   )min_posmax_pos	max_width	tile_sizerh   max_zoommax_tile_width)rC   rD   sumrH   rE   rF   rG   r_   rL   rI   rJ   rK   mathceillog)re   rh   rk   rl   rm   ctotal_lengthrG   rH   ref_lengthsro   rn   r   r   MAX_TILE_WIDTHtileset_infos                   r%   alignment_tileset_infor      s[   " %++- 	4KE4D	 233O	4 /:QAaD:;7??+XXg001
((7??+3z734^^J/
+56a;q>66s:'/RF/RSTIyy,":;dhhqkIJH N 3 >h.%(L ; ; 7/Rs   EE$)E)c           	      `   g }t        | |      }|D ]  }|j                  d      d   j                  d      }	t        t        t        |	dd             }
|d   dt	        |
d         z  z  }|r||k\  r|dd	| ifgc S t	        |
d         |z  }||z   }t        | |||||
      }|||fgz  } |S )a  
    Generate tiles from a bigwig file.

    Parameters
    ----------
    tileset: tilesets.models.Tileset object
        The tileset that the tile ids should be retrieved from
    tile_ids: [str,...]
        A list of tile_ids (e.g. xyx.0.0) identifying the tiles
        to be retrieved
    index_filename: str
        The name of the file containing the index
    max_tile_width: int
        How wide can each tile be before we return no data. This
        can be used to limit the amount of data returned.
    cache:
        An object that implements the `get`, `set` and `exists` methods
        for caching data
    Returns
    -------
    tile_list: [(tile_id, tile_data),...]
        A list of tile_id, tile_data tuples
    |r   .r      r   r@   r:   z1Tile too large, no data returned. Max tile size: )rf   rg   rh   ri   rj   )r   splitrK   maprD   r   )re   tile_idsri   rh   r   rj   generated_tilestsinfotile_idtile_id_partstile_position
tile_widthrf   rg   
tile_values                  r%   alignment_tilesr   3  s   > O#GZ8F 7c*1-33C8SmAa&89:K(1M!4D0E+EE
jN: #TUcTd!e  M!,-
:I*,G##%-J * 566O97< r'   c                 D    t        j                  |       }t        ||      S N)r   AlignmentFiler   )filenamerh   re   s      r%   r   r   v  s    !!(+G!':66r'   c                 `    |s|  d}t        j                  | |      }t        ||||d |      S )Nz.bai)ri   )ri   rh   r   rj   )r   r   r   )r   r   ri   rh   r   rj   re   s          r%   tilesr   |  sD     $:T*!!(>JG% r'   )NNN)NNNNr   )r   r   numpyrE   r   clodius.tiles.bigwigr   bigwigrI   clodius.tiles.tabixr   r   clodius.tiles.utilsr   r&   r   rY   r^   r   r   r    r'   r%   <module>r      sn        # # A +%R NRqh
24t 
@F7 
r'   