
    tf)                         d dl Zd dlmZ d dlZd dlmZmZ d dl	Z
d dlmZmZ d dlmZ ddZd Zd Zd Zd	 Zdd
Z G d de      Z G d de      Zd Zd Z G d de      Zd Zd Zd Zd Zy)    N)ListOptional)	BaseModel	validator)load_chromsizesc                    g }t        | fd      D ]  }|j                  d      }t        t        t        |dd             }d}|D ]  }|D ]z  }|j                  d      }	t        t        t        |	ddz                }
d}t        ||
      D ]+  \  }}t        t	        |      t	        |      z
        dkD  s*d}- |rr||gz  }d} n |s n |r||ggz  } |S )	a  
    Partition a set of tile ids into sets of adjacent tiles. For example,
    if we're requesting a set of four tiles that form a rectangle, then
    those four tiles will become one set of adjacent tiles. Non-contiguous
    tiles are not grouped together.

    Parameters
    ----------
    tile_ids: [str,...]
        A list of tile_ids (e.g. xyx.0.0.1) identifying the tiles
        to be retrieved
    dimension: int
        The dimensionality of the tiles

    Returns
    -------
    tile_lists: [tile_ids, tile_ids]
        A list of tile lists, all of which have tiles that
        are within 1 position of another tile in the list
    c                 j    | j                  d      ddz    D cg c]  }t        |       c}S c c}w )N.   )splitint)xp	dimensions     ]/var/www/html/software/conda/envs/higlass/lib/python3.12/site-packages/clodius/tiles/utils.py<lambda>z-partition_by_adjacent_tiles.<locals>.<lambda>$   s,    a!i-1P QAQ Q  Qs   0keyr
   r      F   T)sortedr   listmapr   zipabs)tile_idsr   tile_id_liststile_idtile_id_partstile_positionaddedtile_id_list
ct_tile_idct_tile_id_partsct_tile_position	far_apartp1p2s    `            r   partition_by_adjacent_tilesr)      s   * MQ ")  c*
 SmAa&89:) 	L* 
#-#3#3C#8 #'C1A!a)m1T(U#V !	 "-1AB )FB3r7SW,-1$(	)
 ! WI-L E" )	* wi[(ME")H     c                 p   t        j                  |       \  }}|j                         dk(  s|j                         dk(  ry|j                         dk(  s|j                         dk(  ry|j                         dk(  ry|j                         d	k(  ry
|j                         dk(  ry|j                         dk(  ryy )Nz.bwz.bigwigbigwigz.mcoolz.coolcoolerz.htimetime-interval-jsonz.hitilehitilez.beddbbeddbz.mv5multivec)opsplitextlower)filename_exts      r   infer_filetyper8   J   s    [["FAs
yy{esyy{i7		 CIIK7$:		 #			!		 		r*   c                 @    | dk(  ry| dk(  ry| dk(  ry| dk(  ry| dk(  ry	y )
Nr-   matrixr,   vectorr.   ztime-intervalr/   r0   bedlike )filetypes    r   infer_datatyper?   ]   s?    88''87 r*   c                     g }| D ]\  }|j                  d      }t        |      dk  rt        d      t        t        |d   |d   |d   g      \  }}}|| ||||      fgz  }^ |S )Nr
   r   zNot enough tile info presentr   r      )r   len
IndexErrorr   r   )r   tiles_functiontile_valuesr   partszr   ys           r   tiles_wrapper_2drI   j   s    K <c"u:>;<<cE!HeAha9:1a.Aq"9:;;< r*   c                    g }t        |       }|D ]D  }t        |d   j                  d      d         }|d   j                  d      d   }|D cg c]-  }|j                  d      dd D cg c]  }t        |       c}/ }	}}t        |	D cg c]  }|d   	 c}      }
t	        |	D cg c]  }|d   	 c}      }t        |	D cg c]  }|d   	 c}      }t	        |	D cg c]  }|d   	 c}      } |||
|||
z
  dz   ||z
  dz         }||D cg c]6  \  }}dj                  |dj                  t        t        |                  |f8 c}}z  }G |S c c}w c c}}w c c}w c c}w c c}w c c}w c c}}w )aF  
    Bundle adjacent tile requests so that they can be
    processed concurrently. This is helpful for function
    that require scanning a dataset. It's faster to filter
    a large region and then break it down into individual
    tiles than to go over the entire dataset and filter
    individual tiles multiple times.
    r   r
   r   r   r   )widthheightz{}.{})	r)   r   r   minmaxformatjoinr   str)r   rD   rE   partitioned_tile_lists
tile_group
zoom_level
tileset_idtr   tile_positionsminxmaxxminymaxytfr    datas                    r   bundled_tiles_wrapper_2dr^   z   s|    K8B, 

A,,S1!45
]((-a0
GQR!1773<!+<=a3q6=RR.1QAaD12.1QAaD12.1QAaD12.1QAaD12d$+/$+PQ/
 	)+
% ^^JS-1H(IJDQ
 	
!
* # >R1111
s6   E+E=EE%
*E*
E/
"E4
;E9
Ec                     | d   }| d   }t        |d   |d   z
  |d   |d   z
        }|d|z  z  }	|d   ||	z  z   }
|d   ||z   |	z  z   }|d   ||	z  z   }|d   ||z   |	z  z   }|
|||gS )a  
    Get the coordinate boundaries for the given tile.

    Parameters:
    -----------
    tsinfo: { min_pos: [], max_pos [] }
        Tileset info containing the bounds of the dataset
    z: int
        The zoom level
    x: int
        The x position
    y: int
        The y position
    width: int
        Return bounds for a region encompassing multiple tiles
    height: int
        Return bounds for a region encompassing multiple tiles
    min_posmax_posr   r   r   )rN   )tsinforG   r   rH   rK   rL   r`   ra   	max_width
tile_widthfrom_xto_xfrom_yto_ys                 r   tile_boundsri      s    & YGYGGAJ+WQZ'!*-DEIQ!V#JQZ!j.(F1:Uj00DQZ!j.(F1:Vz11DFD$''r*   c                   z    e Zd ZU eed<   eed<   ee   ed<   ee   ed<    ed      d        Z ed      d        Zy)TilesetInfomax_zoomrc   ra   r`   c                 8    |dk  rt        d      t        |      S )2Check to make sure the zoom level is 0 or greater.r   z1The zoom level must be greater than or equal to 0
ValueErrorr   clsvs     r   max_zoom_zero_or_greaterz$TilesetInfo.max_zoom_zero_or_greater   s      q5PQQ1vr*   c                 8    |dk  rt        d      t        |      S )z2Check to make sure the max_width is greater than 0r   z$The max_width must be greater than 0ro   rq   s     r   max_width_greater_than_zeroz'TilesetInfo.max_width_greater_than_zero   s      6CDD1vr*   N)	__name__
__module____qualname__r   __annotations__r   r   rt   rv   r=   r*   r   rk   rk      sP    MN#Y#Yz  { r*   rk   c                   t    e Zd ZU eed<   ee   ed<   ee   ed<   ee   ed<   ee   ed<    ed      d        Zy)TileInfozoompositionrK   startendc                 8    |dk  rt        d      t        |      S )rn   r   z%The zoom level must be greater than 0ro   rq   s     r   zoom_zero_or_greaterzTileInfo.zoom_zero_or_greater   s      q5DEE1vr*   N)	rw   rx   ry   r   rz   r   r   r   r   r=   r*   r   r|   r|      sE    
I3iC=9	cNv r*   r|   c                 (   | j                  d      d   j                  d      }t        t        t        |dd             }t        |d         }|j                  dt        |d         z  z  }t        |dd        D cg c]*  \  }}||j                  d|z  z  z  |j                  |   z   , }}}t        |dd        D cg c]-  \  }}||j                  d|z  z  z  |j                  |   z   |z   / }	}}t        ||dd  |||	      S c c}}w c c}}w )N|r   r
   r   rA   r   )r}   r~   rK   r   r   )r   r   r   r   rc   	enumerater`   r|   )
r   rb   r   r    rT   rd   iposstartsendss
             r   parse_tile_idr      s<   MM#&q)//4MS-!"456M]1%&J!!A]1-=)>$>>J "-"34Q 	v!z/12V^^A5FFF  "-"34Q 
  1
?2	3fnnQ6G	G*	TD 
 qr" s   =/D 2Dc              #   ,  K   t         j                  dt        j                  |       f   }t        j                  |||gd      dz
  \  }}|||   z
  }|||   z
  }|}t	        ||      D ]  }	|	|| |	   f d} |t        |      t        |      f yw)a0  
    Convert absolute coordinates to genomic coordinates

    Parameters:
    -----------
    chromsizes: [[chrom, size],...]
        A list of chromosome sizes associated with this tileset
    start_pos: int
        The absolute start coordinate
    end_pos: int
        The absolute end coordinate
    r   right)sider   N)npr_cumsumsearchsortedranger   )

chromsizes	start_posend_posabs_chrom_offsetscid_locid_hi
rel_pos_lo
rel_pos_hir   cids
             r   abs2genomicr      s      a:!667
)Iw+?gNQRR FF .v66J,V44JEVV$ 5*S/)) #e*c*o
--s   BBc                   6    e Zd ZU eed<   eed<   eed<   eed<   y)ChromosomeIntervalr   namer   r   N)rw   rx   ry   r   rz   rQ   r=   r*   r   r   r     s    	H
IJ	Hr*   r   c              #      K   t        |       \  }}}t        |||      D ]  \  }}}	 t        |||   ||        y# t        $ r Y  yw xY ww)zConvert an absolute genomic range to sections of genomic ranges.

    E.g. (1000,2000) => [('chr1', 1000, 1500), ('chr2', 1500, 2000)]
    )r   r   r   r   N)r   r   r   rC   )chromsizes_filenamer   r   
chrom_infochrom_nameschrom_sizesr   s          r   abs2genome_fnr     sk     
 .==P-Q*Zk&{E3? UC	$k#.e 
  		s%   $A>A	AA
AAc                     t        j                  t        |       |z        }t        t        j                  t        j                  |                  S )au  
    Depth of quad tree necessary to tesselate the concatenated genome with quad
    tiles such that linear dimension of the tiles is a preset multiple of the
    genomic resolution.

    Parameters:
    -----------
    chromsizes: pandas.Series
        A series representation of the chromosome sizes
    tile_size_bp: int
        The size of each tile in the tileset
    )r   ceilsumr   log2)r   tile_size_bpmin_tile_covers      r   get_quadtree_depthr   0  s8     WWS_|;<Nrwwrww~./00r*   c                    | j                  d      dk\  rI| j                  d      }|j                  d      dk\  r#|j                  d      }t        |d   |d         S y|j                  d      dk\  ryt        j                  dt        j
                        }t        |j                  |       D cg c]"  }|s|j                         rt        |      n|$ c}      }t        |j                  |      D cg c]"  }|s|j                         rt        |      n|$ c}      }dD ]*  }||j                         v r y|| j                         v s* y 	 ||k  ry||kD  ryyc c}w c c}w # t        $ r Y yw xY w)Nr6   r   r   z(\d+))mrH   r   )findr   natcmprecompileUtupleisdigitr   r4   	TypeError)r   rH   x_partsy_parts	_NS_REGEXar   s          r   r   r   A  sQ   vvc{a''#,66#;!ggclG'!*gaj11 vvc{a 

8RTT*I9??1;MSaQRqyy{SV1STG9??1;MSaQRqyy{SV1STG  !'')!'')	Ww# TS"  s0   /E 7E 5E%=E%E* E* *	E65E6c                 J    t        | t        j                  t                    S )z3
    Sort an iterable by natural genomic order
    r   )r   ft
cmp_to_keyr   )iterables    r   	natsortedr   i  s     (f 566r*   )r   )r   r   )	functoolsr   os.pathpathr2   r   typingr   r   numpyr   pydanticr   r   clodius.chromosomesr   r)   r8   r?   rI   r^   ri   rk   r|   r   r   r   r   r   r   r   r=   r*   r   <module>r      s~      	 !  ) /;|&
 "J(D) *y 2.4 "1"%P7r*   