
    tf                         d dl mZ d dlZd dlZd dlZddlmZm	Z	  ej                  e      ZdZd ZddZd Z	 dd	Zi ddfd
Zd Zy)    )FastaN   )	natsortedget_quadtree_depthi   c                     t        | d      5 t        fdj                         D              }t        j                               }d d d        t	        j
                           S # 1 sw Y   !xY w)NFone_based_attributesc              3   R   K   | ]  }|t        j                  |         f   y wN)lenrecords).0seqfas     ]/var/www/html/software/conda/envs/higlass/lib/python3.12/site-packages/clodius/tiles/fasta.py	<genexpr>z!get_chromsizes.<locals>.<genexpr>   s#     K#3BJJsO 45Ks   $')r   dictkeysr   pdSeries)fapath
chromsizeschromosomesr   s      @r   get_chromsizesr      s`    	vE	2 +bKKK
	*+ 99Z --+ +s   <A++A4c                     |7t        |       }g }|j                         D ]  \  }}||t        |      ggz  } n|}|D cg c]  }t        |d          }}t        |t              }dgt        |      gt        d|z  z  t        ||d}|S c c}w )a'  
    Get the tileset info for a FASTA file

    Parameters
    ----------
    fapath: string
        The path to the FASTA file from which to retrieve data
    chromsizes: [[chrom, size],...]
        A list of chromosome sizes associated with this tileset.
        Typically passed in to specify in what order data from
        the FASTA should be returned.

    Returns
    -------
    tileset_info: {'min_pos': [],
                    'max_pos': [],
                    'tile_size': 1024,
                    'max_zoom': 7
                    }
    r   r      )min_posmax_pos	max_width	tile_sizemax_zoomr   )r   itemsintr   	TILE_SIZEsum)r   r   chromsizes_listchromsizecr!   tileset_infos           r   r*   r*      s    * #F+
%++- 	4KE4D	 233O	4 %)89Ac!A$i9
9!*i8H3
O$h.%L  :s    Bc              #     K   t         j                  dt        j                  |       f   }t        j                  |||gd      dz
  \  }}|||   z
  }|||   z
  }|}t	        ||      D ]  }	|	|| |	   f d} |||f yw)a  
    Convert absolute genomic sizes to genomic

    Parameters:
    -----------
    chromsizes: [1000,...]
        An array of the lengths of the chromosomes
    start_pos: int
        The starting genomic position
    end_pos: int
        The ending genomic position
    r   right)sider   N)npr_cumsumsearchsortedrange)
r   	start_posend_posabs_chrom_offsetscid_locid_hi
rel_pos_lo
rel_pos_histartcids
             r   abs2genomicr<   =   s      a:!667
)Iw+?gNQRR FF .v66J,V44JEVV$ 5*S/)) %
##s   B Bc           	      0   |t        |       }|j                         }t        t        |||            }t	        | d      5 }|D 	
cg c]  \  }}	}
|||      |	|
 j
                   }}	}}
d d d        dj                        S c c}
}	}w # 1 sw Y   !xY w)NFr    )r   r   listr<   r   r   join)r   
zoom_levelr3   r4   r   chrom_namescids_starts_endsr   r;   r:   endarrayss               r   get_fasta_tilerF   W   s     #F+
//#KK
IwGH	vE	2 
b HX
 
4CCB{3 s+//
 


 776?

 
s   B B
(BBBc           	         g }|D ]J  }|j                  d      dd }|j                  d      d   }|j                  d      }	t        t        t        |	dd             }
t	        |D cg c]  }|j                  d       c}      }|rE|D cg c]  }|d   	 }}|D cg c]  }t        |d          }}t        j                  ||      }nd}d	|v r|d	   }||v r||   }nd}|
d   }|
d   }|t        |       }t        |t              }t        d
||z
  z  z  }|r||kD  r|dd| ifgc S ||z  }||z   }t        | ||||      }||d|ifgz  }M |S c c}w c c}w c c}w )ai  
    Generate tiles from a FASTA file.

    Parameters
    ----------
    fapath: str
        The filepath of the FASTA file
    tile_ids: [str,...]
        A list of tile_ids (e.g. xyx.0.0) identifying the tiles
        to be retrieved
    chromsizes_map: {uid: []}
        A set of chromsizes listings corresponding to the parameters of the
        tile_ids. To be used if a chromsizes id is passed in with the tile id
        with the `|cos:id` tag in the tile id
    chromsizes: [[chrom, size],...]
        A 2d array containing chromosome names and sizes. Overrides the
        chromsizes in chromsizes_map
    max_tile_width: int
        How wide can each tile be before we return no data. This
        can be used to limit the amount of data returned.
    Returns
    -------
    tile_list: [(tile_id, tile_data),...]
        A list of tile_id, tile_data tuples
    |r   Nr   .   :)indexcosr   errorz1Tile too large, no data returned. Max tile size: sequence)splitr?   mapr#   r   r   r   r   r   r$   rF   )r   tile_idschromsizes_mapr   max_tile_widthgenerated_tilestile_idtile_option_partstile_no_optionstile_id_partstile_positionotile_optionsr)   
chromnameschromlengthschromsizes_to_usechromsizes_idrA   tile_pos	max_depthr    r3   r4   tiles                            r   tilesrd   f   s   4 O +;#MM#.qr2!--,Q/'--c2SmAa&89:3DEaQWWS\EF(231!A$3J3/9:!C!I:L: "		,j I M$ ,U 3.$2=$A!$(!"1%
 # $ .v 6&'8)D	i*&< ==	i.8 #TUcTd!e  y(	i'fj)WFWXWz4&89::W+;X M F 4:s   'E
EEc                     	 t        |       }g }|j                         D ]  \  }}|j                  ||g        |S # t        $ r4}t        j                  |       t        dj                  |            d}~ww xY w)a*  
    Get a list of chromosome sizes from this [presumably] fasta
    file.

    Parameters:
    -----------
    filename: string
        The filename of the fasta file

    Returns
    -------
    chromsizes: [(name:string, size:int), ...]
        An ordered list of chromosome names and sizes
    z-Error loading chromsizes from bigwig file: {}N)r   r"   append	ExceptionloggerrN   format)filenamechrom_seriesdatar'   r(   exs         r   r   r      s}    T%h/'--/ 	'KE4KK&	' TRGNNrRSSTs   9< 	A9/A44A9r   )pyfaidxr   numpyr.   pandasr   loggingutilsr   r   	getLogger__name__rh   r$   r   r*   r<   rF   rd   r        r   <module>rw      s[        0			8	$	.'T$6 8< ,.$t GTTrv   