o
    Nrfe                     @  s(  d dl mZ d dlZd dlZd dlZd dlZd dlZ	d dl
mZ d dlmZ ddlmZmZmZmZmZmZmZmZ ddlmZ ddlmZmZmZmZ d	d
gZh dZG dd	 d	Z d9ddZ!	 		d:d;ddZ"	 		d:d;ddZ#	 			d<d=d!d"Z$	#d>d?d&d
Z%			#	#			#	'	d@dAd7d8Z&dS )B    )annotationsN)is_integer_dtype)
coo_matrix   )	CSRReaderDirectRangeQuery2DFillLowerRangeQuery2DRangeSelector1DRangeSelector2Dgetregion_to_extentregion_to_offset)list_coolers)closing_hdf5	open_hdf5parse_cooler_uriparse_regionCoolerannotate>   ZKRZVC_SQRTZVCc                   @  s   e Zd ZdZdKdLddZdMddZdNddZdOddZdPdQddZe	dRddZ
e	dSddZe	dTd d!Ze	dUd#d$ZdVd(d)ZdWd+d,Ze	dXd-d.Ze	dYd/d0ZdZd2d3ZdZd4d5Zd[d\d9d:Z		;	6	6	6	;		<d]d^dGdHZdRdIdJZdS )_r   a]  
    A convenient interface to a cooler data collection.

    Parameters
    ----------
    store : str, :py:class:`h5py.File` or :py:class:`h5py.Group`
        Path to a cooler file, URI string, or open handle to the root HDF5
        group of a cooler data collection.
    root : str, optional [deprecated]
        HDF5 Group path to root of cooler group if ``store`` is a file.
        This option is deprecated. Instead, use a URI string of the form
        :file:`<file_path>::<group_path>`.
    kwargs : optional
        Options to be passed to :py:class:`h5py.File()` upon every access.
        By default, the file is opened with the default driver and mode='r'.

    Notes
    -----
    If ``store`` is a file path, the file will be opened temporarily in
    when performing operations. This allows :py:class:`Cooler` objects to be
    serialized for multiprocess and distributed computations.

    Metadata is accessible as a dictionary through the :py:attr:`info`
    property.

    Table selectors, created using :py:meth:`chroms`, :py:meth:`bins`, and
    :py:meth:`pixels`, perform range queries over table rows,
    returning :py:class:`pd.DataFrame` and :py:class:`pd.Series`.

    A matrix selector, created using :py:meth:`matrix`, performs 2D matrix
    range queries, returning :py:class:`numpy.ndarray` or
    :py:class:`scipy.sparse.coo_matrix`.

    Nstorestr | h5py.Grouproot
str | Nonec                 K  s   t |trM|d u rt|\| _| _n*t|r8t|fi |}|jj| _|| _W d    n1 s2w   Y  nt	d| jd | j | _
| j| _|| _n|jj| _|j| _| jd | j | _
|j| _i | _|   d S )Nz!Not a valid path to a Cooler file::)
isinstancestrr   filenamer   h5pyZis_hdf5r   file
ValueErrorurir   open_kwsname_refresh)selfr   r   kwargsh5 r'   S/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/cooler/api.py__init__D   s&   



zCooler.__init__returnNonec                 C  s   zXt | jfi | jC}|| j }t|}|d t|d< |dd | _t	t
|d tt|| _t|| _| jdd}|dk| _W d    W d S 1 sQw   Y  W d S  ty~   d| j d}t| j}t|ry|d| dd	 7 }t|d w )
Nr"   lengthstorage-modesymmetric-upperzNo cooler found at: .z Coolers found in z. z Use '::' to specify a group path)r   r   r!   r   chromsastypeobjectZ	set_index_chromsizesdictziprangelen	_chromidsinfo_infor   _is_symm_upperKeyErrorr   )r$   r&   grpZ_ctmodeerr_msgZlistingr'   r'   r(   r#   Z   s,   

&	


zCooler._refreshpathr   
np.ndarrayc                 C  sR   t | jfi | j}|| j }|| d d  W  d    S 1 s"w   Y  d S N)r   r   r!   r   r$   r@   r&   r=   r'   r'   r(   
_load_dseto   s   
$zCooler._load_dsetr4   c                 C  sP   t | jfi | j}|| j }t|| jW  d    S 1 s!w   Y  d S rB   )r   r   r!   r   r4   attrsrC   r'   r'   r(   _load_attrst   s   
$zCooler._load_attrsrr>   
h5py.Groupc                 K  s$   t j| j|fi || j }t|S )a  Open the HDF5 group containing the Cooler with :py:mod:`h5py`

        Functions as a context manager. Any ``open_kws`` passed during
        construction are ignored.

        Parameters
        ----------
        mode : str, optional [default: 'r']
            * ``'r'`` (readonly)
            * ``'r+'`` or ``'a'`` (read/write)

        Notes
        -----
            For other parameters, see :py:class:`h5py.File`.

        )r   Filer   r   r   )r$   r>   r%   r=   r'   r'   r(   openy   s   zCooler.openc                 C  s   | j ddS )zIndicates whether ordinary sparse matrix encoding is used
        (``"square"``) or whether a symmetric matrix is encoded by storing only
        the upper triangular elements (``"symmetric-upper"``).
        r-   r.   )r:   r   r$   r'   r'   r(   storage_mode   s   zCooler.storage_mode
int | Nonec                 C  s
   | j d S )z-Resolution in base pairs if uniform else Nonezbin-sizer:   rK   r'   r'   r(   binsize   s   
zCooler.binsize	pd.Seriesc                 C  s   | j S )z=Ordered mapping of reference sequences to their lengths in bp)r3   rK   r'   r'   r(   
chromsizes   s   zCooler.chromsizes	list[str]c                 C  s   t | jjS )z List of reference sequence names)listr3   indexrK   r'   r'   r(   
chromnames   s   zCooler.chromnamesregionstr | tuple[str, int, int]intc                 C  \   t | jfi | j}|| j }t|| jt|| j| jW  d   S 1 s'w   Y  dS )a'  Bin ID containing the left end of a genomic region

        Parameters
        ----------
        region : str or tuple
            Genomic range

        Returns
        -------
        int

        Examples
        --------
        >>> c.offset('chr3')  # doctest: +SKIP
        1311

        N)	r   r   r!   r   r   r8   r   r3   rO   r$   rV   r&   r=   r'   r'   r(   offset      

$zCooler.offsettuple[int, int]c                 C  rY   )aG  Bin IDs containing the left and right ends of a genomic region

        Parameters
        ----------
        region : str or tuple
            Genomic range

        Returns
        -------
        2-tuple of ints

        Examples
        --------
        >>> c.extent('chr3')  # doctest: +SKIP
        (1311, 2131)

        N	r   r   r!   r   r   r8   r   r3   rO   rZ   r'   r'   r(   extent   r\   zCooler.extentc                 C  sJ   t | jfi | j}|| j }t|W  d   S 1 sw   Y  dS )zUFile information and metadata

        Returns
        -------
        dict

        N)r   r   r!   r   r9   )r$   r&   r=   r'   r'   r(   r9      s   	
$zCooler.infoc                 C  s   | j d fd S )Nnbins   rN   rK   r'   r'   r(   shape   s   zCooler.shaper	   c                   s"    fdd}t d|djd S )z[Chromosome table selector

        Returns
        -------
        Table selector

        c                   X   t jfi j}|j }t|||| fi  W  d    S 1 s%w   Y  d S rB   )r   r   r!   r   r0   fieldslohir&   r=   r%   r$   r'   r(   _slice      
$zCooler.chroms.<locals>._sliceNZnchromsr	   r:   )r$   r%   ri   r'   rh   r(   r0      s   	zCooler.chromsc                   s.    fdd}fdd}t d||jd S )zTBin table selector

        Returns
        -------
        Table selector

        c                   rc   rB   )r   r   r!   r   binsrd   rh   r'   r(   ri     rj   zCooler.bins.<locals>._slicec                   s\   t  jfi  j}| j }t| jt|  j jW  d    S 1 s'w   Y  d S rB   r^   )rV   r&   r=   rK   r'   r(   _fetch	  s   

$zCooler.bins.<locals>._fetchNr`   rk   )r$   r%   ri   rm   r'   rh   r(   rl      s   	
zCooler.binsFjoinboolc                   s0    fdd}fdd}t d||jd S )a  Pixel table selector

        Parameters
        ----------
        join : bool, optional
            Whether to expand bin ID columns into chrom, start, and end
            columns. Default is ``False``.

        Returns
        -------
        Table selector

        c                   sZ   t jfi j}|j }t||||  fi W  d    S 1 s&w   Y  d S rB   )r   r   r!   r   pixelsrd   rn   r%   r$   r'   r(   ri   $  s   
$zCooler.pixels.<locals>._slicec                   s   t  jfi  j0}| j }t| jt|  j j\}}|d d | }|d d | }||fW  d    S 1 s=w   Y  d S )NZindexesZbin1_offsetr^   )rV   r&   r=   i0i1rf   rg   rK   r'   r(   rm   )  s   

$zCooler.pixels.<locals>._fetchNZnnzrk   )r$   rn   r%   ri   rm   r'   rq   r(   rp     s   zCooler.pixelsT逖 fieldbalance
bool | strsparse	as_pixelsignore_indexdivisive_weightsbool | None	chunksizer
   c	                   sV   t v r
du r
d fdd}	d	fdd	}
t||	|
jd fd S )
a  Contact matrix selector

        Parameters
        ----------
        field : str, optional
            Which column of the pixel table to fill the matrix with. By
            default, the 'count' column is used.
        balance : bool, optional
            Whether to apply pre-calculated matrix balancing weights to the
            selection. Default is True and uses a column named 'weight'.
            Alternatively, pass the name of the bin table column containing
            the desired balancing weights. Set to False to return untransformed
            counts.
        sparse: bool, optional
            Return a scipy.sparse.coo_matrix instead of a dense 2D numpy array.
        as_pixels: bool, optional
            Return a DataFrame of the corresponding rows from the pixel table
            instead of a rectangular sparse matrix. False by default.
        join : bool, optional
            If requesting pixels, specifies whether to expand the bin ID
            columns into (chrom, start, end). Has no effect when requesting a
            rectangular matrix. Default is True.
        ignore_index : bool, optional
            If requesting pixels, don't populate the index column with the
            pixel IDs to improve performance. Default is True.
        divisive_weights : bool, optional
            Force balancing weights to be interpreted as divisive (True) or
            multiplicative (False). Weights are always assumed to be
            multiplicative by default unless named KR, VC or SQRT_VC, in which
            case they are assumed to be divisive by default.

        Returns
        -------
        Matrix selector

        Notes
        -----
        If ``as_pixels=True``, only data explicitly stored in the pixel table
        will be returned: if the cooler's storage mode is symmetric-upper,
        lower triangular elements will not be generated. If
        ``as_pixels=False``, those missing non-zero elements will
        automatically be filled in.

        NTc                   sf   t jfi j}|j }t||||||  jW  d    S 1 s,w   Y  d S rB   )r   r   r!   r   matrixr;   )ru   rr   rs   j0j1r&   r=   ry   rv   r}   r{   rz   rn   r$   rx   r'   r(   ri   r  s&   
$zCooler.matrix.<locals>._slicec           	        s   t  jfi  j;}| j }|d u r| }t|  j}t| j}t| j| j\}}t| j| j\}}||||fW  d    S 1 sHw   Y  d S rB   )	r   r   r!   r   r   r3   r   r8   rO   )	rV   Zregion2r&   r=   Zregion1rr   rs   r   r   rK   r'   r(   rm     s   

$zCooler.matrix.<locals>._fetchr`   ra   rB   )_4DN_DIVISIVE_WEIGHTSr
   r:   )r$   ru   rv   rx   ry   rn   rz   r{   r}   ri   rm   r'   r   r(   r~   8  s
   7zCooler.matrixc                 C  sB   t | jtrtj| j}| d| j }nt| j}d| dS )Nr   z	<Cooler "z">)r   r   r   osr@   basenamer   repr)r$   r   	containerr'   r'   r(   __repr__  s
   
zCooler.__repr__rB   )r   r   r   r   )r*   r+   )r@   r   r*   rA   )r@   r   r*   r4   )rG   )r>   r   r*   rH   )r*   r   )r*   rM   )r*   rP   )r*   rR   )rV   rW   r*   rX   )rV   rW   r*   r]   )r*   r4   )r*   r]   )r*   r	   F)rn   ro   r*   r	   )NTFFFTNrt   )ru   r   rv   rw   rx   ro   ry   ro   rn   ro   rz   ro   r{   r|   r}   rX   r*   r
   )__name__
__module____qualname____doc__r)   r#   rD   rF   rJ   propertyrL   rO   rQ   rU   r[   r_   r9   rb   r0   rl   rp   r~   r   r'   r'   r'   r(   r       sD    #






%[r&   rH   r*   r4   c              	   C  sP   i }| j  D ]\}}t|tr!zt|}W n	 ty    Y nw |||< q|S )z
    File and user metadata dict.

    Parameters
    ----------
    h5 : :py:class:`h5py.File` or :py:class:`h5py.Group`
        Open handle to cooler file.

    Returns
    -------
    dict

    )rE   itemsr   r   jsonloadsr   )r&   dkvr'   r'   r(   r9     s   

r9   rf   rX   rg   rM   re   list[str] | Nonepd.DataFramec                 K  sH   |du rt ddgt | d   }t| d |||fi |S )a  
    Table describing the chromosomes/scaffolds/contigs used.
    They appear in the same order they occur in the heatmap.

    Parameters
    ----------
    h5 : :py:class:`h5py.File` or :py:class:`h5py.Group`
        Open handle to cooler file.
    lo, hi : int, optional
        Range of rows to select from the table.
    fields : sequence of str, optional
        Subset of columns to select from table.

    Returns
    -------
    :py:class:`DataFrame`

    Nr"   r,   r0   )pdIndexappendkeysdrop_duplicatesr   )r&   rf   rg   re   r%   r'   r'   r(   r0     s   r0   c           	      K  s   |du rt g dt | d   }t| d |||fi |}d|v rb|dd}t|tr6|}n|d }t|j	rb|rbt
| dd}t jj||dd	}t|tr^t ||j}|S ||d< |S )
a  
    Table describing the genomic bins that make up the axes of the heatmap.

    Parameters
    ----------
    h5 : :py:class:`h5py.File` or :py:class:`h5py.Group`
        Open handle to cooler file.
    lo, hi : int, optional
        Range of rows to select from the table.
    fields : sequence of str, optional
        Subset of columns to select from table.

    Returns
    -------
    :py:class:`DataFrame`

    Nchromstartendrl   r   convert_enumTr"   )re   )Zordered)r   r   r   r   r   r   r   r   r   Zdtyper0   ZCategoricalZ
from_codesZSeriesrT   )	r&   rf   rg   re   r%   outr   Z	chrom_colrU   r'   r'   r(   rl     s&   	

rl   Trn   ro   c                 K  s|   |du rt ddgt | d   }t| d |||fi |}|r<t| d ddg dfi |}t||dd	}|S )
aG  
    Table describing the nonzero upper triangular pixels of the Hi-C contact
    heatmap.

    Parameters
    ----------
    h5 : :py:class:`h5py.File` or :py:class:`h5py.Group`
        Open handle to cooler file.
    lo, hi : int, optional
        Range of rows to select from the table.
    fields : sequence of str, optional
        Subset of columns to select from table.
    join : bool, optional
        Whether or not to expand bin ID columns to their full bin description
        (chrom, start, end). Default is True.

    Returns
    -------
    :py:class:`DataFrame`

    Nbin1_idbin2_idrp   rl   r   r   Treplace)r   r   r   r   r   r   r   )r&   rf   rg   re   rn   r%   dfrl   r'   r'   r(   rp     s   rp   Fpd.DataFrame | RangeSelector1Dr   c                   s  | j  t|trdd }ndd }g }d v rb| d  jtjddd}t|dkr/d }}nt|t| krA| |	 }}nd	\}}||||}|
|j||jd   jd
d djdd d v r| d  jtjddd}	t|	dkr~d }}nt|t| kr|	 |		 }}nd	\}}||||}
|
|
j|	|
jd   jdd djdd |rÇ fdddD }| j|dd} tjg || jdddd}| j|_|S )a  
    Add bin annotations to a data frame of pixels.

    This is done by performing a relational "join" against the bin IDs of a
    table that describes properties of the genomic bins. New columns will be
    appended on the left of the output data frame.

    .. versionchanged:: 0.8.0
       The default value of ``replace`` changed to False.

    Parameters
    ----------
    pixels : :py:class:`DataFrame`
        A data frame containing columns named ``bin1_id`` and/or ``bin2_id``.
        If columns ``bin1_id`` and ``bin2_id`` are both present in ``pixels``,
        the adjoined columns will be suffixed with '1' and '2' accordingly.
    bins : :py:class:`DataFrame` or DataFrame selector
        Data structure that contains a full description of the genomic bins of
        the contact matrix, where the index corresponds to bin IDs.
    replace : bool, optional
        Remove the original ``bin1_id`` and ``bin2_id`` columns from the
        output. Default is False.

    Returns
    -------
    :py:class:`DataFrame`
    c                 S  s    | ||d ur|d  S d  S )Nr   r'   )selbegr   r'   r'   r(   
_loc_slice_  s    zannotate.<locals>._loc_slicec                 S  s   | j || S rB   )loc)r   r   r   r'   r'   r(   r   e  s   r   Fsafe)copyZcastingr   )r   Nc                 S     | d S )N1r'   xr'   r'   r(   <lambda>z      zannotate.<locals>.<lambda>columnsT)dropr   c                 S  r   )N2r'   r   r'   r'   r(   r     r   c                   s   g | ]}| v r|qS r'   r'   ).0colr   r'   r(   
<listcomp>  s    zannotate.<locals>.<listcomp>)r   r   r   )Zaxis)r   r   r	   Zto_numpyr1   npZint64r7   minmaxr   ZilocrT   renameZreset_indexr   r   concat)rp   rl   r   r   annsZbin1ZbminbmaxZann1Zbin2Zann2Zcols_to_dropr   r'   r   r(   r   <  sR   



rt   rr   rs   r   r   ru   r   rv   rw   rx   ry   rz   r{   r}   
fill_lower&np.ndarray | coo_matrix | pd.DataFramec                 C  s  |du rd}t |tr|}n|rd}|r&|| d vr&td| dd d t| d	 | d
 dd }|rt||||||f||
 d}| }|rt|  |g }t||dd}|rsd||d   ||d < d||d   ||d < ||d  ||d   ||  |d< |	rt|  g d }t||dd}|S |r|rt	||||||f|}nt||||||f|}|
 }|r| d | }||| }||f||fkr|n||| }|rd| }d| }||j ||j  |j |_|S |rt	||||||f|}nt||||||f|}| }|rA| d | }||| }||f||fkr(|n||| }|r9d| }d| }|t|| }|S )a  
    Two-dimensional range query on the Hi-C contact heatmap.
    Depending on the options, returns either a 2D NumPy array, a rectangular
    sparse ``coo_matrix``, or a data frame of pixels.

    Parameters
    ----------
    h5 : :py:class:`h5py.File` or :py:class:`h5py.Group`
        Open handle to cooler file.
    i0, i1 : int, optional
        Bin range along the 0th (row) axis of the heatmap.
    j0, j1 : int, optional
        Bin range along the 1st (col) axis of the heatmap.
    field : str, optional
        Which column of the pixel table to fill the matrix with. By default,
        the 'count' column is used.
    balance : bool, optional
        Whether to apply pre-calculated matrix balancing weights to the
        selection. Default is True and uses a column named 'weight'.
        Alternatively, pass the name of the bin table column containing the
        desired balancing weights. Set to False to return untransformed counts.
    sparse: bool, optional
        Return a scipy.sparse.coo_matrix instead of a dense 2D numpy array.
    as_pixels: bool, optional
        Return a DataFrame of the corresponding rows from the pixel table
        instead of a rectangular sparse matrix. False by default.
    join : bool, optional
        If requesting pixels, specifies whether to expand the bin ID columns
        into (chrom, start, end). Has no effect when requesting a rectangular
        matrix. Default is True.
    ignore_index : bool, optional
        If requesting pixels, don't populate the index column with the pixel
        IDs to improve performance. Default is True.

    Returns
    -------
    ndarray, coo_matrix or DataFrame

    Notes
    -----
    If ``as_pixels=True``, only data explicitly stored in the pixel table
    will be returned: if the cooler's storage mode is symmetric-upper,
    lower triangular elements will not be generated. If ``as_pixels=False``,
    those missing non-zero elements will automatically be filled in.

    Ncountweightrl   zNo column 'bins/'z(found. Use ``cooler.balance_cooler`` to z1calculate balancing weights or set balance=False.rp   zindexes/bin1_offset)Zreturn_indexFr   r   r   r   Zbalancedr   T)r   r   r   r   r   Zto_framer   rl   r   r   Zto_sparse_matrixrowr   dataZto_arrayr   outer)r&   rr   rs   r   r   ru   rv   rx   ry   rn   rz   r{   r}   r   r"   readerZenginer   weightsZdf2rl   matZbias1Zbias2Zarrr'   r'   r(   r~     st   >

$ "r~   )r&   rH   r*   r4   )r   NN)
r&   rH   rf   rX   rg   rM   re   r   r*   r   )r   NNT)r&   rH   rf   rX   rg   rM   re   r   rn   ro   r*   r   r   )rp   r   rl   r   r   ro   r*   r   )	NTFFTTFrt   T)r&   rH   rr   rX   rs   rX   r   rX   r   rX   ru   r   rv   rw   rx   ro   ry   ro   rn   ro   rz   ro   r{   ro   r}   rX   r   ro   r*   r   )'
__future__r   r   r   numpyr   Zpandasr   Z
simplejsonr   Zpandas.api.typesr   Zscipy.sparser   corer   r   r   r	   r
   r   r   r   Zfileopsr   utilr   r   r   r   __all__r   r   r9   r0   rl   rp   r   r~   r'   r'   r'   r(   <module>   sT    (
  
~$:.c