
    DUfkc                        d dl mZ d dlZd dlZd dlmZmZ d dlmZ d dl	m
Z
mZmZmZmZ d dlZd dlZd dlZd dlmZmZ ddlmZmZ dodZdpdZdqdZdqdZdrdZ	 dsdtdZ ej         dej!                  fdud Z"dvd$Z#dwd'Z$	 	 dxdyd1Z%dzd3Z&d{d7Z'd|d:Z(e(Z)d}d=Z*d~d@Z+ddAZ,ddDZ-ddHZ.	 dsddKZ/ddMZ0dsddPZ1e	 dddV            Z2 G dW dXej3                  Z4dd\Z5dsd]Z6dddej7        fd^Z8dd_Z9	 dsddgZ: G dh da          Z;	 dddnZ<dS )    )annotationsN)OrderedDictdefaultdict)contextmanager)IOAnyContextManagerIterableIterator)
is_integer	is_scalar   )GenomicRangeSpecifierGenomicRangeTuplestartintstopstepreturnIterator[tuple[int, int]]c                B    fdt          |           D             S )a  Partition an integer interval into equally-sized subintervals.
    Like builtin :py:func:`range`, but yields pairs of end points.

    Examples
    --------
    >>> for lo, hi in partition(0, 9, 2):
           print(lo, hi)
    0 2
    2 4
    4 6
    6 8
    8 9

    c              3  B   K   | ]}|t          |z             fV  d S N)min).0ir   r   s     H/var/www/html/software/conda/lib/python3.11/site-packages/cooler/util.py	<genexpr>zpartition.<locals>.<genexpr>    s6      GGQAHd##$GGGGGG    )range)r   r   r   s    ``r   	partitionr!      s0     HGGGGeE4.F.FGGGGr   sstrtuple[str, str]c                    |                      d          }t          |          dk    r|d         d}}nBt          |          dk    r |\  }}|                    d          sd|z   }nt          d          ||fS )zW
    Parse a Cooler URI string

    e.g. /path/to/mycoolers.cool::/path/to/cooler

    z::r   r   /   zInvalid Cooler URI string)splitlen
startswith
ValueError)r"   parts	file_path
group_paths       r   parse_cooler_urir/   #   s     GGDMME
5zzQ %a#:			Uq %	:$$S)) 	*z)J4555j  r   c                H    t          |                     dd                    S )N, )r   replace)r"   s    r   atoir4   6   s    qyyb!!"""r   c                   t          j        d          }|                    |                     dd                    \  }}}t	          |          st          |          S t          |          }|                                                                }|dv r|dz  }n'|dv r|dz  }n|dv r|d	z  }nt          d
| d          t          |          S )Nz
([0-9,.]+)r1   r2   )KKBi  )MMBi@B )GGBi ʚ;zUnknown unit '')
recompiler(   r3   r)   r   floatupperstripr+   )r"   _NUMERIC_RE_valueunits        r   parse_humanizedrF   :   s    *\**K &&qyyb'9'9::NAudt99 5zz%LLE::<<D{						1$111222u::r   "tuple[str, int | None, int | None]c                (   d }d fd}|                      d          }|d                                         }t          |          st          d          t          |          dk     r|ddfS  | ||d	                             \  }}|||fS )
ac  
    Parse a UCSC-style genomic region string into a triple.

    Parameters
    ----------
    s : str
        UCSC-style string, e.g. "chr5:10,100,000-30,000,000". Ensembl and FASTA
        style sequence names are allowed. End coordinate must be greater than
        or equal to start.

    Returns
    -------
    (str, int or None, int or None)

    c              3    K   g d}d                     d |D                       }t          j        d| t          j                  }|                    |           D ]"}|j        }||                    |          fV  #d S )N))HYPHEN-)COORDz[0-9,]+(\.[0-9]*)?(?:[a-z]+)?)OTHERz.+z|\s*c                8    g | ]}d |d          d|d          dS )z(?P<r   >r   ) )r   pairs     r   
<listcomp>z:parse_region_string.<locals>._tokenize.<locals>.<listcomp>d   s4    TTT <Q < <$q' < < <TTTr   z\s*)joinr=   r>   
IGNORECASEfinditer	lastgroupgroup)r"   
token_specpattern	tok_regexmatchtyps         r   	_tokenizez&parse_region_string.<locals>._tokenize^   s      
 
 


 ,,TTTTTUUJ/g//??	''** 	( 	(E/Cu{{3'''''''	( 	(r   c                    | 5t          d                    d                    |                              | |vrt          d| d          d S )NzExpected {} token missingz or zUnexpected token "")r+   formatrT   )r]   tokenexpecteds      r   _check_tokenz)parse_region_string.<locals>._check_tokenj   s]    ;8??H@U@UVVWWW("" !>e!>!>!>??? #"r   c                D   t          | d          \  }} ||dg           t          |          }t          | d          \  }} ||dg           t          | d          \  }}||d fS  ||dg           t          |          }||k     rt          d          ||fS )N)NNrL   rJ   zEnd coordinate less than start)nextrF   r+   )tokensr]   rb   r   endrd   s        r   _expectz$parse_region_string.<locals>._expectq   s    &,//
US%'+++&&&,//
US%(,,,&,//
U;$;S%'+++e$$;;=>>>czr   :r   zChromosome name cannot be emptyr'   Nr   )r(   rA   r)   r+   )r"   r^   ri   r,   chromr   rh   rd   s          @r   parse_region_stringrl   M   s    "
( 
( 
(@ @ @    & GGCLLE!HNNEu:: <:;;;
5zzA~~tT""58,,--JE35#r   regr   
chromsizesdict | pd.Series | Noner   c                   t          | t                    rt          |           \  }}}n,| \  }}}|t          |          n|}|t          |          n|}	 |||         nd}n%# t          $ r}t          d|           |d}~ww xY w|dn|}||t          d          |}||k     rt          d          |dk     s|||k    rt          d| d| d          |||fS )	aU  
    Genomic regions are represented as half-open intervals (0-based starts,
    1-based ends) along the length coordinate of a contig/scaffold/chromosome.

    Parameters
    ----------
    reg : str or tuple
        UCSC-style genomic region string, or
        Triple (chrom, start, end), where ``start`` or ``end`` may be ``None``.
    chromsizes : mapping, optional
        Lookup table of scaffold lengths to check against ``chrom`` and the
        ``end`` coordinate. Required if ``end`` is not supplied.

    Returns
    -------
    A well-formed genomic region triple (str, int, int)

    NzUnknown sequence label: r   z Cannot determine end coordinate.zEnd cannot be less than startzGenomic region out of bounds: [z, rP   )
isinstancer#   rl   r   KeyErrorr+   )rm   rn   rk   r   rh   clenes          r   parse_regionru      sA   , #s 3/44uccuc#/E


U/c#hhhsD$.$:z%   D D D;E;;<<!CD AAEE
{<?@@@
U{{8999qyyT%#**J5JJCJJJKKK%s   A$ $
B.BBz(\d+)tuplec                Z    t          d |                    |           D                       S )Nc                \    g | ])}||                                 rt          |          n|*S rQ   )isdigitr   r   xs     r   rS   znatsort_key.<locals>.<listcomp>   s4    PPP1aPAIIKK.#a&&&QPPPr   )rv   r(   )r"   	_NS_REGEXs     r   natsort_keyr}      s+    PP	8J8JPPPQQQr   iterableIterable[str]	list[str]c                .    t          | t                    S )N)key)sortedr}   )r~   s    r   	natsortedr      s    (,,,,r   array
np.ndarrayc                    t          j        |           } t          |           st          j        g t                    S t          t          d | D                        }t          j        |d d d                   S )Ndtypec              3  4   K   | ]}t          |          V  d S r   )r}   rz   s     r   r   zargnatsort.<locals>.<genexpr>   s(      55!{1~~555555r   )npasarrayr)   r   r   rv   ziplexsort)r   colss     r   
argnatsortr      so    JuEu:: 'x#&&&&55u555677D:d44R4j!!!r   z^chr[0-9]+$z	^chr[XY]$z^chrM$Ffilepath_orstr | IO[str]name_patternstuple[str, ...]	all_namesbool	pd.Seriesc                   t          | t                    r+|                     d          r|                    dd           t	          j        | fdddgddgdt          id	|}|sxg }|D ]]}||d         j                            |                   }|j        t          |d                            }|	                    |           ^t	          j
        |d
          }|d         j        |_        |d         S )at  
    Parse a ``<db>.chrom.sizes`` or ``<db>.chromInfo.txt`` file from the UCSC
    database, where ``db`` is a genome assembly name.

    Parameters
    ----------
    filepath_or : str or file-like
        Path or url to text file, or buffer.
    name_patterns : sequence, optional
        Sequence of regular expressions to capture desired sequence names.
        Each corresponding set of records will be sorted in natural order.
    all_names : bool, optional
        Whether to return all contigs listed in the file. Default is
        ``False``.

    Returns
    -------
    :py:class:`pandas.Series`
        Series of integer bp lengths indexed by sequence name.

    References
    ----------
    * `UCSC assembly terminology <http://genome.ucsc.edu/FAQ/FAQdownloads.html#download9>`_
    * `GRC assembly terminology <https://www.ncbi.nlm.nih.gov/grc/help/definitions>`_

    z.gzcompressiongzip	r   r   namelength)sepusecolsnamesr   axis)rq   r#   endswith
setdefaultpdread_csvcontainsilocr   appendconcatvaluesindex)r   r   r   kwargs
chromtabler,   rZ   parts           r   read_chromsizesr      s   @ +s## 1(<(<U(C(C 1-000Ax sm   J  .$ 	 	Gj04==gFFGD9ZV556DLLYu1---
!&)0Jhr   dbc                $    t          d|  dfi |S )zo
    Download chromosome sizes from UCSC as a :py:class:`pandas.Series`, indexed
    by chromosome label.

    z*http://hgdownload.soe.ucsc.edu/goldenPath/z/database/chromInfo.txt.gz)r   )r   r   s     r   fetch_chromsizesr     s1     SRSSS 
  r   r   	filepathsOrderedDict[str, Any]c                X   ddl }t          |          dk    rt          d          t          |          dk    r|                    |d         d          n6i |D ]1}                    |                    |d          j                   2t          fd| D                       }|S )az  
    Load lazy FASTA records from one or multiple files without reading them
    into memory.

    Parameters
    ----------
    names : sequence of str
        Names of sequence records in FASTA file or files.
    filepaths : str
        Paths to one or more FASTA files to gather records from.

    Returns
    -------
    OrderedDict of sequence name -> sequence record

    r   NzNeed at least one filer   T)as_rawc              3  ,   K   | ]}||         fV  d S r   rQ   )r   rk   fas     r   r   zload_fasta.<locals>.<genexpr>.  s,      @@5"U),@@@@@@r   )pyfaidxr)   r+   Fastaupdaterecordsr   )r   r   r   filepathr   r   s        @r   
load_fastar     s    " NNN
9~~1222
9~~]]9Q<]55 ! 	D 	DHIIgmmHTm::BCCCC@@@@%@@@@@GNr   binsizepd.DataFramec                      fd}t          j        t          |                                           dd          }t          j        |d         t           j                  d          |d<   |S )af  
    Divide a genome into evenly sized bins.

    Parameters
    ----------
    chromsizes : Series
        pandas Series indexed by chromosome name with chromosome lengths in bp.
    binsize : int
        size of bins in bp

    Returns
    -------
    bins : :py:class:`pandas.DataFrame`
        Dataframe with columns: ``chrom``, ``start``, ``end``.

    c                    |          }t          t          j        |z                      }t          j        d|dz             z  }||d<   t	          j        | g|z  |d d         |dd          dg d          S Nr   r   r   )rk   r   rh   columns)r   r   ceilaranger   	DataFrame)rk   rs   n_binsbinedgesr   rn   s       r   _eachzbinnify.<locals>._eachD  s    % RWTG^,,--9Q!--7|g&#2#xPQPRPR|TT---
 
 
 	
r   r   Tr   ignore_indexrk   
categoriesordered)r   r   mapkeysCategoricallistr   )rn   r   r   bintables   ``  r   binnifyr   2  s    $
 
 
 
 
 
 yUJOO$5$566QTRRRHd:+;&<&<d  HW Or   fasta_recordsenzymec                X    	 ddl m} ddlm n# t          $ r t	          d          dw xY w                                 }	 t          ||          j        n%# t          $ r}t          d|           |d}~ww xY w fd}t          j        t          ||          dd          S )av  
    Divide a genome into restriction fragments.

    Parameters
    ----------
    fasta_records : OrderedDict
        Dictionary of chromosome names to sequence records.
    enzyme: str
        Name of restriction enzyme (e.g., 'DpnII').

    Returns
    -------
    frags : :py:class:`pandas.DataFrame`
        Dataframe with columns: ``chrom``, ``start``, ``end``.

    r   Nz4Biopython is required to find restriction fragments.zUnknown enzyme name: c                                        t          |          d d                              }t          j        dt          j         |                    dz   t          |          f                             t          j                  }t          |          dz
  }t          j	        | g|z  |d d         |dd          dg d          }|S r   )
Seqr#   r   r_r   r)   astypeint64r   r   )rk   seqcutsn_fragsfragsbioseq
cut_finderr   s        r   r   zdigest.<locals>._eachz  s    jj]51!!!45566uQC11A5s3xx?@GGQQd))a-g'$ss)DHMM---
 
 
 r   Tr   )Bio.RestrictionRestrictionBio.Seqr   ImportErrorr   getattrsearchAttributeErrorr+   r   r   r   )r   r   biorstchromsrt   r   r   r   s   `     @@r   digestr   Z  s   "((((((          B
 
	 !!FBVV,,3

 B B B999::AB	 	 	 	 	 	 	 9S''adCCCCs     -A 
A=%A88A=bins
int | Nonec                l   t                      }|                     dd          D ]^\  }}|                    |d         |d         z
  j        dd                                                    t          |          dk    r dS _t          |          dk    rt          t          |                    S dS )	z
    Infer bin size from a bin DataFrame. Assumes that the last bin of each
    contig is allowed to differ in size from the rest.

    Returns
    -------
    int or None if bins are non-uniform

    rk   T)observedrh   r   Nr   r   )setgroupbyr   r   uniquer)   rf   iter)r   sizes_chromrX   s       r   get_binsizer     s     EEEg==  eElU7^39#2#>EEGGHHHu::>>44 
5zzQDKK   tr   c                   |                      dgd          ddg                             d                              ddd	
          }t          |d                   t          |d                   }}t	          j        ||          S )z
    Infer chromsizes Series from a bin DataFrame. Assumes that the last bin of
    each contig is allowed to differ in size from the rest.

    Returns
    -------
    int or None if bins are non-uniform

    rk   last)keeprh   T)dropr   r   )rk   rh   r   r   data)drop_duplicatesreset_indexrenamer   r   Series)r   r   r   lengthss       r   get_chromsizesr    s     	gYV44gu5EF	$			&::	;	; 
 :f-..Z5I0J0JGF960000r   pd.Series | dictregionc                8   t          ||          \  }}}|                     |          }|dk    s|||         k     r^|d         j                            |d          }||d         j        |d                             |d          z   }|j        ||         }|S )zN
    Range query on a BED-like dataframe with non-overlapping intervals.

    r   rh   rightsider   Nleft)ru   	get_groupr   searchsortedr   )	groupedrn   r
  rk   r   rh   resultlohis	            r   bedslicer    s     %VZ88E5#u%%FqyyC*U+++E]!..u7.CC&/(-::3V:LLLRU#Mr   r{   r   np.ndarray | h5py.Datasetc                b    t          | t          j                  r| nt          j        |           S r   )rq   h5pyDatasetr   r   )r{   s    r   asarray_or_datasetr    s%    1dl++>11A>r   	chunksize)tuple[np.ndarray, np.ndarray, np.ndarray]c                   t           j        }t          |           } t          |           }|dk    rPt          j        g t
                    t          j        g t
                    t          j        g | j                  fS ||}g g }}t           j        }t          d||          D ]}| |||z            } ||dd         |dd         k              dz   }	|d         |k    rt           j	        d|	f         }	|
                    ||	z              |
                    ||	                    |d         }t          j        |          }t          j        t           j	        ||f                   }
t          j        |          }||
|fS )aS  
    Run length encoding.
    Based on http://stackoverflow.com/a/32681075, which is based on the rle
    function from R.

    Parameters
    ----------
    x : 1D array_like
        Input array to encode
    dropna: bool, optional
        Drop all runs of NaNs.

    Returns
    -------
    start positions, run lengths, run values

    r   r   Nr   r   )r   flatnonzeror  r)   r   r   r   nanr    r   r   concatenatediff)r   r  wherenstartsr   last_valr   r{   locsr  s              r   rlencoder(    s{   * NEu%%EE

AAvvHRs###HRs###HRu{+++
 	
 	FFvH1a##  !a)m#$uQqrrUaf_%%)Q485D>Da$hagR5^F##FgbeFAI&''G^F##F7F""r   cmdc                     t           fdt          j        d                             t          j                  D                       S )Nc              3     K   | ]A}t          j        t           j                            |          t           j                  V  Bd S r   )osaccesspathrT   X_OK)r   r.  r)  s     r   r   zcmd_exists.<locals>.<genexpr>  sU         		"',,tS))2733     r   PATH)anyr,  environr(   pathsep)r)  s   `r   
cmd_existsr4    sR        Jv&,,RZ88     r   r  r   c           	     |    t          j        t          j        | t          j        | |          z
            |          S r   )r   medianabs)r  r   s     r   madr8     s/    9RVD29T4#8#88994@@@r   rfpstr | h5py.GroupmodeContextManager[h5py.Group]c              /    K   t          | t                    rd}t          j        | |g|R i |}nfd}|dk    r| j        j        dk    rnK|dv r| j        j        dk    rt          d          |dk    rt          d          |d	v rt          d
          | }	 |V  |r|                                 dS dS # |r|                                 w w xY w)a  
    Context manager like ``h5py.File`` but accepts already open HDF5 file
    handles which do not get closed on teardown.

    Parameters
    ----------
    fp : str or ``h5py.File`` object
        If an open file object is provided, it passes through unchanged,
        provided that the requested mode is compatible.
        If a filepath is passed, the context manager will close the file on
        tear down.

    mode : str
        * r        Readonly, file must exist
        * r+       Read/write, file must exist
        * a        Read/write if exists, create otherwise
        * w        Truncate if exists, create otherwise
        * w- or x  Fail if exists, create otherwise

    TFr9  r+)r?  az%File object provided is not writeablewzCannot truncate open file)zw-r{   zFile existsN)rq   r#   r  Filefiler<  r+   close)r:  r<  argsr   own_fhfhs         r   	open_hdf5rH    s     6 "c Yr41$111&113;;27<4//[  RW\S%8%8DEEES[[8999[  ]+++ 	HHJJJJJ	 	6 	HHJJJJ	s   B6 6Cc                  8     e Zd Zd	 fdZd
dZddZddZ xZS )closing_hdf5grp
h5py.Groupc                T    t                                          |j                   d S r   )super__init__id)selfrK  	__class__s     r   rO  zclosing_hdf5.__init__6  s#         r   r   c                    | S r   rQ   rQ  s    r   	__enter__zclosing_hdf5.__enter__9  s    r   Nonec                4    | j                                         S r   rC  rD  )rQ  exc_infos     r   __exit__zclosing_hdf5.__exit__<  s    y   r   c                8    | j                                          d S r   rX  rT  s    r   rD  zclosing_hdf5.close?  s    	r   )rK  rL  )r   rL  )r   rV  )__name__
__module____qualname__rO  rU  rZ  rD  __classcell__)rR  s   @r   rJ  rJ  5  sy        ! ! ! ! ! !   ! ! ! !       r   rJ  attrsh5py.AttributeManagerdictc                    t          |           }|                                 D ]T\  }}	 |                                ||<   # t          $ r |                                ||<   Y At
          $ r |||<   Y Qw xY w|S r   )rb  itemsitemr+   tolistr   )r`  outkvs       r   attrs_to_jsonablerj  C  s    
u++C  1	VVXXCFF 	  	  	 XXZZCFFF 	 	 	CFFF	Js   A!A6%A65A6c           	     
   t          j        d          t          j        d          t          j        d          t          j        d          t          j        d          t          j        d          t          j        d          dddfdfd	}dfd	t          | d          r| j        S t          | t          j        t          j        f          r| j        dd         S t          | t          j                  r
| dd         S n	dd         t          | t                    r5t          j        fd|                                 D                       S t          | t"                    r-t%          |           dk    r | d         | d                   S t          | t&          t"          f          rYt)          d | D                       st+          d|            t          j        fd| D             d | D                       S t          | d          s)| '	 t          j        |           } |          S #  Y nxY wt/          |           r ||           S t1          d|            )a  
    Extracted and modified from dask/dataframe/utils.py :
        make_meta (BSD licensed)

    Create an empty pandas object containing the desired metadata.

    Parameters
    ----------
    x : dict, tuple, list, pd.Series, pd.DataFrame, pd.Index, dtype, scalar
        To create a DataFrame, provide a `dict` mapping of `{name: dtype}`, or
        an iterable of `(name, dtype)` tuples. To create a `Series`, provide a
        tuple of `(name, dtype)`. If a pandas object, names, dtypes, and index
        should match the desired output. If a dtype or scalar, a scalar of the
        same dtype is returned.
    index :  pd.Index, optional
        Any pandas index to use in the metadata. If none provided, a
        `RangeIndex` will be used.

    Examples
    --------
    >>> make_meta([('a', 'i8'), ('b', 'O')])
    Empty DataFrame
    Columns: [a, b]
    Index: []
    >>> make_meta(('a', 'f8'))
    Series([], Name: a, dtype: float64)
    >>> make_meta('i8')
    1

    T    z
1970-01-01r   foo)bVr8   mSr@  UO__UNKNOWN_CATEGORIES__c                ,   | j         dv r|                     d          S | j         dk    r#|                     t          dd                    S | j         v r-| j                  }| j         dv r|                    |           n|S t	          d|            )N)r   fur   cr   )rp  r8   zCan't handle dtype: )kindtypecomplexr   	TypeError)r   o_simple_fake_mappings     r   _scalar_from_dtypez&infer_meta.<locals>._scalar_from_dtype|  s    :((::a== Z3::gamm,,,Z///$UZ0A&+jJ&>&>188E???AE:5::;;;r   c                b   t          | t          j        t          j        t          j        f          r| S t          j        |           rCt          | d          r| j        n t          j        t          |                     } |          S t          dt          |           j         d          )Nr   zCan't handle meta of type 'r<   )rq   r   	Timestamp	TimedeltaPeriodr   isscalarhasattrr   rz  r|  r\  )r{   r   r  s     r   _nonempty_scalarz$infer_meta.<locals>._nonempty_scalar  s    a",bi@AA 	RH[^^ 	R&q'22IAGGa8I8IE%%e,,,PT!WW=MPPPQQQr   Nc                    t          |t                    r=|dk    r7t          j        t          j        g          | |          j        d d         S t          j        g || |          S )Ncategory)r   r   r   )r   r   r   )rq   r#   r   r  r   r   )r   r   r   UNKNOWN_CATEGORIESs      r   _empty_seriesz!infer_meta.<locals>._empty_series  sv    eS!! 	ez&9&99 23444u  2A2  y5t5AAAAr   _metar   c                4    i | ]\  }}| ||           S r   rQ   r   rx  dr  r   s      r   
<dictcomp>zinfer_meta.<locals>.<dictcomp>  s0    IIIVaQa%000IIIr   r  r'   c              3  f   K   | ],}t          |t                    ot          |          d k    V  -dS )r'   N)rq   rv   r)   )r   r   s     r   r   zinfer_meta.<locals>.<genexpr>  s;      CCA:a''7CFFaKCCCCCCr   z2Expected iterable of tuples of (name, dtype), got c                4    i | ]\  }}| ||           S r  rQ   r  s      r   r  zinfer_meta.<locals>.<dictcomp>  s0    AAAVaQa%000AAAr   c                    g | ]\  }}|S rQ   rQ   )r   rx  r  s      r   rS   zinfer_meta.<locals>.<listcomp>  s    %%%41aQ%%%r   r   r   r   z'Don't know how to create metadata from r   )r   bool_void
datetime64timedelta64str_unicode_r  r  rq   r   r  r   r   Indexrb  rd  rv   r)   r   allr+   r   r   r|  )r{   r   r  r   r  r  r  r~  s    `  @@@@r   
infer_metar  O  s   B Xd^^WT]]]<((^AWU^^WU^^[	 	 2	< 	< 	< 	< 	<R R R R RB B B B B B q' w!bi.// vac{	Arx	 	  1v]EEac
E!T 
|IIIIIqwwyyIIIQV
 
 
 	
 !U A!}QqT1Q4u5555	Ae}	%	% CCCCCCC 	KKK   |AAAAAqAAA%%1%%%
 
 
 	

 Q   	Q]	HQKKE%%e,,,	D|| #"""
AaAA
B
BBs   3I Ic                   t          |           } t          t                    st          fd          n^                                }t          fd          |                                D ]#\  }}t          |          r| |         n|}||<   $||du rt          j        g           }	nfd|D             }
t          |
          dk    r#t          j        |
d         |d                   }	n!t          j
                            |
|	          }	|                                 t          |          D ]\  }}|                     ||z
             fd
| D             }t          j        || |	          S )ze
    Extracted and modified from pandas/io/parsers.py :
        _get_empty_meta (BSD licensed).

    c                     p S r   rQ   )default_dtyper   s   r   <lambda>zget_meta.<locals>.<lambda>  s    E$:] r   c                      S r   rQ   )r  s   r   r  zget_meta.<locals>.<lambda>  s    M r   NFc                H    g | ]}t          j        g |                    S r   r   r  )r   r   r   s     r   rS   zget_meta.<locals>.<listcomp>  s,    IIIT	"E$K000IIIr   r   r   )r   )r   c                J    i | ]}|t          j        g |                     S r  r  )r   col_namer   s     r   r  zget_meta.<locals>.<dictcomp>  s.    WWW8")BeHo>>>WWWr   r  )r   rq   rb  r   copyrd  r   r   r  r)   
MultiIndexfrom_arrayssort	enumeratepopr   )r   r   index_columnsindex_namesr  _dtyperh  ri  colr   r  r   r$  col_dicts    `  `         r   get_metar    s    7mmG
 eT"" :::::;; 111122 LLNN 	 	DAq *14'!**1CE#JJ 6 6IIII[IIIt99>>HT!W;q>:::EEM--d+-FFEm,, 	 	DAqKKAWWWWwWWWH<'????r   c                >   t          | d         j        t          j                  }|                                 } |s2t          j        | j        t          |j                  d          | d<   n/| d         j	        j
        |j        k                                    sJ | S )Nrk   Tr   )rq   r   r   CategoricalDtyper  r   rk   r   r   catr   r  )r   rn   is_cats      r   
check_binsr    s    W+R-@AAF99;;D HJ4
(8#9#94
 
 
W W!,
0@@EEGGGGGKr   gsGenomeSegmentationn_chunk_maxfile_contigsloadingslist[int | float] | Nonelist[GenomicRangeTuple]c           
     ,   | j         }|                                }||}|                                }||j        |         z  }|j        |         |z  }g }|D ]\  }	|vr
| j                 }
t          t          j        ||j                 z                      }|	j        j	        d d |         }|d         |
k    rt          j
        ||
f         }|                    fdt          |d d         |dd                    D                        |S )Nr   c              3  (   K   | ]\  }}||fV  d S r   rQ   )r   r   rh   rk   s      r   r   z%balanced_partition.<locals>.<genexpr>  s?       
 
$.E3UE3
 
 
 
 
 
r   r   )_bins_groupedsizeidxmaxlocrn   r   r   r   r   r   r   extendr   )r  r  r  r  r  chrom_nbinschrmaxconstgrangesrX   rs   r   anchorsrk   s                @r   balanced_partitionr    sA    G,,..K__F(,v..HOF#k1EG 

 

u$$}U#2758<#667788+$VVtV,2;$eGTM*G 
 
 
 
25gcrclGABBK2P2P
 
 
 	
 	
 	
 	
 Nr   c                      e Zd ZddZdd	Zd
S )r  rn   r   r   r   c                   t          ||          }|                    ddd          | _        | j                                        j        }|| _        t          |          | _        t          |	                                          | _
        || _        t          j        |	                                t          t          |                              | _        t"          j        dt#          j        |          f         | _        t"          j        dt#          j        |j                  f         | _        | j        |d         j        j                 |d         j        z   | _        d S )Nrk   TF)r   r  r  r   r   )r  r   r  r  r   rn   r   r   r   r   contigsr   r   r  r    r)   idmapr   r   cumsumchrom_binoffsetchrom_absposr  codesstart_abspos)rQ  rn   r   nbins_per_chroms       r   rO  zGenomeSegmentation.__init__  s   $
++!\\'Du\MM,1133:$"4((JOO--..	YZ__%6%6U3z??=S=STTT
!uQ	/(B(B%BCE!RYz/@%A%A"ABd7m/56g9MM 	r   r
  r   r   c                V   t          || j                  \  }}}| j                            |          }|dk    s|| j        |         k     r^|d         j                            |d          }||d         j        |d                              |d          z   }|j        ||         }|S )Nr   rh   r  r  r   r  )ru   rn   r  r  r   r  r   )rQ  r
  rk   r   rh   r  r  r  s           r   fetchzGenomeSegmentation.fetch(  s    (AAuc#--e44199doe444%225w2GGBfWo,RSS1>>s>PPPB[B'Fr   N)rn   r   r   r   )r
  r   r   r   )r\  r]  r^  rO  r  rQ   r   r   r  r    s<        
 
 
 
      r   逖 chunksIterable[pd.DataFrame]r  Iterator[pd.DataFrame]c              #     K   g }d}| D ]K}|t          |          z  }|                    |           ||k    rt          j        |d          V  g }d}Lt          |          rt          j        |d          V  dS dS )a  
    Take an incoming iterator of small data frame chunks and buffer them into
    an outgoing iterator of larger chunks.

    Parameters
    ----------
    chunks : iterator of :py:class:`pandas.DataFrame`
        Each chunk should have the same column names.
    size : int
        Minimum length of output chunks.

    Yields
    ------
    Larger outgoing :py:class:`pandas.DataFrame` chunks made from concatenating
    the incoming ones.

    r   r   N)r)   r   r   r   )r  r  bufr$  chunks        r   bufferedr  2  s      * C	A  	SZZ

5t88)Ca((((((CA
3xx %i!$$$$$$$$% %r   )r   r   r   r   r   r   r   r   )r"   r#   r   r$   )r"   r#   r   r   )r"   r#   r   rG   r   )rm   r   rn   ro   r   r   )r"   r#   r   rv   )r~   r   r   r   )r   r   r   r   )r   F)r   r   r   r   r   r   r   r   )r   r#   r   r   )r   r   r   r#   r   r   )rn   r   r   r   r   r   )r   r   r   r#   r   r   )r   r   r   r   )r   r   r   r   )rn   r	  r
  r   r   r   )r{   r   r   r  )r   r   r  r   r   r  )r)  r#   r   r   )r  r   r   r   r   r   )r9  )r:  r;  r<  r#   r   r=  )r`  ra  r   rb  )r   r   rn   r   r   r   )
r  r  r  r   r  r   r  r  r   r  )r  )r  r  r  r   r   r  )=
__future__r   r,  r=   collectionsr   r   
contextlibr   typingr   r   r	   r
   r   r  numpyr   pandasr   pandas.api.typesr   r   _typingr   r   r!   r/   r4   rF   rl   ru   r>   rr  r}   r   r   r   r   r   r   make_bintabler   r   r  r  r  r(  r4  r8  rH  GrouprJ  rj  r  object_r  r  r  r  r  rQ   r   r   <module>r     s   " " " " " " 				 				 0 0 0 0 0 0 0 0 % % % % % % > > > > > > > > > > > > > >          2 2 2 2 2 2 2 2 = = = = = = = =H H H H$! ! ! !&# # # #   &> > > >F +/. . . . .b #-"*Xrt"<"< R R R R R- - - -" " " " &O2  2  2  2  2 j	 	 	 	   D" " " "J +D +D +D +D\   *1 1 1 1&   $? ? ? ? !0# 0# 0# 0# 0#f   A A A A A  - - - - -`    4:   	 	 	 	nC nC nC nCd tRZ(@ (@ (@ (@V
 
 
 
" *.	    >       : % % % % % % %r   