
    DUf                        d dl Z d dlmZmZmZ d dlZg dZ e j        d          Z	g dZ
 e j        dd                    d e
D                       z   e j                  Zd	eeeef         d
efdZded
efdZded
efdZded
eeeef         fdZd	ed
eeeef         fdZ	 dddd	eeef         deeeej        f                  ded
eeeef         fdZdS )    N)OptionalTupleUnion)parse_regionparse_region_stringis_complete_ucsc_stringto_ucsc_stringz
([0-9,.]+)))HYPHEN-)COORDz[0-9,]+(\.[0-9]*)?(?:[a-z]+)?)OTHERz.+z\s*z|\s*c              #   .   K   | ]\  }}d | d| dV  dS )z(?P<>)N ).0nametokens      T/var/www/html/software/conda/lib/python3.11/site-packages/bioframe/core/stringops.py	<genexpr>r      s:      VVkdE1$11111VVVVVV    grangereturnc                      dj         |  S )z
    Convert a grange to a UCSC string.

    Parameters
    ----------
    grange : tuple or other iterable
        chrom, start, end

    Returns
    -------
    str
        UCSC-style genomic range string, '{chrom}:{start}-{end}'
    z{}:{}-{})format)r   s    r   r	   r	      s     :f%%r   sc                 b    t          | t                    sdS t          |           \  }}}|dS dS )z
    Returns True if a string can be parsed into a completely informative
    (chrom, start, end) format.

    Parameters
    ----------
    s : str

    Returns
    -------
    bool
        True if able to be parsed and ``end`` is known.

    FNT)
isinstancestrr   )r   _ends      r   r   r   ,   s=     a u#A&&IAq#
{u4r   c                    t                               |                     dd                    \  }}}t          |          st	          |          S t          |          }|                                                                }|dv r|dz  }n'|dv r|dz  }n|dv r|dz  }nt          d	| d
          t	          |          S )N, )KKBi  )MMBi@B )GGBi ʚ;zUnknown unit '')	NUMERIC_REGEXsplitreplacelenintfloatupperstrip
ValueError)r   r    valueunits       r   _parse_humanized_intr7   C   s    "((3););<<NAud t99 5zz %LLE::<<D{						1$111222u::r   c                    d }d }|                      d          }|d                                         }t          |          st          d          t          |          dk     r|ddfS  | ||d                             \  }}|||fS )	a.  
    Parse a UCSC-style genomic range string into a triple.

    Parameters
    ----------
    s : str
        UCSC-style genomic range string, e.g. "chr5:10,100,000-30,000,000".

    Returns
    -------
    tuple
        (str, int or None, int or None)

    See also
    --------
    parse_region
    c              3      K   t                               |           D ]"}|j        }||                    |          fV  #d S N)RANGE_REGEXfinditer	lastgroupgroup)r   matchr   s      r   	_tokenizez&parse_region_string.<locals>._tokenizek   sT       ))!,, 	* 	*E?DD)))))))	* 	*r   c                 f   t          | d          \  }}|dk    rt          d| d|           t          |          }t          | d          \  }}|dk    rt          d| d|           t          | d          \  }}|d }n+|dk    rt          |          }nt          d| d|           ||fS )N)NNr   z&Expected COORD; got unexpected token: z: r
   z'Expected HYPHEN; got unexpected token: )nextr4   r7   )token_streamr   r   startr!   s        r   _parse_rangez)parse_region_string.<locals>._parse_rangep   s    <66e7??UdUUeUUVVV$U++<66e8VtVVuVVWWW<66e<CCW__&u--CCUdUUeUUVVVczr   :r   zChromosome name cannot be empty   N   )r-   r3   r/   r4   )r   r@   rE   partschromrD   r!   s          r   r   r   X   s    &* * *
  ( GGCLLE!HNNEu:: <:;;;
5zzA~~tT""iia1122JE3%r   c                     t          |           dk     rt          d          | dd         \  }}}t          |          }|t          |          n|}|t          |          n|}|||fS )a  
    Coerce a genomic range record into a triple.

    Parameters
    ----------
    grange : str or tuple
        * A triple (chrom, start, end), where ``start`` or ``end`` may be
          ``None``.
        * A quadruple or higher-order tuple, e.g. (chrom, start, end, name).
          ``name`` and other fields will be ignored.

    Returns
    -------
    tuple
        A well-formed genomic range triple (str, int, int).
       z-Length of a range record should be at least 3N)r/   r4   r   r0   )r   rJ   rD   r!   s       r   _parse_region_recordrM      su    " 6{{QHIIIrr
E5#JJE+CJJJEo#c(((3C%r   T)check_bounds
chromsizesrN   c                j   t          | t                    rt          |           \  }}}nt          |           \  }}}d}|.	 ||         }n # t          $ r t          d|           w xY w||}|d}|||k     rt          d          |r$|dk     s|||k    rt          d| d| d          |||fS )a  
    Coerce a genomic range string or sequence type into a triple.

    Parameters
    ----------
    grange : str or tuple
        * A UCSC-style genomic range string, e.g. "chr5:10,100,000-30,000,000".
        * A triple (chrom, start, end), where ``start`` or ``end`` may be
          ``None``.
        * A quadruple or higher-order tuple, e.g. (chrom, start, end, name).
          ``name`` and other fields will be ignored.

    chromsizes : dict or Series, optional
        Lookup table of sequence lengths for bounds checking and for
        filling in a missing end coordinate.

    check_bounds : bool, optional [default: True]
        If True, check that the genomic range is within the bounds of the
        sequence.

    Returns
    -------
    tuple
        A well-formed genomic range triple (str, int, int).

    Notes
    -----
    Genomic ranges are interpreted as half-open intervals (0-based starts,
    1-based ends) along the length coordinate of a sequence.

    Sequence names may contain any character except for whitespace and colon.

    The start coordinate should be 0 or greater and the end coordinate should
    be less than or equal to the length of the sequence, if the latter is
    known. These are enforced when ``check_bounds`` is ``True``.

    If the start coordinate is missing, it is assumed to be 0. If the end
    coordinate is missing and chromsizes are provided, it is replaced with the
    length of the sequence.

    The end coordinate **must** be greater than or equal to the start.

    The start and end coordinates may be suffixed with k(b), M(b), or G(b)
    multipliers, case-insentive. e.g. "chr1:1K-2M" is equivalent to
    "chr1:1000-2000000".
    NzUnknown sequence label: r   zEnd cannot be less than startzGenomic range out of bounds: [z, r   )r   r   r   rM   KeyErrorr4   )r   rO   rN   rJ   rD   r!   clens          r   r   r      s   h &# 9/77ucc088uc D	Ae$DD 	A 	A 	A???@@@	A;C }
C%KK8999 Kt'7C$JJI%II3IIIJJJ%s   A A(r:   )retypingr   r   r   pandaspd__all__compiler,   RANGE_TOKEN_SPECjoin
IGNORECASEr;   r   r0   r	   boolr   r7   r   tuplerM   dictSeriesr   r   r   r   <module>r`      s   				 ) ) ) ) ) ) ) ) ) )       
<((    bj
W\\VVEUVVVVVVM &5c3/ &C & & & &"s t    .C C    *73 75c3#7 7 7 7 7t 5c3+?    8 48M 	M M M#u*MtRY/0M 	M
 3S=M M M M M Mr   