
    DUf              	       8   d Z ddlZddlZddlZddlZddlZddlZddlZddlm	Z	 ddl
mZmZ ddlmZ ddlmZ ddlmZ ddlmZ 	 dd	lmZ n# e$ r	 dd	lmZ Y nw xY wdd
lmZ 	 ddlZn# e$ r dZY nw xY w ed          Z ej        d          Z G d de          Z G d de           Z! G d de"          Z# G d de"          Z$ G d de"          Z% G d de&          Z' G d de          Z( G d de           Z) G d de"          Z* G d d e+          Z, G d! d" e	d"g d#                    Z- G d$ d%e+          Z. G d& d'e+          Z/ G d( d)e/          Z0 G d* d+e+          Z1 G d, d-e1          Z2d>d/Z3 G d0 d1          Z4d2 Z5d3Z6 e7d4  e8d5          D                       Z9d.:                    e9          Z;e<=                    e6d         e6d6         e;          Z>e>fZ?d7 Z@d8 ZAd9 ZBd: ZCd; ZDd< ZEeFd=k    rddlGZG eGjH                     dS dS )?z9
Fasta file -> Faidx -> Fasta -> FastaRecord -> Sequence
    N)
namedtuple)islicezip_longest)ceil)getmtime)TemporaryFile)Lock)version)OrderedDictpyfaidxz%([ACTGNactgnYRWSKMDVHBXyrwskmdvhbx]+)c                       e Zd ZdZdS )KeyFunctionErrorz/Raised if the key_function argument is invalid.N__name__
__module____qualname____doc__     M/var/www/html/software/conda/lib/python3.11/site-packages/pyfaidx/__init__.pyr   r   $   s        9999r   r   c                       e Zd ZdZdS )FastaIndexingErrorz>Raised if we encounter malformed FASTA that prevents indexing.Nr   r   r   r   r   r   (   s        HHHHr   r   c                       e Zd ZdZdS )IndexNotFoundErrorz.Raised if read_fai cannot open the index file.Nr   r   r   r   r   r   ,   s        8888r   r   c                       e Zd ZdZdS )VcfIndexNotFoundErrorz%Raised if vcf cannot find a tbi file.Nr   r   r   r   r   r   0   s        ////r   r   c                       e Zd ZdZdS )FastaNotFoundErrorz*Raised if the fasta file cannot be opened.Nr   r   r   r   r   r   4   s        4444r   r   c                       e Zd ZdZdS )
FetchErrorzBRaised if a request to fetch a FASTA sequence cannot be fulfilled.Nr   r   r   r   r    r    8   s        LLLLr   r    c                       e Zd ZdZdS )BedErrorz Indicates a malformed BED entry.Nr   r   r   r   r"   r"   <   s        ****r   r"   c                       e Zd ZdZdS )RegionErrorzA region error occurred.Nr   r   r   r   r$   r$   @   s         #"""r   r$   c                       e Zd ZdZdS )UnsupportedCompressionFormatzh
    Raised when a FASTA file is given with a recognized but unsupported
    compression extension.
    Nr   r   r   r   r&   r&   F   s           r   r&   c                       e Zd ZdZddZd Zd Zd Zd	 Zd
 Z	d Z
ed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             ZdS )Sequencez
    name = FASTA entry name
    seq = FASTA sequence
    start, end = coordinates of subsequence (optional)
    comp = boolean switch for complement property
     NFc                     || _         || _        || _        || _        || _        t          |t                    sJ t          |t                    sJ d S N)nameseqstartendcomp
isinstancestr)selfr,   r-   r.   r/   r0   s         r   __init__zSequence.__init__U   sW    	
	$$$$$$#s#######r   c           
      .   | j         | j        t          | j                  dk    rd}nt          | j                  t	          | j        | j         z
            dz   k    rd}d}nt          | j                  t	          | j        | j         z
            k    rd}d}ngt          | j                  t	          | j        | j         z
            k    r5t          d| j         d| j        d	t          | j                  d
          t          |t                    r|                    t          |                     \  }}}| j         | j        2d}d}| 	                    | j
        | j        |         ||| j                  S | j        | j         }
}	t	          |          dk    rd}d}n&|dk    r|dk    rd}|	|z
  }|
|z   }n|
|z   }|
|z   |z   }| 	                    | j
        | j        |         ||| j                  S t          |t                    r|dk     rt          |           |z   }| j         r>| 	                    | j
        | j        |         | j         |z   | j         |z   | j                  S | 	                    | j
        | j        |         | j                  S dS )a   Returns a sliced version of Sequence
        >>> x = Sequence(name='chr1', seq='ATCGTA', start=1, end=6)
        >>> x
        >chr1:1-6
        ATCGTA
        >>> x[:3]
        >chr1:1-3
        ATC
        >>> x[3:]
        >chr1:4-6
        GTA
        >>> x[1:-1]
        >chr1:2-5
        TCGT
        >>> x[::-1]
        >chr1:6-1
        ATGCTA
        >>> x[::-3]
        >chr1
        AC
        >>> x = Sequence(name='chr1', seq='ATCGTA', start=0, end=6)
        >>> x
        >chr1:0-6
        ATCGTA
        >>> x[:3]
        >chr1:0-3
        ATC
        >>> x[3:]
        >chr1:3-6
        GTA
        >>> x[1:-1]
        >chr1:1-5
        TCGT
        >>> x[::-1]
        >chr1:6-0
        ATGCTA
        >>> x[::-3]
        >chr1
        AC
        Nr      TFzCoordinates (Sequence.start=z and Sequence.end=z2) imply a different length than Sequence.seq (len=z). Did you modify Sequence.seq?)r.   r/   lenr-   abs
ValueErrorr1   sliceindices	__class__r,   r0   int)r3   ncorrection_factor	one_basedslice_start
slice_stop
slice_stepr.   r/   self_end
self_starts              r   __getitem__zSequence.__getitem__^   s   R :!1S]]a5G5G !
 
DJ&''!+, , I "]]c$(TZ"78888I !]]c$(TZ"78888*:::txxxTX89 9 9 a 	I23))CII2F2F/KZz!TX%5~~di!eS&*i1 1 1$(HdjjH:""r!!##!"J :- :- #[0 :-0AA>>$)TXa[%"&)- - -3 	I1uuIIMz I~~di!dj1n&*j1ndiA A A ~~di!diHHH	I 	Ir   c                     | j         S r+   r-   r3   s    r   __str__zSequence.__str__   s	    xr   c                 "    | ddd         j         S )a&   Returns the reverse compliment of sequence
        >>> x = Sequence(name='chr1', seq='ATCGTA', start=1, end=6)
        >>> x
        >chr1:1-6
        ATCGTA
        >>> y = -x
        >>> y
        >chr1:6-1 (complement)
        TACGAT
        >>> -y
        >chr1:1-6
        ATCGTA
        Nr7   )
complementrJ   s    r   __neg__zSequence.__neg__   s     DDbDz$$r   c                 n    d                     d                     d| j        g          | j        g          S )N
r)   >)join
fancy_namer-   rJ   s    r   __repr__zSequence.__repr__   s.    yy"''3"89948DEEEr   c                 *    t          | j                  S )z<
        >>> len(Sequence('chr1', 'ACT'))
        3
        )r8   r-   rJ   s    r   __len__zSequence.__len__   s    
 48}}r   c                 B    t          |           t          |          k    S )zC
        >>> Sequence('chr1', 'ACT') == 'ACT'
        True
        r2   )r3   others     r   __eq__zSequence.__eq__   s    
 4yyCJJ&&r   c           	          | j         }| j        W| j        Pd                    |d                    t	          | j                  t	          | j                  g          g          }| j        r|dz  }|S )z Return the fancy name for the sequence, including start, end, and complementation.
        >>> x = Sequence(name='chr1', seq='ATCGTA', start=1, end=6, comp=True)
        >>> x.fancy_name
        'chr1:1-6 (complement)'
        N:-z (complement))r,   r.   r/   rR   r2   r0   )r3   r,   s     r   rS   zSequence.fancy_name   sj     y:!dh&:88T388S__c$(mm,L#M#MNOOD9 	$O#Dr   c                 L    d}t          j        |t          d           | j        S )a   DEPRECATED: Use fancy_name instead.
        Return the fancy name for the sequence, including start, end, and complementation.
        >>> x = Sequence(name='chr1', seq='ATCGTA', start=1, end=6, comp=True)
        >>> x.long_name
        'chr1:1-6 (complement)'
        zThe `Sequence.long_name` property is deprecated, and will be removed in future versions. Please use `Sequence.fancy_name` instead.   )
stacklevel)warningswarnDeprecationWarningrS   )r3   msgs     r   	long_namezSequence.long_name   s*     Sc-!<<<<r   c                     |                      | j        t          | j                  | j        | j                  }| j        rdnd|_        |S )z Returns the compliment of self.
        >>> x = Sequence(name='chr1', seq='ATCGTA')
        >>> x.complement
        >chr1 (complement)
        TAGCAT
        )r.   r/   FT)r=   r,   rM   r-   r.   r/   r0   )r3   r0   s     r   rM   zSequence.complement   sN     ~~Iz$(++4:48  M M!Y0EED	r   c                     | ddd         S )z Returns the reverse of self.
        >>> x = Sequence(name='chr1', seq='ATCGTA')
        >>> x.reverse
        >chr1
        ATGCTA
        Nr7   r   rJ   s    r   reversezSequence.reverse   s     DDbDzr   c                 j    | j         | j        k     r	| j        sdS | j         | j        k    r	| j        rdS dS )aB   get the orientation forward=1, reverse=-1
        >>> x = Sequence(name='chr1', seq='ATCGTA', start=1, end=6)
        >>> x.orientation
        1
        >>> x.complement.orientation is None
        True
        >>> x[::-1].orientation is None
        True
        >>> x = -x
        >>> x.orientation
        -1
        r6   r7   N)r.   r/   r0   rJ   s    r   orientationzSequence.orientation
  s?     :   1Z$(""ty"24r   c                    | j                             d          }|| j                             d          z  }| j                             d          }|| j                             d          z  }||z   t          | j                   z  S )z Return the GC content of seq as a float
        >>> x = Sequence(name='chr1', seq='ATCGTA')
        >>> y = round(x.gc, 2)
        >>> y == 0.33
        True
        GgCc)r-   countr8   )r3   rm   ro   s      r   gczSequence.gc  sq     HNN3	TX^^C   HNN3	TX^^C   ATX&&r   c                     t          j        dd| j                                                  t	          fddD                       }|t                    z  S )z Return the GC content of seq as a float, ignoring non ACGT characters
        >>> x = Sequence(name='chr1', seq='NMRATCGTA')
        >>> y = round(x.gc, 2)
        >>> y == 0.33
        True
        z[^ACGT]r)   c              3   B   K   | ]}                     |          V  d S r+   rp   .0itrimSeqs     r   	<genexpr>z%Sequence.gc_strict.<locals>.<genexpr>6  s/      55aq!!555555r   )rl   rn   )resubr-   uppersumr8   r3   rq   rx   s     @r   	gc_strictzSequence.gc_strict-  sW     &R)9)9::5555955555CLL  r   c                    t          j        dd| j                                                  t	          fddD                       }|t	          fddD                       dz  z  }|t	          fdd	D                       d
z  z  }|t	          fddD                       dz  z  }|                    d          dz  z  }|t                    z  S )z Return the GC content of seq as a float, accounting for IUPAC ambiguity 
        >>> x = Sequence(name='chr1', seq='NMRATCGTA')
        >>> y = round(x.gc, 2)
        >>> y == 0.36
        True
        z[^ACGTMRWSYKVHDBN]r)   c              3   B   K   | ]}                     |          V  d S r+   rt   ru   s     r   ry   z$Sequence.gc_iupac.<locals>.<genexpr>B  s/      ::q'--""::::::r   )Srn   rl   c              3   B   K   | ]}                     |          V  d S r+   rt   ru   s     r   ry   z$Sequence.gc_iupac.<locals>.<genexpr>C  /      66q'--""666666r   )BVgq=
ףp?c              3   B   K   | ]}                     |          V  d S r+   rt   ru   s     r   ry   z$Sequence.gc_iupac.<locals>.<genexpr>D  s/      >>q'--"">>>>>>r   )MRYKg      ?c              3   B   K   | ]}                     |          V  d S r+   rt   ru   s     r   ry   z$Sequence.gc_iupac.<locals>.<genexpr>E  r   r   )HDgQ?Ng      ?)rz   r{   r-   r|   r}   rp   r8   r~   s     @r   gc_iupaczSequence.gc_iupac9  s     &.DHNN4D4DEE::::M:::::
c6666I66666==
c>>>>,=>>>>>DD
c6666I66666==
gmmC  4''CLL  r   )r)   r)   NNF)r   r   r   r   r4   rG   rK   rN   rT   rV   rZ   propertyrS   re   rM   rh   rj   rq   r   r   r   r   r   r(   r(   M   sn        $ $ $ $UI UI UIn  % % % F F F  ' ' '   X 	 	 X	 
 
 X
   X   X( ' ' X' 	! 	! X	! ! ! X! ! !r   r(   c                   $    e Zd ZdZd Zd Zd ZdS )IndexRecordr   c                     t          |          t          k    rt          | |          S t                              | |          S r+   )typer2   getattrtuplerG   )r3   keys     r   rG   zIndexRecord.__getitem__O  s9    994%%%  s+++r   c                 @     dj         di |                                 S )Nz%{rlen:d}	{offset:d}	{lenc:d}	{lenb:d}r   )format_asdictrJ   s    r   rK   zIndexRecord.__str__T  s/    @9@  llnn  	r   c                     | j         S r+   )rlenrJ   s    r   rV   zIndexRecord.__len__X  s
    yr   N)r   r   r   	__slots__rG   rK   rV   r   r   r   r   r   J  sH         I, , ,
      r   r   )r   offsetlenclenbbend	prev_bendc                       e Zd ZdZddd ddddddd dddddfdZd	 Zd
 Zd Zd Zd Z	d Z
d Zd Zd Zd ZddZd Zd Zd Zd Zd Zd Zd Zd Zd ZdS )Faidxz: A python implementation of samtools faidx FASTA indexing Nc                     | S r+   r   xs    r   <lambda>zFaidx.<lambda>c       r   Fstopc                     dS NTr   r   s    r   r   zFaidx.<lambda>j       r   Tc                 <   t           rt          |t           j        j                  r~|j        | _        t          |dd          dk    sJ t          |dd          J 	 |                                | _        n # t          $ r t          d|z            w xY w|j        | _        nt          |t                    st          |d          rWt          |          | _        	 t          ||rdnd          | _        n # t          $ r t          d|z            w xY wd| _        nt          d	|z            t           r8t          |t           j        j                  r|j        | _        |j        | _        nqt          |t                    st          |d          rt          |          | _        d| _        n0|| j        d
z   | _        | j        | _        nt          d|z            | j                                                            d          r	 ddlm} ddlm} ddlm}  ||           |d          k     rt2          	 d| _        	 |                    | j        d          | _        n# t8          t          f$ r t;          d          w xY w# t2          $ r t3          d          w xY w| j                                                            d          rt;          d          d| _        || _        || _        	 |                     d          }t          |t                    s/tA          d!                    tE          |                              n# tF          $ r
}Y d}~nd}~ww xY w|| _$        |
dv sJ |
| _%        || _&        || _'        | j        r| j'        tQ          d          | j        rd| _)        n|| _)        |	| _*        || _+        || _,        t[                      | _.        t_                      | _0        tc          d          | _2        |rt          |tf                    r|| _4        nDt          |tf                    s/t9          d!                    tE          |                              || _5        | j0        5  | j        %tl          j        7                    | j                  }n| j        7                    | j                  }|rptq          | j        | j                  }tq          | j        | j                  }||1ts          j:        dtE          | j                  j;        z             d}n	||k    }nd}|rD|r|r@|r>	 | <                                 n(# tz          $ r | j        >                                  w xY w	 | ?                                 n(# tF          $ r | j        >                                  w xY w|r:|s8ts          j:        d !                    | j        | j                  t                     ddd           dS # 1 swxY w Y   dS )!a  
        filename: name of fasta file or fsspec.core.OpenFile instance
        indexname: name of index file or fsspec.core.OpenFile instance
        key_function: optional callback function which should return a unique
          key for the self.index dictionary when given rname.
        as_raw: optional parameter to specify whether to return sequences as a
          Sequence() object or as a raw string.
          Default: False (i.e. return a Sequence() object).
        moderbcompressionNz"Cannot read FASTA from OpenFile %s
__fspath__zr+bzCannot read FASTA from file %sz>filename expected str, os.PathLike or fsspec.OpenFile, got: %rz.faizIindexname expected NoneType, str, os.PathLike or fsspec.OpenFile, got: %r)z.bgzz.gzr   )bgzf)__version__)Versionz1.73T)fileobjr   zCompressed FASTA is only supported in BGZF format. Use the samtools bgzip utility (instead of gzip) to compress your FASTA. For example: gunzip file.fa.gz; bgzip file.faz=BioPython >= 1.73 must be installed to read block gzip files.)z.bz2z.zipzUCompressed FASTA is only supported in BGZF format. Use bgzip to compresss your FASTA.Fz!TestingReturnType of_key_functionz5key_function argument should return a string, not {0})r   firstlastlongestshortestdropzwThe default_seq argument is not supported with using BGZF compression. Please decompress your FASTA file and try again.))r-   N)r,   N)r.   N)r/   Nz%read_ahead value must be int, not {0}z(for fsspec: %s assuming index is currentz,Index file {0} is older than FASTA file {1}.)Afsspecr1   coreOpenFilepathfilenamer   openfileIOErrorr   fs_fsr2   hasattr	TypeError	indexname_fai_fslowerendswithBior   r   packaging.versionr   ImportError_bgzf
BgzfReaderr:   r&   read_long_nameskey_functionr   r   r   	Exceptionfilt_functionduplicate_actionas_rawdefault_seqr    strict_bounds
split_charone_based_attributessequence_always_upperr   indexr	   lockdictbufferr>   
read_aheadmutableosexistsgetmtime_fsspecra   rb   r   build_indexr   closeread_faiRuntimeWarning)r3   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rebuildr   r   bgzf_versionr   key_fn_testeindex_existsf_mtimei_mtimeindex_is_stales                             r   r4   zFaidx.__init___  sC   6  	ij6;+?@@ 	i$MDM8VT22d::::8]D99AAAZ$MMOO		 Z Z Z()MPX)XYYYZ{DHH#&& 		i'(L*I*I 		iMMDMV G+E55FF		 V V V()IH)TUUUVDHH \_gghhh 	ujFK,@AA 	u&^DN$<DLL	3'' 		u79l+K+K 		u ^^DNDLL!]V3DN8DLL gjssttt=  ))/:: 	$$$$$$;;;;;;5555557<((776??::%% ; "
	 $	 M MDII"G,   6H    U U U!SU U UU ]  ""++,<== 	.12 2 2 DJ.(	++35 5Kk3// /&KF4,,--/ / //  	 	 	DDDD	* $8 8 8 8 8 0&: 	$*6 J   : 	/!%D!.D$$8!%:" ]]
FF	 + , , 	#Z
C88 	#(DOOJ,, 	#DKKZ  " " # # # Y '	 '	|#!w~~dn==#|224>BB 	')$-BB)$.$,GG?goM"LtTXT\~~Of"fggg%*NN%,w%6NN!& %*8=D$$&&&&)   IOO%%%   	!!!  g BII %  G'	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	s   A8 8BC9 9D1J 
!I, ,!JJ*>AM 
M,'M,CXU"!X"%VXV X %W?XXXc                     | j         d         sdS |\  }}}| j         d         |k    r$| j         d         |k    r| j         d         |k    rdS dS )Nr,   Fr.   r/   Tr   )r3   regionr,   r.   r/   s        r   __contains__zFaidx.__contains__  sb    {6" 	5!eS;v$&&4;w+?5+H+HT[Y^M_cfMfMf45r   c                     d| j         z  S )NzFaidx("%s")r   rJ   s    r   rT   zFaidx.__repr__      ..r   c              #      K   | j                                         D ]+\  }}d                    |t          |                    V  ,dS )z< Returns the string representation of the index as iterable z{k}	{v}
)kvN)r   itemsr   r2   )r3   r   r   s      r   _index_as_stringzFaidx._index_as_string  sY      J$$&& 	3 	3DAq##a3q66#22
2
2
2
2	3 	3r   c           
      D   	 |                      d          5 }d}g }|D ]"}|                                }|                    d          \  }}}}}	t          t          ||||	f          \  }}}}	|r%t	          t          ||z            |	|z
  z            nd}
||
z   |z   }t          ||||	||          }| j        r|                     |          }| j	        rAt          | j        |                     |                              | j	                            }n)t          | j        |                     |          g          }|D ]}|| j        v r| j        dk    rt          d|z            | j        dk    r4| j        dk    r|| j        |<   J| j        d	k    r6t!          |          t!          | j        |                   k    r
|| j        |<   | j        d
k    r6t!          |          t!          | j        |                   k     r
|| j        |<   | j        dk    r||vr|                    |           || j        |<   |}$	 d d d            n# 1 swxY w Y   |D ]}| j                            |d            d S # t&          $ r t)          d| j        z            w xY w)Nrr   r   	r   zDuplicate key "%s"r   r   r   r   r   zCould not read index file %s)	_open_fairstripsplitmapr>   r   r   r   _long_name_from_index_recordr   filterr   r   r   r   r:   r8   appendpopr   r   r   )r3   r   r   	drop_keyslinernamer   r   r   r   newlinesr   recr   dups                  r   r   zFaidx.read_fai  sY   /	AS)) )%U		! &% &%D;;==D6:jj6F6F3E4t/2348&$3M0O 0O,D&$IMTs4t#4#4t#DEEESTH!H,t3D%dFD$&/1 1C+ G $ A A# F F C &t'9'+'8'8'?'?'E'E+/?(< (<!= !=
 !'t'9(,(9(9%(@(@'A!C !C$ 2 2$*,,#4>>&01E1K&L&L L!%!6'!A!A (!%!6&!@!@25
3!%!6)!C!C#&s88c$*S/.B.B#B#B69DJsO!%!6*!D!D#&s88c$*S/.B.B#B#B69DJsO!%!6&!@!@#&i#7#7$-$4$4S$9$9$9.1DJsOO $IIM&%)% )% )% )% )% )% )% )% )% )% )% )% )% )% )%T ! * *
sD))))* * 	A 	A 	A$.?A A A	As/   I= H+II= II= I#I= ="Jc                 *   | j                                         dk    sJ 	 t          | j                   5 }t          d          5 }d }d}d}d}d}g }|}	d}
d }t	          |          D ]\  }}t          |          }|                                }t          |                    d                    }|}|d         dk    r$t          |||dz
            }
|
r3|dk    r-|	                    d
                    |||	||                     n4|
s2t          d	
                    ||d         d         dz                       d}d}d}g }	 |                    d          dd                                          d         }nD# t          $ r7 t          d
|                    d          dt          |          d          w xY w||z  }| j        r|                                n|}	~|s|}|s|}||k    s|dk    r|                    ||f           ||z  }||z  }|
st          d| j        z            |rt          |||          }
|
r-|	                    d
                    |||	||                     n2t          d	
                    ||d         d         dz                       |                    d           |                     d          5 }t)          j        ||           d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   d d d            d S # 1 swxY w Y   d S # t,          t          f$ rN}t/          |t,                    rt-          d| j        z            t/          |t                    r|Y d }~d S d }~ww xY w)Nr   zw+r   Fz
rQ   r6   z{0}	{1:d}	{2:d}	{3:d}	{4:d}
z[Line length of fasta file is not consistent! Inconsistent line found in >{0} at line {1:n}.zBad sequence name z	 at line .ziThe FASTA file %s does not contain a valid sequence. Check that sequence definition lines start with '>'.z{0:s}	{1:d}	{2:d}	{3:d}	{4:d}
wzd%s may not be writable. Please use Fasta(rebuild=False), Faidx(rebuild=False) or faidx --no-rebuild.)r   tellRewindr   	enumerater8   decoder  check_bad_lineswriter   r   r  
IndexErrorr2   r   r  r   seekr  shutilcopyfileobjr   r1   r   )r3   	fastafile	indexfiler  r   r   blenclen	bad_lines
thisoffsetvalid_entrylastlinerw   r  	line_blen	line_clentargetr   s                     r   r   zFaidx.build_indexN  s   y~~1$$$$]		"" U>i"--- T> EFDDD "I!'J"'K#H#,Y#7#7 .. ..4$'II	#{{}}$'F(;(;$<$<	#$7c>>*9 %y!a%+9 +9K* Eq1uu )$G$N$N(-tZt%M %M!N !N !N !N &1 E&8%2 39&(-y|A/B3D 3D'E 'E !E $%D#$D#$D(*IC(,F(;(;ABB(?(E(E(G(G$%)'#- C C C&8&8%)[[%8%8%8%8#a&&&&%B'C 'C !CC #i/F!%*8 * * *17 'J $( 1'0#' 1'0  )D00INN ) 0 0!Y @ @ @"i/F I-DD ' d0SUYUbcd d d
  +&5!9h' ' ' K%OO E L L$)4T4!I !IJ J J J #5!. /5fU5>q\!_q5H/J /J#K #K K NN1%%%S11 >V*9f===> > > > > > > > > > > > > > >gT> T> T> T> T> T> T> T> T> T> T> T> T> T> T>U> U> U> U> U> U> U> U> U> U> U> U> U> U> U> U> U> U>l +, 	 	 	!W%% zn%& & & A122      	s   L3 L&DL5E>=L>AF?	?DLK7+L7K;;L>K;?LL&L	L&L	L&L3 &L**L3 -L*.L3 3NANNc                     | j         5  |                     d          5 }|                                 D ]}|                    |           	 d d d            n# 1 swxY w Y   d d d            d S # 1 swxY w Y   d S )Nr  r   )r   r  r   r  )r3   outfiler  s      r   	write_faizFaidx.write_fai  s	   Y 	( 	(S)) (W 1133 ( (DMM$''''(( ( ( ( ( ( ( ( ( ( ( ( ( ( (	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	(s4   A1-AA1A	A1 A	!A11A58A5c                 ~    | j         r!| j                             | j        |          S t          | j        |          S )Nr   )r   r   r   )r3   r   s     r   r  zFaidx._open_fai  s>    < 	3<$$T^$$???T2222r   c                 r    || j         d         z
  }|| j         d         z
  dz   }| j         d         ||         S )Nr.   r6   r-   r   )r3   r.   r/   i_starti_ends        r   from_bufferzFaidx.from_buffer  s?    $+g..dk'**Q.{5!'%-00r   c                     	 |                      |||          }|| j        d<   || j        d<   || j        d<   || j        d<   d S # t          $ r Y d S w xY w)Nr-   r.   r/   r,   )	from_filer   r    r3   r,   r.   r/   r-   s        r   fill_bufferzFaidx.fill_buffer  st    	..uc22C!$DK#(DK !$DK"&DK 	 	 	DD	s   ?A 
AAc                     | j         r&|||f| vr|                     |||| j         z              |||f| v r|                     ||          }n|                     |||          }|                     ||||          S r+   )r   r3  r/  r1  
format_seqr2  s        r   fetchzFaidx.fetch  s    ? 	AD%#5#=#=T5#*?@@@%%%""5#..CC..uc22CsD%555r   c                 F   |t          |          k    sJ |t          |          k    sJ 	 | j        |         }n0# t          $ r# t          d                    |                    w xY w|dz
  }|dk     rt          d          ||z
  }|dk    r!|j        rt          |dz
  |j        z            nd}|j        rt          ||j        z            nd}	|	|z
  }
|j        |j        z
  }|
|z  |z   }|j        ||z  z   |z   }|dk     r*| j        r#t          d                    ||                    ||j	        k    r*| j        r#t          d                    ||                    | j
        5  | j        rj| j                            |j                   |||z  z   |
|z  z   |z   }| j                            |                                          }|||z   d         }nt| j                            |           ||z   |j        k    r| j        s
|j        |z
  }|dk    r-| j                            |                                          }n|dk    rd}ddd           n# 1 swxY w Y   |s*|                    d	d                              d
d          S |t%                      fS )aT   Fetch the sequence ``[start:end]`` from ``rname`` using 1-based coordinates
        1. Count newlines before start
        2. Count newlines to end
        3. Difference of 1 and 2 is number of newlines in [start:end]
        4. Seek to start position, taking newlines into account
        5. Read to end position, return sequence
        zARequested rname {0} does not exist! Please check your FASTA file.r6   r   z2Requested start coordinate must be greater than 1.z9Requested coordinates start={0:n} end={1:n} are invalid.
z0Requested end coordinate {0:n} outside of {1}. 
Nr)   rP   )r>   r   KeyErrorr    r   r   r   r   r   r   r   r   r   r  readr  r   replacelocals)r3   r  r.   r/   	internalsrw   start0seq_lennewlines_beforenewlines_to_endnewlines_insidenewline_blenseq_blenbstartchunk	chunk_seqr-   s                    r   r1  zFaidx.from_file  sE    E

""""c#hh	L
5!AA 	L 	L 	L ==CVE]]L L L	L A::DF F F, '-qjjQVj aZ16!# # #AB 	/0v<#cAFl+++1)O;v"\1G;Ol::VCa<<D.< **0&*<*<> > >16\\d0\ ""(&e"4"46 6 6 Y 	 	z 	qx(((/L"@A_WcEcdgnn INN51188::	 8 9 9:	v&&& H$qv--d6H- vHa<<)..2299;;CC ]]C%	 	 	 	 	 	 	 	 	 	 	 	 	 	 	(  	#;;tR((00r:::?"s   : -A'*C&II #I c                    |dz
  }t          |          ||z
  k     r<| j        r5||z
  t          |          z
  }d                    ||| j        z  g          }n|t          |          z   }| j        r|                                }| j        s|}| j        r|S t          |t          |          t          |          |          S )Nr6   r)   )r,   r.   r/   r-   )	r8   r   rR   r   r|   r   r   r(   r>   )r3   r-   r  r.   r/   r>  pad_lens          r   r5  zFaidx.format_seq  s    
 
&L !- FlSXX-G''3$*: :;<<CC3s88#C% 	))++C( 	E; 	EJ#e**#c((E E E Er   c                 `   | j         st          d          | j         r| j        rt          d          |                     |||d          \  }}| j        5  t          |          t          |          |d         z
  k    rt          d          t          |          t          |          |d         z
  k    r\|d         j        }d|v rd}n	d	|v rd	}nd
}| j        	                    |d                    |d         dk    r-| j        
                    |                                           n|d         dk    rd}	|                    |          }
|
t          |          k     rc| j        
                    d                    ||	|
         |g                                                     |
}	|
|z  }
|
t          |          k     c| j        
                    ||	d                                                    | j                                         ddd           dS # 1 swxY w Y   dS )zb Write sequence in region from start-end, overwriting current
        contents of the FASTA file. zXWrite attempted for immutable Faidx instance. Set mutable=True to modify original FASTA.zCWriting to mutable instances is not implemented for fsspec objects.T)r=  rB  zISpecified replacement sequence needs to have the same length as original.rw   z
r8  rP   rE  r   r)   N)r   r   r   NotImplementedErrorr1  r   r8   r   r   r  r  encoder   rR   flush)r3   r  r.   r/   r-   file_seqr=  line_lennewline_charr?   ms              r   to_filezFaidx.to_file(  sz    | 	j   \ 	dh 	%U   #nnUE3$nOO)Y 	& 	&3xx3x==95F+GGGG_   SS]]Y7H-IIII$S>.X%%#)LLX%%#'LL#'L	y2333./144IOOCJJLL111101A55A |44Ac#hh,,	QqS<0H(I(I(P(P(R(RSSSX c#hh,, IOOCGNN$4$4555IOO%%%1	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&s   F?H##H'*H'c                 ~    | j         |         }| j        r|                     |          S |                     |          S )zX Return the full sequence defline and description. External method using the self.index )r   r   _long_name_from_bgzfr  )r3   r  index_records      r   get_long_namezFaidx.get_long_nameO  sA    z%(: 	C,,\:::44\BBBr   c                     |j         }|j        }| j                            |           | j                            ||z
                                            dd         S )zW Return the full sequence defline and description. Internal method passing IndexRecord r6   r7   )r   r   r   r  r:  r  )r3   rU  r   defline_ends       r   r  z"Faidx._long_name_from_index_recordW  sT     *	")	y!!!y~~kI566==??"EEr   c                      t          d          )a   Return the full sequence defline and description. Internal method passing IndexRecord
        This method is present for compatibility with BGZF files, since we cannot subtract their offsets.
        It may be possible to implement a more efficient method. zlFastaRecord.long_name and Fasta(read_long_names=True) are not supported currently for BGZF compressed files.)	rK  r   r   r  r:  r  r  rR   r  )r3   rU  r   deflinerF  s        r   rT  zFaidx._long_name_from_bgzf^  s     "EF F 	Fr   c                 .    |                                   d S r+   __exit__rJ   s    r   r   zFaidx.closeo      r   c                 .    |                                   d S r+   r\  rJ   s    r   __del__zFaidx.__del__r  r^  r   c                     | S r+   r   rJ   s    r   	__enter__zFaidx.__enter__u      r   c                 8    | j                                          d S r+   )r   r   r3   argss     r   r]  zFaidx.__exit__x  s    	r   F)r   r   r   r   r4   r   rT   r   r   r   r*  r  r/  r3  r6  r1  r5  rR  rV  r  rT  r   r`  rb  r]  r   r   r   r   r   \  s       DD  !)k$  "(-~&*!&',!!j j j jX  / / /3 3 3
0A 0A 0Ad_ _ _B( ( (3 3 31 1 1
  	6 	6 	6;# ;# ;# ;#zE E E,%& %& %&NC C CF F F5 5 5"          r   r   c                       e Zd ZddgZd Zd Zd Zd Zd Zd Z	e
d	             Zd
 Ze
d             Ze
d             Ze
d             ZdS )FastaRecordr,   _fac                 "    || _         || _        d S r+   )r,   rj  )r3   r,   fas      r   r4   zFastaRecord.__init__  s    	r   c                    	 t          |t                    r|j        |j        |j        }}}|d}|t          |           }|dk     rt          |           |z   }|dk     rt          |           |z   }| j                            | j        |dz   |          dd|         S t          |t                    r?|dk     rt          |           |z   }| j                            | j        |dz   |dz             S dS # t          $ r  w xY w)zYReturn sequence from region [start, end)

        Coordinates are 0-based, end-exclusive.Nr   r6   )r1   r;   r.   r   stepr8   rj  get_seqr,   r>   r    )r3   r?   r.   r   rn  s        r   rG   zFastaRecord.__getitem__  s   	!U## A$%GQVQVTt=E<t99D!88t99t+D199II-Ex''	519dCCFFdFKKAs## Aq55D		AAx''	1q5!a%@@@A A  	 	 		s   BC4 AC4 4D c              #      K   | j         j        j        | j                 j        }d}	 ||z   }|t          |           k     r| ||         V  n| |d         V  dS ||z  }9)zK Construct a line-based generator that respects the original line lengths. r   TNrj  faidxr   r,   r   r8   )r3   rO  r.   r/   s       r   __iter__zFastaRecord.__iter__  s~      8>'	27	("CSYY59o%%%%566l"""XE	r   c              #   T  K   | j         j        j        | j                 j        }t          |           |z  }|dk    r|}t          |           }||z
  }	 |dk    r| ||         ddd         V  n| d|         ddd         V  dS |t          |           k    r||z  }n||z  }||z
  }W)z Reverse line-based generator r   TNr7   rq  )r3   rO  	last_liner/   r.   s        r   __reversed__zFastaRecord.__reversed__  s      8>'	27II(	>> I$iii
	#qyy59oddd+++++4C4j2&&&&c$iiy x(NE
	#r   c                     d| j         z  S )NzFastaRecord("%s"))r,   rJ   s    r   rT   zFastaRecord.__repr__  s    "di00r   c                 D    | j         j        j        | j                 j        S r+   )rj  rr  r   r,   r   rJ   s    r   rV   zFastaRecord.__len__  s    x~#DI.33r   c                    t          |           }d}t          |           D ]G}|r nBt          |t                    r|j        }|                                D ]}|dk    r|dz  }d} Hd}t          |           D ]G}|r nBt          |t                    r|j        }|                                D ]}|dk    r|dz  }d} H|S )z Returns the length of the contig without 5' and 3' N padding.
        Functions the same as contigNonNSize in Fasta.cpp at
        https://github.com/Illumina/hap.py/blob/master/src/c%2B%2B/lib/tools/Fasta.cpp#L284
        Fr   r6   T)r8   iterr1   r(   r-   r|   reversed)r3   lengthr   r  bases        r   unpadded_lenzFastaRecord.unpadded_len  s     TJJ 
	 
	D $))  x

  3;;aKFFDTNN 
	 
	D $))  x

  3;;aKFFDr   c                 0    t          | d d                    S r+   rX   rJ   s    r   rK   zFastaRecord.__str__  s    47||r   c                    t          | j        t                    rg }| j        j                            | j        dt          |                     }|D ]n}|j        re|                    | j        j	                  }|j
        | j        j
        v r3t          | j        j                  r|                    |j                   ot          |          S t!          d          )Nr   z,variant_sites() only valid for FastaVariant.)r1   rj  FastaVariantvcfr6  r,   r8   is_snpgenotypesamplegt_typeevalr  r  POSr   rK  )r3   posvarsiter  s        r   variant_siteszFastaRecord.variant_sites  s    dh-- 	@C(,$$TY3t99==C - -; -!]]48?;;F~)999d HO?- ?-9

48,,,::%>@ @ @r   c                 J    | j         j                            | j                  S )z? Read the actual defline from self._fa.faidx mdshw5/pyfaidx#54 )rj  rr  rV  r,   rJ   s    r   re   zFastaRecord.long_name  s     x~++DI666r   c                     t          |           fddt          t          |                               d                    dS )z/ Implement numpy array interface for issue #139z|S1   ascii)shapetypestrr
   data)r8   
memoryviewr2   rL  rJ   s    r   __array_interface__zFastaRecord.__array_interface__  sC     $ii]s4yy//8899	
 
 	
r   N)r   r   r   r   r4   rG   rs  rv  rT   rV   r   r~  rK   r  re   r  r   r   r   ri  ri  |  s        I    0  # # #*1 1 14 4 4   X@   @ @ X@ 7 7 X7 
 
 X
 
 
r   ri  c                   $     e Zd Z fdZd Z xZS )MutableFastaRecordc                     t          t          |                               ||           | j        j        j        rt          d          d S )NzaBGZF compressed FASTA is not supported for MutableFastaRecord. Please decompress your FASTA file.)superr  r4   rj  rr  r   r&   )r3   r,   rl  r=   s      r   r4   zMutableFastaRecord.__init__  sT     $''00r:::8> 	6.56 6 6	6 	6r   c                 @   	 t          |t                    r|j        |j        |j        }}}|rt          d          |sd}|st          |           }|dk     rt          |           |z   }|dk     rt          |           |z   }| j        j        	                    | j
        |dz   ||           dS t          |t                    rE|dk     rt          |           |z   }| j        j        	                    | j
        |dz   |dz   |          S dS # t          t          f$ r  w xY w)zhMutate sequence in region [start, end)
        to value.
        Coordinates are 0-based, end-exclusive.z!Step operator is not implemented.r   r6   N)r1   r;   r.   r   rn  r  r8   rj  rr  rR  r,   r>   r    r   )r3   r?   valuer.   r   rn  s         r   __setitem__zMutableFastaRecord.__setitem__  sE   	!U## N$%GQVQVTt J$%HIII E %t99D!88t99t+D199II-E&&ty%!)T5IIIIIAs## Nq55D		AAx~--diQAuMMMN N G$ 	 	 		s   B*D
 .AD
 
D)r   r   r   r4   r  __classcell__r=   s   @r   r  r    sG        6 6 6 6 6      r   r  c                       e Zd Zddd dddddd ddddddfdZd Zd	 Zd
 Zd Zd ZddZ	ddZ
d Zd Zd Zd Zd Zd ZdS )FastaNc                     | S r+   r   r   s    r   r   zFasta.<lambda>/  r   r   Fc                     dS r   r   r   s    r   r   zFasta.<lambda>5  r   r   Tr   c                     | _         t          |f||||||||	|
||||||d _         j        j         _         j         rt          nt
          t           fd j        j                                        D                        _	        dS )z
        An object that provides a pygr compatible interface.
        filename:  name of fasta file or fsspec.core.OpenFile instance
        indexname: name of index file or fsspec.core.OpenFile instance
        )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   c                 ,    g | ]}| |          fS r   r   )rv   r  _record_constructorr3   s     r   
<listcomp>z"Fasta.__init__.<locals>.<listcomp>W  s-    #s#s#sRWU,?,?t,L,L$M#s#s#sr   N)
r   r   rr  r   r  ri  r   r   keysrecords)r3   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  s   `                @r   r4   zFasta.__init__+  s    , %%#'!!'!5+-"7#!% % %
$ 
+48LQ00k"#s#s#s#s#s[_[e[k[p[p[r[r#s#s#sttr   c                     || j         j        v S )z&Return True if genome contains record.)rr  r   r3   r  s     r   r   zFasta.__contains__Y  s    
(((r   c                    t          |t                    r5t          t          | j                                        |d                    }	 | j        |         S # t          $ r) t          d                    || j                            w xY w)z8Return a chromosome by its name, or its numerical index.Nz{0} not in {1}.)	r1   r>   nextr   r  r  r9  r   r   r  s     r   rG   zFasta.__getitem__]  s    eS!! 	C 1 1 3 3UDAABBE	K<&& 	K 	K 	K,33E4=IIJJJ	Ks   A 3Bc                     d| j         z  S )NzFasta("%s")r   rJ   s    r   rT   zFasta.__repr__f  r   r   c                 N    t          | j                                                  S r+   )rz  r  valuesrJ   s    r   rs  zFasta.__iter__i  s    DL''))***r   c                 4    t          d | D                       S )zAReturn the cumulative length of all FastaRecords in self.records.c              3   4   K   | ]}t          |          V  d S r+   )r8   )rv   records     r   ry   z Fasta.__len__.<locals>.<genexpr>n  s(      2263v;;222222r   )r}   rJ   s    r   rV   zFasta.__len__l  s    22T222222r   c                 H    | j                             |||          }|r| S |S )zReturn a sequence by record name and interval [start, end).

        Coordinates are 1-based, end-exclusive.
        If rc is set, reverse complement will be returned.
        rr  r6  )r3   r,   r.   r/   rcr-   s         r   ro  zFasta.get_seqp  s1     jtUC00 	4KJr   c                      fd|D             }|d         j         }|d         j        }|r)d                    d |ddd         D                       }nd                    d |D                       }t          |dd          S )	zReturn a sequence by record name and list of intervals

        Interval list is an iterable of [start, end].
        Coordinates are 1-based, end-exclusive.
        If rc is set, reverse complement will be returned.
        c                 N    g | ]!\  }}j                             ||          "S r   r  )rv   sr   r,   r3   s      r   r  z)Fasta.get_spliced_seq.<locals>.<listcomp>  s1    EEE41a$*""4A..EEEr   r   r7   r)   c                     g | ]
}| j         S r   rI   rv   rF  s     r   r  z)Fasta.get_spliced_seq.<locals>.<listcomp>  s    BBBEUF<BBBr   Nc                     g | ]	}|j         
S r   rI   r  s     r   r  z)Fasta.get_spliced_seq.<locals>.<listcomp>  s    99959999r   )r,   r-   r.   r/   )r.   r/   rR   r(   )r3   r,   	intervalsr  chunksr.   r/   r-   s   ``      r   get_spliced_seqzFasta.get_spliced_seq}  s     FEEEE9EEEq	Rjn  	;''BBVDDbD\BBBCCCC''99&999::C Ts$DAAAAr   c                 4    | j                                         S r+   )r  r  rJ   s    r   r  z
Fasta.keys  s    |  """r   c                 4    | j                                         S r+   )r  r  rJ   s    r   r  zFasta.values  s    |""$$$r   c                 4    | j                                         S r+   )r  r   rJ   s    r   r   zFasta.items  s    |!!###r   c                 .    |                                   d S r+   r\  rJ   s    r   r   zFasta.close  r^  r   c                     | S r+   r   rJ   s    r   rb  zFasta.__enter__  rc  r   c                 $     | j         j        |  d S r+   )rr  r]  re  s     r   r]  zFasta.__exit__  s    
T""""r   rg  )r   r   r   r4   r   rG   rT   rs  rV   ro  r  r  r  r   r   rb  r]  r   r   r   r  r  *  s4         !)k$  -~&*!&"(',!!,u ,u ,u ,u\) ) )K K K/ / /+ + +3 3 3   B B B B,# # #% % %$ $ $    # # # # #r   r  c                   N     e Zd ZdZ ed          Z	 	 	 	 d fd	Zd Zd Z xZ	S )	r  z9 Return consensus sequence from FASTA and VCF inputs
    )rQ   <=!NTc                 $     t          t                     j        |fi | 	 dd l}n# t          $ r t	          d          w xY w|	 |                                \  }	}
}n# t          $ r t          d          w xY wt           fdt          |
          D                       sJ t          d t          |	          D                       sJ t          d t          |          D                       sJ  dj
        di t                       _        nd _        t          j                            |          r|                    |	           _        n"t#          d

                    |                    t          j                            |dz             s"t%          d
                    |                    || _        nh j        j        d          _        t+           j        j                  dk    r4|2t-          j        d
                     j                  t0                     |r|rt3          d           _        d S |rt3          d           _        d S |rt3          d           _        d S t3                       _        d S )Nr   z*PyVCF3 must be installed for FastaVariant.z7call_filter must be a string in the format 'XX <>!= NN'c                 "    g | ]}|j         v S r   )expr)rv   r   r3   s     r   r  z)FastaVariant.__init__.<locals>.<listcomp>  s    ;;;1TY;;;r   c                 *    g | ]}|t           j        v S r   )stringascii_uppercaserv   r   s     r   r  z)FastaVariant.__init__.<locals>.<listcomp>  s     GGGV33GGGr   c                 *    g | ]}|t           j        v S r   )r  	printabler  s     r   r  z)FastaVariant.__init__.<locals>.<listcomp>  s     CCC!V--CCCr   zsample['{key}'] {expr} {value}Truer   zFile {0} does not exist.z.tbizFile {0} has not tabix index.r6   zUsing sample {0} genotypes.)r6   r_   )r6   )r_   r   )r  r  r4   r  r   r  r  r:   alllistr   r<  r  r   r   r   Readerr   r   r  samplesr8   ra   rb   r   setr  )r3   r   vcf_filer  hethomcall_filterkwargsr  r   r  r  r=   s   `           r   r4   zFastaVariant.__init__  s    	+lD!!*8>>v>>>	LJJJJ 	L 	L 	LJKKK	L"O#.#4#4#6#6 T55 O O O MO O OO ;;;;T

;;;<<<<<GGT#YYGGGHHHHHCCtE{{CCCDDDDDA:AMMFHHMMDKK DK7>>(## 	Gzz8z44DHH4;;HEEFFFw~~h/00 	Z'(G(N(Nx(X(XYYY DKK(*1-DK48#$$q((V^;BBK! !"02 2 2 	!3 	!v;;DLLL 	!u::DLLL 	!u::DLLL55DLLLs   * A
A# #A=c                 \    d| j         d| j        j         dt          | j                  dS )NzFastaVariant("z", "z", gt="z"))r   r  r2   r  rJ   s    r   rT   zFastaVariant.__repr__  s:     6:mmm6:h6G6G6G69$,6G6G6G6GI 	Ir   c                    | j                             |||          }| j         j        rt          |          }~nt          |j                  }|`	 | j                            ||dz
  |          }|D ]}|j        r}|                    | j                  }|j	        | j	        v rUt          | j                  rA|j        d         }	|j        dz
  |dz
  z
  }
t          |	          ||
|
t          |	          z   <   n9# t           $ r,}| j        j        || j        j        j        vrn|Y d}~nd}~ww xY w| j         j        r#d                    |d||z
  dz                      S d                    |d||z
  dz                      |_        |S )zReturn a sequence by record name and interval [start, end).
        Replace positions with polymorphism with variant.
        Coordinates are 0-based, end-exclusive.
        r6   r   Nr)   )rr  r6  r   r  r-   r  r  r  r  r  r  r  ALTr  r2   r8   r:   _tabixcontigsrR   )r3   r,   r.   r/   r-   seq_mutr  r  r  altrw   r   s               r   ro  zFastaVariant.get_seq  s   
 jtUC00: 	3iiG37mmG	(..uqy#66C ; ;= ;#__T[99F~55$t{:K:K5$jm#Z!^	:25c((!c#hh,/;  	 	 	x*t48?;R/R/R   		 : 	777#3C%K!O#34555ggg&6sU{Q&6788CGJs   B(C: :
D0"D++D0)NTTN)
r   r   r   r   r  r  r4   rT   ro  r  r  s   @r   r  r    s         3#$$D
 !-! -! -! -! -! -!^I I I
# # # # # # #r   r  r)   c              #      K   t          |          g| z  }t          |d|iD ]}d                    |dz             V  d S )N	fillvaluer)   )rP   )rz  r   rR   )r?   sequencer  rf  r  s        r   wrap_sequencer    s^      NNaD$7i77 ' 'ggdXo&&&&&&' 'r   c                   $    e Zd ZdZd Zd Zd ZdS )r  z[
    use a fileobject in a context manager and rewind it back to its original position
    c                 "    || _         d | _        d S r+   )r   origin)r3   r   s     r   r4   zRewind.__init__  s    r   c                 L    | j                                         | _        | j         S r+   )r   r  r  rJ   s    r   rb  zRewind.__enter__  s    l''))|r   c                 R    | j                             | j                   d | _        d S r+   )r   r  r  )r3   exc_typeexc_valexc_tbs       r   r]  zRewind.__exit__  s%    $+&&&r   N)r   r   r   r   r4   rb  r]  r   r   r   r  r    sK                 r   r  c                    |t          |           }nK|                    |           }d|v r	|d         }n)d|v r	|d         }nd|v r	|d         }nd|v r	|d         }ndS t          |t                    r|S t          |t                    rEt
          j                            |                    dd                                                    S t          |t
          j                  r|                                S dS )z>get the modification time of a file in a fsspec compatible wayNmtimeLastModifiedupdatedcreatedZz+00:00)	r   statr1   floatr2   datetimefromisoformatr;  	timestamp)r   r   r  f_infos       r   r   r     s	   	z f7OEEv%%>*EE&  9%EE&  9%EE4% 	E3		  ..u}}S(/K/KLLVVXXX	E8,	-	-    tr   ) ACTGNactgnYRWSKMDVHBXyrwskmdvhbx TGACNtgacnRYWSMKHBDVXrywsmkhbdvxc              #   l   K   | ]/}t          |          t          d          vt          |          V  0dS )r   N)chrcomplement_mapr  s     r   ry   ry   >  sJ       C C#a&&q0A"A"ACFF"A"A"A"AC Cr      r6   c                 F    t                        j        t           }t          |          t                     k    rat	           fdt          t                               D                       }t          d                     |         |dz                       |S )z` Returns the complement of seq.
    >>> seq = 'ATCGTA'
    >>> complement(seq)
    'TAGCAT'
    c              3   :   K   | ]}|         t           v |V  d S r+   )invalid_characters_set)rv   rw   r-   s     r   ry   zcomplement.<locals>.<genexpr>P  s?       &L &L#a&4J*J*JA*J*J*J*J&L &Lr   z<Sequence contains non-DNA character '{0}' at position {1:n}
r6   )r2   	translatetranslate_argumentsr8   r  ranger:   r   )r-   resultfirst_invalid_positions   `  r   rM   rM   G  s     c((CS]/0F
6{{c#hh!% &L &L &L &LSXX&L &L &L "L "LKF3-.0F0JKKM M 	M Mr   c                 L    t          t          | |                    fd}|S )Nc                     |          S r+   r   )r  chr_name_maps    r   map_to_functionz+translate_chr_name.<locals>.map_to_function[  s    E""r   )r   zip)	from_nameto_namer  r  s      @r   translate_chr_namer  X  s;    Iw//00L# # # # # r   c                 ^   | d         dk    rdS 	 |                                                                  d d         \  }}}nI# t          t          f$ r5 t	          d                    |                                                      w xY wt          |          t          |          }}|||fS )Nr   #)NNNr  zMalformed BED entry! {0}
)r  r  r  r:   r"   r   r>   )	bed_entryr  r.   r/   s       r   	bed_splitr  a  s    |s!!P%,,..4466rr:ucc
# P P P3::9;K;K;M;MNNOOOPe**c#hh3E5#s   2A AB	c                    	 |                      d          \  }}n# t          $ r | }d }Y nw xY w	 |                     d          \  }}t          |          dz
  t          |          }}n# t          t          f$ r d\  }}Y nw xY w|||fS )Nr\   r]   r6   )NN)r  r:   r>   AttributeError)r   r  intervalr.   r/   s        r   
ucsc_splitr  l  s     ,,s++xx   "^^C((
s%jj1nc#hhsJ' " " "!
sss"5#s    ,,9A* *BBc                    t          |          dk    rdS t          |          dk    r|d         d         |k    rdS dS t          |          dk    rd|d         d         |k    rdS |d         d         |k    r<|d         d         dk    r*|d         d         dz   |k    r|d         d         dk    rdS ndS t          |          dk    rdS t          d| z   dz   t          |          z             )a   Find inconsistent line lengths in the middle of an
    entry. Allow blank lines between entries, and short lines
    occurring at the last line of an entry. Returns boolean
    validating the entry.
    >>> check_bad_lines('chr0', [(10, 79)], 10)
    True
    >>> check_bad_lines('chr0', [(9, 79)], 10)
    False
    >>> check_bad_lines('chr0', [(9, 79), (10, 1)], 10)
    True
    r   Tr6   Fr_   z3Unhandled exception during fasta indexing at entry zEPlease report this issue at https://github.com/mdshw5/pyfaidx/issues )r8   RuntimeErrorr2   )r  r!  rw   s      r   r  r  z  s    9~~t	Y1		Q<?a45	Y1		Q<?a5q\!_!!il1o&:&:|A"a''IaLOa,?,?t5
9~~u
LuT^_9~~& ' ' 'r   c                     t          |                                                               dd          } t          j        dd|           S )a  
    From https://github.com/django/django/blob/efc3e32d6d7fb9bb41be73b80c8607b653c1fbd6/django/utils/text.py#L222-L232
    Return the given string converted to a string that can be used for a clean
    filename. Remove leading and trailing spaces; convert other spaces to
    underscores; and remove anything that is not an alphanumeric, dash,
    underscore, or dot.
    >>> get_valid_filename("HPV16_144-1.fa")
    'HPV16_144-1.fa'
    >>> get_valid_filename("chromosome 6.fa")
    'chromosome_6.fa'
     _z(?u)[^-\w.]r)   )r2   stripr;  rz   r{   )r  s    r   get_valid_filenamer     s;     	AsC((A6."a(((r   __main__)r)   )Ir   r  r   rz   r  sysr  ra   collectionsr   	itertoolsr   r   mathr   os.pathr   tempfiler   	threadingr	   importlib.metadatar
   r   importlib_metadatar   r   r   compile	dna_basesr:   r   r   r   r   r   r   r   r  r    r"   r$   r&   objectr(   r   r   ri  r  r  r  r  r  r   r   r  r  r  rR   invalid_characters_stringr2   	maketranscomplement_tabler  rM   r  r  r  r  r   r   doctesttestmodr   r   r   <module>r3     s     				 				  



   " " " " " " ) ) ) ) ) ) ) )             " " " " " "      +******* + + +********+ $ # # # # #MMMM   FFF gi  BJ?@@	: : : : :z : : :I I I I I I I I9 9 9 9 9 9 9 90 0 0 0 0G 0 0 05 5 5 5 5 5 5 5M M M M M M M M+ + + + +z + + +# # # # #) # # #    7   z! z! z! z! z!v z! z! z!z    
=JJJ	L 	L  $] ] ] ] ]F ] ] ]@H
 H
 H
 H
 H
& H
 H
 H
V               Fy# y# y# y# y#F y# y# y#x\ \ \ \ \5 \ \ \~' ' ' '       "  @6 C CE#JJC C C C C GG$:;; ==!2N14E!:< < '*   "      ' ' 'B) ) )  zNNNGO s$   A AA%A* *A43A4