o
    Df8                     @   s  d Z g dZddlZddlmZmZ ddlmZ ddlZddlm	Z	m
Z
mZmZmZ ddlmZ dd	lmZmZ dd
lmZmZ ddlmZmZ ddlmZmZmZmZ ddlmZmZ ddl m!Z" ddl#m$Z$ dddddedde
e%eef de	e
e%eef  de	e% de&de%de	e& de
eeeeddddddf
 fd d!Z'd"e%de	e% fd#d$Z(dddd%d&e
e%eef de	e% de%de	e& de
eeeef f
d'd(Z)d)ede	e% fd*d+Z*G d,d dZ+G d-d dZ,G d.d dZ-G d/d de-Z.G d0d dZ/G d1d de/Z0dS )2z@
Sequence I/O: Read and write FASTA and FASTQ files efficiently
)openSequenceFastaReaderFastaWriterFastqReaderFastqWriterUnknownFileFormatFileFormatErrorFastaFormatErrorFastqFormatErrorInterleavedSequenceReaderInterleavedSequenceWriterPairedSequenceReaderread_chunksread_paired_chunksrecord_names_match__version__    N)fspathPathLike)	ExitStack)OptionalUnionBinaryIOTupleIterator)xopen   )r   r   )r   r   )r   r   )r   r   r	   r
   )r   r   )version)_is_pathFr)file2
fileformatinterleavedmode	qualitiesopenerfile1r    r!   r"   r#   r$   returnr   PairedSequenceWriterPairedSequenceAppenderr   r   InterleavedSequenceAppenderc                C   s   |dvrt d|r|durt d|durD|dv r"| |kr"t d|dkr.t| |||dS |d	kr;t| ||||dS t| ||||dS |re|dkrQt| ||dS |d	kr]t| |||dS t| |||dS t| ||||d
S )a  
    Open sequence files in FASTA or FASTQ format for reading or writing. This is
    a factory that returns an instance of one of the ...Reader or ...Writer
    classes also defined in this module.

    file1, file2 -- Paths to regular or compressed files or file-like
        objects (as str or as pathlib.Path). Use only file1 if data is single-end.
        If sequences are paired, use also file2.

    mode -- Either 'r' for reading, 'w' for writing or 'a' for appending.

    interleaved -- If True, then file1 contains interleaved paired-end data.
        file2 must be None in this case.

    fileformat -- If set to None, the file format is autodetected from the file name
        extension. Set to 'fasta' or 'fastq' to not auto-detect.

    qualities -- When mode is 'w' and fileformat is None, this can be set to
        True or False to specify whether the written sequences will have quality
        values. This is is used in two ways:
        * If the output format cannot be determined (unrecognized extension
          etc), no exception is raised, but fasta or fastq format is chosen
          appropriately.
        * When False (no qualities available), an exception is raised when the
          auto-detected output format is FASTQ.

    opener -- A function that is used to open file1 and file2 if they are not
        already open file-like objects. By default, xopen is used, which can
        also open compressed file formats.
    r   waMode must be 'r', 'w' or 'a'Nz+When interleaved is set, file2 must be Nonewaz)The paired-end output files are identicalr   )r%   r,   r%   r!   r#   r$   )
ValueErrorr   r(   r)   r   r   r*   _open_single)r&   r    r!   r"   r#   r$   r%    r3   W/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/dnaio/__init__.pyr   *   s*   3
r   namec                 C   sp   |   } dD ]}| |r| dt|  }  nqtj| \} }|dv r'dS |dv s4|dkr6| dr6dS dS )	ze
    name -- file name

    Return 'fasta', 'fastq' or None if the format could not be detected.
    )z.gzz.xzz.bz2N)z.fastaz.faz.fnaz.csfastaz.csfafasta)z.fastqz.fqz.txt	_sequencefastq)lowerendswithlenospathsplitext)r5   extr3   r3   r4   _detect_format_from_namey   s   
r@   )r!   r#   r$   file_or_pathc                C   s  |dvrt dt| rt| }|||d }d}n"|dkr't| ds't d| }t|dr8t|jtr8|j}nd	}d
}|dkrBtnt}|dkrJt	nt
}	tj||dtj|	|dd}
|r{z|
|  }W ||S  tyz   |  td|w |d	urt|}|d	u r|dkr|d	ur|rdnd}|dkr|d	u rt|}|d	u rt|dt|}|  td||d	u r|dksJ |d	u rdnd}|  td| |dkr|dv r|d
u r|  t d|
| |S )zG
    Open a single sequence file. See description of open() above.
    r+   r.   bTr   readintozQWhen passing in an open file-like object, it must have been opened in binary moder5   NF)Z_close_file)r8   r6   z:File format {!r} is unknown (expected 'fasta' or 'fastq').r,   r8   r6   zCould not determine whether file "{}" is FASTA or FASTQ. The file extension was not available or not recognized and the first character in the file is unexpected.z. because the output file name is not available z=Auto-detection of the output file format (FASTA/FASTQ) failedr/   zDOutput format cannot be FASTQ since no quality values are available.)r1   r   r   hasattr
isinstancer5   strr   r   r   r   	functoolspartialr9   KeyErrorcloser   formatr@   _detect_format_from_contentgetattrrepr)rA   r%   r!   r#   r$   r=   fileZ
close_fileZfastq_handlerZfasta_handlerhandlershandlerr5   extrar3   r3   r4   r2      sp   r2   rP   c                 C   sX   |   r| d}|  dkr| dd n	| ddd }ddddd}||dS )z)
    Return 'fasta', 'fastq' or None
    r   r   r8   r6   )   @   >   #    N)seekablereadtellseekpeekget)rP   
first_charformatsr3   r3   r4   rM      s   
rM   c                   @   s   e Zd ZdZdZdefdeeee	f deeee	f de
e fddZd	efd
dZd	eeeef  fddZdddZdd Zdd ZdS )r   z
    Read paired-end reads from two files.

    Wraps two BinaryFileReader instances, making sure that reads are properly
    paired.
    TNr&   r    r!   c                 C   sl   t  $}|t|||d| _|t|||d| _| j| _W d    n1 s*w   Y  | jj| _d S N)r%   r!   )	r   enter_contextr2   reader1reader2pop_allrK   _closedelivers_qualities)selfr&   r    r!   r%   stackr3   r3   r4   __init__   s   zPairedSequenceReader.__init__r'   c                 C   s   d | j| jS )Nz(PairedSequenceReader(file1={}, file2={}))rL   rc   rd   rh   r3   r3   r4   __repr__   s   zPairedSequenceReader.__repr__c                 c   s    t | jt | j}}	 zt|}W n ty1   zt| tdddd ty0   Y Y dS w w zt|}W n tyF   tddddw t|j|js[td|j|jddd||fV  q)zY
        Iterate over the paired reads. Each item is a pair of Sequence objects.
        TzKReads are improperly paired. There are more reads in file 2 than in file 1.NlinezKReads are improperly paired. There are more reads in file 1 than in file 2.zTReads are improperly paired. Read name '{}' in file 1 does not match '{}' in file 2.)	iterrc   rd   nextStopIterationr   r   r5   rL   )rh   Zit1Zit2r1r2r3   r3   r4   __iter__  sL   

zPairedSequenceReader.__iter__c                 C      |    d S Nrf   rk   r3   r3   r4   rK   "     zPairedSequenceReader.closec                 C      | S rv   r3   rk   r3   r3   r4   	__enter__%     zPairedSequenceReader.__enter__c                 G   ru   rv   rK   )rh   excr3   r3   r4   __exit__(  rx   zPairedSequenceReader.__exit__r'   N__name__
__module____qualname____doc__Zpairedr   r   rG   r   r   r   rj   rl   r   r   r   rt   rK   rz   r~   r3   r3   r3   r4   r      s"    

c                   @   s|   e Zd ZdZdZdefdeeee	f de
e fddZdefd	d
Zdeeeef  fddZdddZdd Zdd ZdS )r   z?
    Read paired-end reads from an interleaved FASTQ file.
    TNrP   r!   c                 C   s4   t |||d}t|ttfsJ || _| jj| _d S ra   )r2   rF   r   r   readerrg   )rh   rP   r!   r%   r   r3   r3   r4   rj   2  s   z"InterleavedSequenceReader.__init__r'   c                 C   s   d | jS )NzInterleavedSequenceReader({}))rL   r   rk   r3   r3   r4   rl   =  rx   z"InterleavedSequenceReader.__repr__c              	   c   s|    t | j}|D ]3}zt|}W n ty"   td|jd dd w t|j|js6td|j|jd d||fV  qd S )NzCInterleaved input file incomplete: Last record {!r} has no partner.rm   zLReads are improperly paired. Name {!r} (first) does not match {!r} (second).)ro   r   rp   rq   r   rL   r5   r   )rh   itrr   rs   r3   r3   r4   rt   @  s(   

z"InterleavedSequenceReader.__iter__c                 C      | j   d S rv   )r   rK   rk   r3   r3   r4   rK   O     zInterleavedSequenceReader.closec                 C   ry   rv   r3   rk   r3   r3   r4   rz   R  r{   z#InterleavedSequenceReader.__enter__c                 G   ru   rv   r|   rh   argsr3   r3   r4   r~   U  rx   z"InterleavedSequenceReader.__exit__r   r   r3   r3   r3   r4   r   ,  s    

c                
   @   s   e Zd ZdZddefdeeeef deeeef de	e de	e
 fdd	Zd
efddZdddZdddZdd Zdd ZdS )r(   r,   r8   Nr&   r    r!   r$   c              
   C   sx   t  /}|  |  |t|||| j|d| _|t|||| j|d| _| j| _W d    d S 1 s5w   Y  d S Nr0   )	r   rb   r2   _mode_writer1_writer2re   rK   rf   )rh   r&   r    r!   r$   r%   ri   r3   r3   r4   rj   \  s   "zPairedSequenceWriter.__init__r'   c                 C   s   d | jj| j| jS )Nz
{}({}, {}))rL   	__class__r   r   r   rk   r3   r3   r4   rl   o  s   zPairedSequenceWriter.__repr__c                 C   s   | j | | j| d S rv   )r   writer   rh   read1read2r3   r3   r4   r   r     zPairedSequenceWriter.writec                 C   ru   rv   rw   rk   r3   r3   r4   rK   v  rx   zPairedSequenceWriter.closec                 C   ry   rv   r3   rk   r3   r3   r4   rz   y     zPairedSequenceWriter.__enter__c                 G   ru   rv   r|   r   r3   r3   r4   r~   }  rx   zPairedSequenceWriter.__exit__r   )r   r   r   r   r   r   rG   r   r   r   boolrj   rl   r   rK   rz   r~   r3   r3   r3   r4   r(   Y  s&    


c                   @      e Zd ZdZdS )r)   r-   Nr   r   r   r   r3   r3   r3   r4   r)         c                   @   s   e Zd ZdZdZddefdeeee	f de
e de
e fdd	Zd
efddZdeded
dfddZdddZdd Zdd ZdS )r   zF
    Write paired-end reads to an interleaved FASTA or FASTQ file
    r,   r8   NrP   r!   r$   c                 C   s0   t |||| j|d}t|ttfsJ || _d S r   )r2   r   rF   r   r   _writer)rh   rP   r!   r$   r%   writerr3   r3   r4   rj     s
   
z"InterleavedSequenceWriter.__init__r'   c                 C   s   d | jj| jS )Nz{}({}))rL   r   r   r   rk   r3   r3   r4   rl     s   z"InterleavedSequenceWriter.__repr__r   r   c                 C   s   | j | | j | d S rv   )r   r   r   r3   r3   r4   r     r   zInterleavedSequenceWriter.writec                 C   r   rv   )r   rK   rk   r3   r3   r4   rK     r   zInterleavedSequenceWriter.closec                 C   ry   rv   r3   rk   r3   r3   r4   rz     r   z#InterleavedSequenceWriter.__enter__c                 G   ru   rv   r|   r   r3   r3   r4   r~     rx   z"InterleavedSequenceWriter.__exit__r   )r   r   r   r   r   r   r   rG   r   r   r   r   rj   rl   r   r   rK   rz   r~   r3   r3   r3   r4   r     s$    

c                   @   r   )r*   r-   Nr   r3   r3   r3   r4   r*     r   )1r   __all__r<   r   r   
contextlibr   rH   typingr   r   r   r   r   r   Z_corer   r   readersr   r   Zwritersr   r   
exceptionsr   r   r	   r
   chunksr   r   _versionr   r   Z_utilr   rG   r   r   r@   r2   rM   r   r   r(   r)   r   r*   r3   r3   r3   r4   <module>   s    	
O
KB-($