o
    Df4`                     @   s  d Z ddlZddlmZ ddlmZmZmZmZm	Z	 ddl
mZmZ ddlmZ ddlmZmZ dd	lmZmZ dd
lmZmZmZmZmZmZmZ ddlmZmZm Z m!Z! ddl"m#Z# dZ$G dd dZ%G dd deZ&G dd deZ'G dd de'Z(G dd de&Z)G dd de&Z*G dd de+Z,G dd de'Z-G dd de&Z.G d d! d!e&Z/G d"d# d#e&Z0G d$d% d%e&Z1G d&d' d'e+Z2G d(d) d)e&Z3G d*d+ d+e'Z4G d,d- d-e&Z5G d.d/ d/e&Z6G d0d1 d1e&Z7G d2d3 d3e&Z8G d4d5 d5e&Z9dS )6z
This module implements all the read modifications that cutadapt supports.
A modifier must be callable and typically implemented as a class with a
__call__ method.
    N)SimpleNamespace)SequenceListTupleOptionalSet)ABCabstractmethod)OrderedDict)record_names_matchr      )quality_trim_indexnextseq_trim_index)MultipleAdaptersSingleAdapterIndexedPrefixAdaptersIndexedSuffixAdaptersMatch	remainderAdapter)tokenize_bracesTokenizeErrorToken
BraceToken)reverse_complemented_sequence   c                   @   s    e Zd ZdZg dZdd ZdS )ModificationInfoa
  
    An object of this class is created for each read that passes through the pipeline.
    Any information (except the read itself) that needs to be passed from one modifier
    to one later in the pipeline or from one modifier to the filters is recorded here.
    matchesZoriginal_read
cut_prefix
cut_suffixis_rcc                 C   s"   g | _ || _d | _d | _d | _d S Nr   )selfread r%   [/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/cutadapt/modifiers.py__init__!   s
   
zModificationInfo.__init__N)__name__
__module____qualname____doc__	__slots__r'   r%   r%   r%   r&   r      s    r   c                   @   s   e Zd ZedefddZdS )SingleEndModifierinfoc                 C      d S r"   r%   r#   r$   r.   r%   r%   r&   __call__*   s   zSingleEndModifier.__call__N)r(   r)   r*   r	   r   r1   r%   r%   r%   r&   r-   )   s    r-   c                	   @   s.   e Zd Zedededeeef fddZdS )PairedEndModifierinfo1info2returnc                 C   r/   r"   r%   r#   read1read2r3   r4   r%   r%   r&   r1   0   s   zPairedEndModifier.__call__N)r(   r)   r*   r	   r   r   DnaSequencer1   r%   r%   r%   r&   r2   /   s    
r2   c                   @   sH   e Zd ZdZdZdee dee fddZdd Zd	e	d
e	fddZ
dS )PairedEndModifierWrapperzL
    Wrapper for modifiers that work on both reads in a paired-end read
    T	modifier1	modifier2c                 C   s0   || _ || _| j du r| jdu rtddS dS )z9Set one of the modifiers to None to work on R1 or R2 onlyNzNot both modifiers may be None)
_modifier1
_modifier2
ValueError)r#   r;   r<   r%   r%   r&   r'   =   s
   z!PairedEndModifierWrapper.__init__c                 C      d | j| jS )Nz8PairedEndModifierWrapper(modifier1={!r}, modifier2={!r}))formatr=   r>   r#   r%   r%   r&   __repr__D      z!PairedEndModifierWrapper.__repr__r3   r4   c                 C   sL   | j d u r|| ||fS | jd u r|  |||fS |  ||| ||fS r"   )r=   r>   r6   r%   r%   r&   r1   H   s
   

z!PairedEndModifierWrapper.__call__N)r(   r)   r*   r+   Zpairedr   r-   r'   rC   r   r1   r%   r%   r%   r&   r:   7   s    r:   c                	   @   s   e Zd ZdZ			ddee dedee de	fd	d
Z
dd Zdd Zedee deee ee ee f fddZedee fddZedee fddZedee fddZdefddZdd ZdS ) AdapterCutterz
    Repeatedly find one of multiple adapters in reads.
    The number of times the search is repeated is specified by the
    times parameter.
    r   trimTadapterstimesactionindexc                 C   sr   || _ |dv s	J || _d| _tdd |D | _|r$t| || _nt|| _|dkr5|dkr7tddS dS )	aT  
        action -- What to do with a found adapter:
          None: Do nothing, only update the ModificationInfo appropriately
          "trim": Remove the adapter and down- or upstream sequence depending on adapter type
          "mask": Replace the part of the sequence that would have been removed with "N" bases
          "lowercase": Convert the part of the sequence that would have been removed to lowercase
          "retain": Like "trim", but leave the adapter sequence itself in the read

        index -- if True, an adapter index (for multiple adapters) is created if possible
        )rF   mask	lowercaseretainNr   c                 s       | ]	}||  fV  qd S r"   Zcreate_statistics.0ar%   r%   r&   	<genexpr>l       z)AdapterCutter.__init__.<locals>.<genexpr>rM   r   z*'retain' cannot be combined with times > 1N)	rH   rI   with_adaptersr
   adapter_statisticsr   _regroup_into_indexed_adaptersrG   r?   )r#   rG   rH   rI   rJ   r%   r%   r&   r'   W   s   
zAdapterCutter.__init__c                 C   s   d | j| j| jS )Nz3AdapterCutter(adapters={!r}, times={}, action={!r}))rA   rG   rH   rI   rB   r%   r%   r&   rC   t   s   zAdapterCutter.__repr__c                 C   s   |  |\}}}t|tkst|tkr?|}t|dkr$|t| n|| t|dkr8|t| |S || |S |S Nr   )_split_adapterslenINDEXING_THRESHOLDappendr   extendr   )r#   rG   prefixsuffixsingleresultr%   r%   r&   rW   x   s   

z,AdapterCutter._regroup_into_indexed_adaptersr5   c                 C   sV   g }g }g }| D ]}t |r|| qt|r || q|| q|||fS )a  
        Split adapters into three different categories so that they can possibly be used
        with a MultiAdapter. Return a tuple (prefix, suffix, other), where
        - prefix is a list of all anchored 5' adapters that MultiAdapter would accept
        - suffix is a list of all anchored 3' adapters that MultiAdapter would accept
        - other is a list of all remaining adapters.
        )r   Zis_acceptabler\   r   )rG   r^   r_   otherrR   r%   r%   r&   rY      s   


zAdapterCutter._split_adaptersr   c                 C   s   |d   \}}| || S )N)Zretained_adapter_interval)r$   r   startstopr%   r%   r&   trim_but_retain_adapter   s   z%AdapterCutter.trim_but_retain_adapterc                 C   sD   t |\}}| d d  }d| | j||  dt| |   |_|S )NN)r   sequencerZ   r$   r   rd   re   ra   r%   r%   r&   masked_read   s   zAdapterCutter.masked_readc                 C   sT   t |\}}| d d  }| jd |  | j||   | j|d    |_|S r"   )r   rh   lowerupperri   r%   r%   r&   lowercased_read   s   zAdapterCutter.lowercased_readr.   c                 C   sL   |  |\}}|r|  jd7  _|D ]}|| j|j  q|j| |S rX   )match_and_trimrU   update_statisticsrV   adapterr   r]   )r#   r$   r.   trimmed_readr   matchr%   r%   r&   r1      s   zAdapterCutter.__call__c                 C   s
  g }| j dkr|j |_|}t| jD ]}| j|j}|du r# n|| ||}q|s4|g fS | j dkr>	 ||fS | j dkrM| 	||}||fS | j dkr\| 
||}||fS | j dkrv| ||}t|jt|ksrJ ||fS | j du r|dd }||fS )a  
        Search for the best-matching adapter in a read, perform the requested action
        ('trim', 'mask' etc. as determined by self.action) and return the
        (possibly) modified read.

        *self.times* adapter removal rounds are done. During each round,
        only the best-matching adapter is trimmed. If no adapter was found in a round,
        no further rounds are attempted.

        Return a pair (trimmed_read, matches), where matches is a list of Match instances.
        rL   NrF   rM   rK   )rI   rh   rl   rangerH   rG   match_tor\   trimmedrf   rj   rm   rZ   )r#   r$   r   rq   _rr   r%   r%   r&   rn      s8   



	


zAdapterCutter.match_and_trimN)r   rF   T)r(   r)   r*   r+   r   r   intr   strboolr'   rC   rW   staticmethodr   r   r   rY   r   rf   rj   rm   r   r1   rn   r%   r%   r%   r&   rE   P   s<    	
	
	rE   c                   @   s6   e Zd ZdZddedee fddZdefdd	Z	d
S )ReverseComplementerz4Trim adapters from a read and its reverse complement rcadapter_cutter	rc_suffixc                 C   s   || _ d| _|| _dS )zb
        rc_suffix -- suffix to add to the read name if sequence was reverse-complemented
        r   N)r}   reverse_complemented_suffix)r#   r}   r~   r%   r%   r&   r'      s   
zReverseComplementer.__init__r.   c                 C   s   t |}| j|\}}| j|\}}tdd |D }tdd |D }	|	|k}
|
rK|  jd7  _|s7J ||}}d|_| jrJ| j| j7  _nd|_||}}|r}| j jd7  _|D ]}| jj	|j
 }|| | jt|
7  _q_|j| |S )Nc                 s       | ]}|j V  qd S r"   r   rQ   mr%   r%   r&   rS          z/ReverseComplementer.__call__.<locals>.<genexpr>c                 s   r   r"   r   r   r%   r%   r&   rS     r   r   TF)r   r}   rn   sumr   r!   r   namerU   rV   rp   ro   ry   r   r]   )r#   r$   r.   Zreverse_readZforward_trimmed_readZforward_matchesZreverse_trimmed_readZreverse_matchesZforward_match_countZreverse_match_countZuse_reverse_complementrq   r   rr   statsr%   r%   r&   r1      s0   


zReverseComplementer.__call__N)r|   )
r(   r)   r*   r+   rE   r   rx   r'   r   r1   r%   r%   r%   r&   r{      s    r{   c                   @      e Zd ZdS )PairedAdapterCutterErrorNr(   r)   r*   r%   r%   r%   r&   r         r   c                       s2   e Zd ZdZd	 fdd	Zdd Zdd Z  ZS )
PairedAdapterCutterz=
    A Modifier that trims adapter pairs from R1 and R2.
    rF   c                    s   t    t|t|krtdt|t||stdt|| _dd t|D | _t|| _	|| _
d| _ddg| _tdd |D | jd< td	d |D | jd
< dS )ag  
        adapters1 -- list of Adapters to be removed from R1
        adapters2 -- list of Adapters to be removed from R1

        Both lists must have the same, non-zero length.
         read pair is trimmed if adapters1[i] is found in R1 and adapters2[i] in R2.

        action -- What to do with a found adapter: None, 'trim', 'lowercase' or 'mask'
        zXThe number of reads to trim from R1 and R2 must be the same. Given: {} for R1, {} for R2zNo adapters givenc                 S   s   i | ]\}}||qS r%   r%   )rQ   irR   r%   r%   r&   
<dictcomp>4  s    z0PairedAdapterCutter.__init__.<locals>.<dictcomp>r   Nc                 s   rN   r"   rO   rP   r%   r%   r&   rS   9  rT   z/PairedAdapterCutter.__init__.<locals>.<genexpr>c                 s   rN   r"   rO   rP   r%   r%   r&   rS   :  rT   r   )superr'   rZ   r   rA   r   
_adapters1	enumerate_adapter_indices
_adapters2rI   rU   rV   r
   )r#   Z	adapters1Z	adapters2rI   	__class__r%   r&   r'   "  s    




zPairedAdapterCutter.__init__c                 C   r@   )Nz3PairedAdapterCutter(adapters1={!r}, adapters2={!r}))rA   r   r   rB   r%   r%   r&   rC   <  rD   zPairedAdapterCutter.__repr__c                 C   sl  | j |j}|du r||fS |j}| j| j|  }||j}|du r(||fS |  jd7  _g }	tddg||g||gD ]j\}
}}|}| jdkrO|j	 |_|
|}|| j|
 |j  | jdkren=| jdkrrt||g}n0| jdkrt||g}t|jt|ksJ n| jdkrt||g}n| jdu r|dd }|	| q=|j| |j| |	S )z	
        Nr   r   rL   rF   rK   rM   )r   rt   rh   rp   r   r   rU   ziprI   rl   ru   ro   rV   rE   rj   rm   rZ   rf   r\   r   )r#   r7   r8   r3   r4   Zmatch1Zadapter1Zadapter2Zmatch2ra   r   rr   r$   rq   r%   r%   r&   r1   @  s>   "






zPairedAdapterCutter.__call__)rF   )r(   r)   r*   r+   r'   rC   r1   __classcell__r%   r%   r   r&   r     s
    r   c                   @   s,   e Zd ZdZdefddZdefddZdS )	UnconditionalCuttera  
    A modifier that unconditionally removes the first n or the last n bases from a read.

    If the length is positive, the bases are removed from the beginning of the read.
    If the length is negative, the bases are removed from the end of the read.
    lengthc                 C   
   || _ d S r"   r   r#   r   r%   r%   r&   r'   o     
zUnconditionalCutter.__init__r.   c                 C   sX   | j dkr|jd | j  |_|| j d  S | j dk r*|j| j d  |_|d | j  S d S Nr   )r   rh   r   r    r0   r%   r%   r&   r1   r  s   

zUnconditionalCutter.__call__N)r(   r)   r*   r+   rw   r'   r   r1   r%   r%   r%   r&   r   h  s    r   c                   @   &   e Zd ZdZdd ZdefddZdS )LengthTagModifierz5
    Replace "length=..." strings in read names.
    c                 C   s   t d| d | _|| _d S )Nz\bz[0-9]*\b)recompileregex
length_tag)r#   r   r%   r%   r&   r'     s   
zLengthTagModifier.__init__r.   c                 C   sD   |d d  }|j | jdkr | j| jtt|j |j |_ |S r   )r   findr   r   subrx   rZ   rh   r0   r%   r%   r&   r1     s   "zLengthTagModifier.__call__Nr(   r)   r*   r+   r'   r   r1   r%   r%   r%   r&   r   {  s    r   c                   @   r   )SuffixRemoverz0
    Remove a given suffix from read names.
    c                 C   r   r"   )r_   )r#   r_   r%   r%   r&   r'     r   zSuffixRemover.__init__r.   c                 C   s6   |d d  }|j | jr|j d t| j  |_ |S r"   )r   endswithr_   rZ   r0   r%   r%   r&   r1     s   zSuffixRemover.__call__Nr   r%   r%   r%   r&   r     s    r   c                   @   s    e Zd ZdZdd Zdd ZdS )PrefixSuffixAdderz1
    Add a suffix and a prefix to read names
    c                 C   s   || _ || _d S r"   )r^   r_   )r#   r^   r_   r%   r%   r&   r'     s   
zPrefixSuffixAdder.__init__c                 C   sL   |d d  }|j r|j d jjnd}| jd||j | jd| |_|S )Nrc   
no_adapterz{name})r   rp   r   r^   replacer_   )r#   r$   r.   adapter_namer%   r%   r&   r1     s   zPrefixSuffixAdder.__call__N)r(   r)   r*   r+   r'   r1   r%   r%   r%   r&   r     s    r   c                   @   r   )InvalidTemplateNr   r%   r%   r%   r&   r     r   r   c                   @   s   e Zd ZdZh dZdefddZdd Zede	e
 d	ee d
dfddZeded
eeef fddZdeded
efddZdS )Renamera  
    Rename reads using a template

    The template string can contain the following placeholders:

    - {header} -- full, unchanged header
    - {id} -- the part of the header before the first whitespace
    - {comment} -- the part of the header after the ID, excluding initial whitespace
    - {cut_prefix} -- prefix removed by UnconditionalCutter (with positive length argument)
    - {cut_suffix} -- suffix removed by UnconditionalCutter (with negative length argument)
    - {adapter_name} -- name of the *last* adapter match or no_adapter if there was none
    - {rc} -- the string 'rc' if the read was reverse complemented (with --revcomp) or '' otherwise
    >   r   r   commentrcheaderr    idtemplatec              
   C   sV   z	t t|| _W n ty } ztd||d }~ww | | j| j || _d S NzError in template '{}': {})	listr   _tokensr   r   rA   raise_if_invalid_variable	variables	_templater#   r   er%   r%   r&   r'     s   
zRenamer.__init__c                 C   s   d| j  dS )Nz	Renamer('z'))r   rB   r%   r%   r&   rC     s   zRenamer.__repr__tokensallowedr5   Nc                 C   s6   | D ]}t |ts
q|j}||vrtd|qd S )Nz/Error in template: Variable '{}' not recognized)
isinstancer   valuer   rA   )r   r   tokenr   r%   r%   r&   r     s   
z!Renamer.raise_if_invalid_variable	read_namec                 C   s0   | j dd}t|dkr|d |d fS | dfS )z0Parse read header and return (id, comment) tupler   )maxsplit   r    )splitrZ   )r   fieldsr%   r%   r&   
parse_name  s   zRenamer.parse_namer$   r.   c              	   C   sh   |  |j\}}| jj|j|||jr|jnd|jr|jnd|jr'|jd jjnd|jr-dndd|_|S )Nr   rc   r   r   )r   r   r   r   r    r   r   )	r   r   r   rA   r   r    r   rp   r!   )r#   r$   r.   id_r   r%   r%   r&   r1     s   	zRenamer.__call__)r(   r)   r*   r+   r   rx   r'   rC   rz   r   r   r   r   r   r   r9   r   r1   r%   r%   r%   r&   r     s    
 
r   c                   @   s   e Zd ZdZdefddZedee fddZde	d	e	d
e
de
dee	e	f f
ddZdedededededed
e
de
deeef fddZdS )PairedEndRenamerai  
    Rename paired-end reads using a template. The template is applied to both
    R1 and R2, and the same template variables as in the (single-end) renamer
    are allowed. However,
    these variables are evaluated separately for each read. For example, if `{comment}`
    is used, it gets replaced with the R1 comment in the R1 header, and with the R2
    comment in the R2 header.

    Additionally, all template variables except `id` can be used in the read-specific
    forms `{r1.variablename}` and `{r2.variablename}`. For example, `{r1.comment}`
    always gets replaced with the R1 comment, even in R2.
    r   c              
   C   sX   z	t t|| _W n ty } ztd||d }~ww t| j|   || _	d S r   )
r   r   r   r   r   rA   r   r   _get_allowed_variablesr   r   r%   r%   r&   r'     s   
zPairedEndRenamer.__init__r5   c                  C   sF   t jdh dhB } t jddh D ]}| d|  | d|  q| S )Nr   rnr   zr1.zr2.)r   r   add)r   vr%   r%   r&   r     s
   z'PairedEndRenamer._get_allowed_variablesr7   r8   r3   r4   c              
   C   s   t |j\}}t |j\}}t|j|jstd||| j|||||j|j||d\}	}
t |	d }t |
d }t|	|
sLtd||||	|_|
|_||fS )Nz*Input read IDs not identical: '{}' != '{}')id1id2comment1comment2header1header2r3   r4   r   zcAfter renaming R1 and R2, their IDs are no longer identical: '{}' != '{}'. Original read ID: '{}'. )r   r   r   r   r?   rA   get_new_headersr   )r#   r7   r8   r3   r4   r   r   r   r   name1name2Znew_id1Znew_id2r%   r%   r&   r1     s0   



zPairedEndRenamer.__call__r   r   r   r   r   r   c	              
   C   s   g }	||||f||||ffD ](\}
}}}|	 t|||jr|jnd|jr&|jnd|jr1|jd jjndd q| jjd
|dd|	d t	d
i |	d t	d
i |	d d}| jjd
|d	d|	d t	d
i |	d t	d
i |	d d}||fS )Nr   rc   r   )r   r   r   r    r   r   )r   r   r   )r1r2r   r%   )
r\   dictr   r    r   rp   r   r   rA   r   )r#   r   r   r   r   r   r   r3   r4   dr   r   r   r.   r   r   r%   r%   r&   r   ,  s>   	

z PairedEndRenamer.get_new_headersN)r(   r)   r*   r+   rx   r'   rz   r   r   r9   r   r   r1   r   r%   r%   r%   r&   r     sF    

	

r   c                   @   s(   e Zd ZdZd	ddZdefddZdS )

ZeroCapperz:
    Change negative quality values of a read to zero
    !   c                 C   s.   |}t dttt|t|| | _d S )Nr   )rx   	maketransjoinmapchrrs   zero_cap_trans)r#   Zquality_baseZqbr%   r%   r&   r'   Y  s   *zZeroCapper.__init__r.   c                 C   s    |d d  }|j | j|_ |S r"   )	qualities	translater   r0   r%   r%   r&   r1   ]  s   zZeroCapper.__call__N)r   r   r%   r%   r%   r&   r   U  s    
r   c                   @   "   e Zd Zdd ZdefddZdS )NextseqQualityTrimmerc                 C   s   || _ || _d| _d S r   )cutoffbasetrimmed_bases)r#   r   r   r%   r%   r&   r'   d  s   
zNextseqQualityTrimmer.__init__r.   c                 C   s2   t || j| j}|  jt|| 7  _|d | S r"   )r   r   r   r   rZ   )r#   r$   r.   re   r%   r%   r&   r1   i  s   zNextseqQualityTrimmer.__call__Nr(   r)   r*   r'   r   r1   r%   r%   r%   r&   r   c  s    r   c                   @   r   )QualityTrimmerc                 C   s   || _ || _|| _d| _d S r   )cutoff_frontcutoff_backr   r   )r#   r   r   r   r%   r%   r&   r'   p  s   
zQualityTrimmer.__init__r.   c                 C   s@   t |j| j| j| j\}}|  jt|||  7  _||| S r"   )r   r   r   r   r   r   rZ   )r#   r$   r.   rd   re   r%   r%   r&   r1   v  s   zQualityTrimmer.__call__Nr   r%   r%   r%   r&   r   o  s    r   c                   @   r   )	ShortenerzUnconditionally shorten a read to the given length

    If the length is positive, the bases are removed from the end of the read.
    If the length is negative, the bases are removed from the beginning of the read.
    c                 C   r   r"   r   r   r%   r%   r&   r'     r   zShortener.__init__r.   c                 C   s&   | j dkr|d | j  S || j d  S r   r   r0   r%   r%   r&   r1     s   
zShortener.__call__Nr   r%   r%   r%   r&   r   |  s    r   c                   @   r   )NEndTrimmerz(Trims Ns from the 3' and 5' end of readsc                 C   s   t d| _t d| _d S )Nz^N+zN+$)r   r   
start_trimend_trimrB   r%   r%   r&   r'     s   zNEndTrimmer.__init__r.   c                 C   sN   |j }| j|}| j|}|r| nd}|r| nt|}||| S r   )rh   r   rr   r   searchendrd   rZ   )r#   r$   r.   rh   Z	start_cutZend_cutr%   r%   r&   r1     s   zNEndTrimmer.__call__Nr   r%   r%   r%   r&   r     s    r   ):r+   r   typesr   typingr   r   r   r   r   abcr   r	   collectionsr
   Zdnaior   r9   Zqualtrimr   r   rG   r   r   r   r   r   r   r   Z	tokenizerr   r   r   r   utilsr   r[   r   r-   r2   r:   rE   r{   	Exceptionr   r   r   r   r   r   r   r   r   r   r   r   r   r   r%   r%   r%   r&   <module>   sB    $  *KEd