o
    DfC                     @   s   d Z ddlZddlZddlmZ ddlmZmZmZm	Z	m
Z
mZmZ ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZ eeZG d	d
 d
ZG dd dZdS )z
Parse adapter specifications
    N)Path)TypeOptionalListTupleIteratorAnyDict)xopen)FastaReader   )
AdapterFrontAdapterNonInternalFrontAdapterBackAdapterNonInternalBackAdapterAnywhereAdapterPrefixAdapterSuffixAdapterLinkedAdapterInvalidCharacterc                
   @   s   e Zd ZdZdedee dedefddZededefd	d
Zdd Z	dd Z
ededefddZededeee ef fddZdddddddddd	ZedefddZedd Zedd Zdd  ZdS )!AdapterSpecificationa@    # noqa: E501
    Description of a single non-linked adapter.

    These are the attributes:

    - name (None or str)
    - restriction (None, 'anchored', or 'noninternal')
    - sequence (nucleotide sequence as string)
    - parameters (dict with extra parameters such as 'max_errors', 'min_overlap')
    - cmdline_type ('front' for -a, 'back' for -g and 'anywhere' for -b)

    >>> AdapterSpecification.parse('a_name=ACGT;anywhere', 'back')
    AdapterSpecification(name='a_name', restriction=None, sequence='ACGT', parameters={'anywhere': True}, cmdline_type='back')
    namerestrictionsequencecmdline_typec                 C   s:   |dv sJ |dv sJ || _ || _|| _|| _|| _d S )N)Nanchorednoninternalfrontbackanywherer   r   r   
parametersr   )selfr   r   r   r#   r    r%   X/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/cutadapt/parser.py__init__"   s   
zAdapterSpecification.__init__specc                 C   s$   |  ||\}}}}| |||||S )z<Factory for creating an instance from a string specification)_parse)clsr(   r   r   r   r   r#   r%   r%   r&   parse2   s   zAdapterSpecification.parsec                 C   s"   d | jj| j| j| j| j| jS )NzR{}(name={!r}, restriction={!r}, sequence={!r}, parameters={!r}, cmdline_type={!r}))format	__class____name__r   r   r   r#   r   r$   r%   r%   r&   __repr__8   s   zAdapterSpecification.__repr__c                 C   s<   | j |j ko| j|jko| j|jko| j|jko| j|jkS Nr"   )r$   otherr%   r%   r&   __eq__<   s   



zAdapterSpecification.__eq__returnc                 C   s   d}d}t d| D ]c}|dkrq
|du r,|dkrtd|dkr%td|}||7 }q
|dkrGt|}d|  kr>d	ksFn td
|q
t|trc|dkrTtd|dd |d |  }d}q
|dkrktdd}q
t|tsw|dkr{td|S )z
        Replace all occurrences of ``x{n}`` (where x is any character) with n
        occurrences of x. Raise ValueError if the expression cannot be parsed.

        >>> AdapterSpecification.expand_braces('TGA{5}CT')
        'TGAAAAACT'
         Nz([{}]){z""{" must be used after a character}z"}" cannot be used herer   i'  zValue {} invalidz"}" expectedzExpected "{"zUnterminated expression)resplit
ValueErrorintr,   
isinstance)r   resultprevsr%   r%   r&   expand_bracesE   s8   


z"AdapterSpecification.expand_bracesc                 C   s<   |  dd}d}t|dkr|\}} | }|  } || fS )z_
        Parse an adapter specification given as 'name=adapt' into 'name' and 'adapt'.
        =r   N)r:   lenstrip)r(   fieldsr   r%   r%   r&   _extract_namem   s   z"AdapterSpecification._extract_namemax_error_rate
max_errorsmin_overlapN)	eZ
error_raterG   orH   rI   r!   requiredoptionalc              	   C   s$  | d}t }|D ]n}| }|sq
|d\}}}|dkr'|dkr'td| }|| jvr7td|| j| durJ| j| }| j| dus>| }|dkrUd}nzt|}W n tyh   t	|}Y nw ||v rttd||||< q
d	|v rd
|v rtdd	|v rd|d
< |d	= |S )z/Parse key=value;key=value;key=value into a dict;rB   r5   zNo value givenzUnknown parameter {}NTzKey {} specified twicerM   rL   z>'optional' and 'required' cannot be specified at the same timeF)
r:   dictrD   	partitionr;   allowed_parametersKeyErrorr,   r<   float)r*   r(   rE   r>   fieldkeyequalsvaluer%   r%   r&   _parse_parameters   s@   



z&AdapterSpecification._parse_parametersc                 C   s  |dvrt dt d}|d\}}}| |\}}| }| |}| |}t|ddkr8|d|i fS d}|drGd	}|d
d }| dr[|durT|d}|	d}d}	|
drjd	}	|dd }| 
dr~|	durw|d}	|d}tt|tt|	 }
|
d
kr||dkr|	rt d|dkr|rt d|du s|	du sJ |dur|}n|	}|dkr|durt d||||fS )a1  
        Parse an adapter specification for a non-linked adapter (without '...')

        Allow:
        'back' and ADAPTER
        'back' and ADAPTERX
        'back' and ADAPTER$
        'front' and ADAPTER
        'front' and XADAPTER
        'front' and ^ADAPTER
        'anywhere' and ADAPTER
        r   z,cmdline_type must be front, back or anywherezYou cannot use multiple placement restrictions for an adapter at the same time. Choose one of ^ADAPTER, ADAPTER$, XADAPTER or ADAPTERXrN   Xr   N^r   r   r   xX$r8   r   zIAllowed placement restrictions for a 5' adapter are XADAPTER and ^ADAPTERr    zIAllowed placement restrictions for a 3' adapter are ADAPTERX and ADAPTER$r!   zPPlacement restrictions (with X, ^, $) not supported for 'anywhere' (-b) adapters)r;   rP   rF   rD   rX   rA   rC   
startswithupperlstripendswithrstripr<   bool)r*   r(   r   errormiddleZparameters_specr   r#   Zfront_restrictionZback_restrictionZn_placement_restrictionsr   r%   r%   r&   r)      sd   





zAdapterSpecification._parsec                 C   s   | dkr|du r
t S |dkrtS |dkrtS td|| dkr:|du r'tS |dkr-tS |dkr3tS td|| dks@J |du rFtS td	)
zA
        restriction: None, "anchored", or "noninternal"
        r   Nr   r   z,Value {} for a front restriction not allowedr    z+Value {} for a back restriction not allowedr!   z5No placement may be specified for "anywhere" adapters)	r   r   r   r;   r,   r   r   r   r   )r   r   r%   r%   r&   _restriction_to_class   s0   z*AdapterSpecification._restriction_to_classc                 C   s   |  | j| jS r1   )re   r   r   r/   r%   r%   r&   adapter_class  s   z"AdapterSpecification.adapter_class)r.   
__module____qualname____doc__strr   r'   classmethodr+   r0   r3   staticmethodrA   r   rF   rQ   rX   r)   re   rf   r%   r%   r%   r&   r      sH    
	' $
H
r   c                
   @   s   e Zd ZdZdd Zddededee d	efd
dZe	deded	e
eef fddZdedee ded	efddZdededee ded	ef
ddZddeded	ee fddZdee
eef  d	ee fddZdS )AdapterParserz
    Factory for Adapter classes that all use the same default parameters (error rate,
    indels etc.). The given **kwargs will be passed to the Adapter constructors.
    c                 K   s
   || _ d S r1   )default_parameters)r$   kwargsr%   r%   r&   r'      s   
zAdapterParser.__init__r    Nr(   r   r   r4   c                 C   st   |dvrt d||d\}}}|dkr#|r#|r#| ||||S |dkr1| |||\}}n|}| |||S )a  
        Parse an adapter specification not using ``file:`` notation and return
        an object of an appropriate Adapter class.

        name -- Adapter name if not included as part of the spec. (If spec is
        'name=ADAPTER', name will be 'name'.)

        cmdline_type -- describes which commandline parameter was used (``-a``
        is 'back', ``-b`` is 'anywhere', and ``-g`` is 'front').
        r   zcmdline_type cannot be {!r}z...)r;   r,   rP   _parse_linked_normalize_ellipsis_parse_not_linked)r$   r(   r   r   spec1rd   spec2r%   r%   r&   r)   $  s   zAdapterParser._parsers   rt   c                 C   s`   |dkrt d| s|dkr|}||fS t d|s,|dkr&d}| }||fS | }||fS t d)Nr!   z2No ellipsis ("...") allowed in "anywhere" adaptersr    zInvalid adapter specificationr   zExpected either spec1 or spec2)r;   )rs   rt   r   r(   r%   r%   r&   rq   ;  s   z!AdapterParser._normalize_ellipsisc                 C   s   t ||}| }|jddr|ttfv rd|jd< d|jv r%td| j	 }|
|j |d|j|d u r;|jn|d|S )	Nr!   FTZforce_anywhererL   zA'required' and 'optional' can only be used within linked adapters)r   r   r%   )r   r+   rf   r#   popr   r   r;   rn   copyupdater   r   )r$   r(   r   r   Zaspecrf   r#   r%   r%   r&   rr   R  s   


zAdapterParser._parse_not_linkedc                 C   s   |dkrt dt|d}t|d}|du r|j}|jdu}|jdu}| j }	|	|j | j }
|
|j |dkrDd}d}n|}|}|		d|}|
	d|}|
 |jfddi|	}|
 |jfddi|
}t|||||d	S )
z6Return a linked adapter from two specification stringsr!   z*'anywhere' (-b) adapters may not be linkedr   r    NTrL   r   )front_adapterback_adapterfront_requiredback_requiredr   )r;   r   r+   r   r   rn   rv   rw   r#   ru   rf   r   r   )r$   rs   rt   r   r   Z
front_specZ	back_specZfront_anchoredZback_anchoredZfront_parametersZback_parametersrz   r{   rx   ry   r%   r%   r&   rp   ^  s@   



zAdapterParser._parse_linkedc           	   
   c   s    | drFt|dd ddd+}t|}|D ]}|jdd}|r'|d nd}| j|j||dV  qW d   dS 1 s?w   Y  dS z| j||ddV  W dS  ty| } zt|	 rwd	
|d
 d
| }t|jd d |  d}~ww )aQ  
        Parse an adapter specification and yield appropriate Adapter classes.
        This works like the _parse_no_file() function above, but also supports the
        ``file:`` notation for reading adapters from an external FASTA
        file. Since a file can contain multiple adapters, this
        function is a generator.
        zfile:   Nrbr   )modethreadsr   )r   zA file exists named '{}'. zFTo use the sequences in that file as adapter sequences, write 'file:' z!before the path, as in 'file:{}'.
)r]   r
   r   r   r:   r)   r   r   r   existsr,   args)	r$   r(   r   fZfastarecordr   rJ   Zextra_messager%   r%   r&   r+     s.   
"zAdapterParser.parsetype_spec_pairsc                 C   s8   g }|D ]\}}|dvrt d|| || q|S )ap  
        Parse all three types of commandline options that can be used to
        specify adapters. adapters must be a list of (str, str) pairs, where the first is
        the adapter type (either 'front', 'back' or 'anywhere') and the second is the
        adapter specification given on the commandline

        Return a list of appropriate Adapter classes.
        >   r    r!   r   z,adapter type must be front, back or anywhere)r;   extendr+   )r$   r   adaptersr   r(   r%   r%   r&   parse_multi  s   	zAdapterParser.parse_multi)r    N)r    )r.   rg   rh   ri   r'   rj   r   r   r)   rl   r   rq   rr   r   rp   r   r+   r   r   r%   r%   r%   r&   rm     s      ",&rm   ) ri   r9   loggingpathlibr   typingr   r   r   r   r   r   r	   r
   Zdnaio.readersr   r   r   r   r   r   r   r   r   r   r   r   	getLoggerr.   loggerr   rm   r%   r%   r%   r&   <module>   s    $0
  