o
    Dfk                     @   s2  d Z ddlZddlmZ ddlmZ ddlmZmZm	Z	m
Z
mZmZ ddlmZmZ ddlmZ e ZG d	d
 d
eZG dd deZdd ZG dd dZG dd dZG dd deZG dd deeZG dd deZG dd deZdgfdefddZG dd deZ G d d! d!e eZ!G d"d# d#e!eZ"G d$d% d%e"Z#G d&d' d'e"Z$G d(d) d)e"Z%G d*d+ d+e#Z&G d,d- d-e$Z'G d.d/ d/e&Z(G d0d1 d1e'Z)G d2d3 d3eZ*G d4d5 d5e!Z+G d6d7 d7e Z,G d8d9 d9e eZ-G d:d; d;e-Z.G d<d= d=e-Z/d>d? Z0d@e	e dee1e1f fdAdBZ2dS )Cz
Adapter finding and trimming classes

The ...Adapter classes are responsible for finding adapters.
The ...Match classes trim the reads.
    N)Enum)defaultdict)OptionalTupleSequenceDictAnyList)ABCabstractmethod   )alignc                   @   s   e Zd ZdS )InvalidCharacterN)__name__
__module____qualname__ r   r   Z/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/cutadapt/adapters.pyr      s    r   c                   @   s^   e Zd ZejejB ejB ZejejB ejB Z	ejZ
ejZejejB ZejejB ZejZdZdS )WherelinkedN)r   r   r   r   ZSTART_WITHIN_SEQ2ZSTOP_WITHIN_SEQ2ZSTOP_WITHIN_SEQ1BACKZSTART_WITHIN_SEQ1FRONTPREFIXSUFFIXFRONT_NOT_INTERNALBACK_NOT_INTERNALZ
SEMIGLOBALANYWHERELINKEDr   r   r   r   r      s    r   c                   C   s   t tS N)r   intr   r   r   r   returns_defaultdict_int%   s   r    c                   @   sR   e Zd ZdZdddZdd Zdefd	d
Zedd Z	de
dee
 fddZdS )EndStatisticsz!Statistics about the 5' or 3' endadapterSingleAdapterc                 C   sZ   |j | _ |j| _|j| _|j| _|j| _tt| _dddddd| _	t
|tttf| _d S )Nr   ACGT )max_error_ratesequenceeffective_lengthadapter_wildcardshas_wildcardsallows_partial_matchesr   r    errorsadjacent_bases
isinstanceFrontAdapterNonInternalFrontAdapterPrefixAdapter_remove_prefix)selfr"   r   r   r   __init__/   s   
zEndStatistics.__init__c                 C   s&   dd | j  D }d| j|| jS )Nc                 S   s   i | ]	\}}|t |qS r   )dict).0kvr   r   r   
<dictcomp><   s    z*EndStatistics.__repr__.<locals>.<dictcomp>z>EndStatistics(max_error_rate={}, errors={}, adjacent_bases={}))r0   itemsformatr*   r1   )r7   r0   r   r   r   __repr__;   s   zEndStatistics.__repr__otherc                 C   s   t || js
td| j|jks| j|jks| j|jkr tddD ]}| j|  |j| 7  < q"|j	 D ]\}}|D ]}| j| |  |j| | 7  < q<q6| S )NzCannot comparez+Incompatible EndStatistics, cannot be addedr$   )
r2   	__class__
ValueErrorr*   r+   r,   RuntimeErrorr1   r0   r>   )r7   rA   baselengthZ
error_dictr0   r   r   r   __iadd__C   s   "zEndStatistics.__iadd__c                 C   s   dd | j  D }|S )Nc                 S   s   i | ]\}}|t | qS r   )sumvalues)r:   rF   r0   r   r   r   r=   U   s    z)EndStatistics.lengths.<locals>.<dictcomp>)r0   r>   )r7   dr   r   r   lengthsS   s   zEndStatistics.lengths
gc_contentreturnc                 C   sx   | j }| jr|ddd }| jrdnd}d}|g}t|D ]\}}||v r,||d 9 }n|d| d 9 }|| q|S )a  
        Estimate probabilities that this adapter end matches a
        random sequence. Indels are not taken into account.

        Returns a list p, where p[i] is the probability that
        i bases of this adapter match a random sequence with
        GC content gc_content.
        NZCGRYSKMBDHVNZGCg      ?g       @)r+   r6   r.   	enumerateappend)r7   rL   seqZallowed_basespZprobabilitiesicr   r   r   random_match_probabilitiesX   s   	z(EndStatistics.random_match_probabilitiesN)r"   r#   )r   r   r   __doc__r8   r@   r   rG   propertyrK   floatr	   rU   r   r   r   r   r!   ,   s    

r!   c                   @   s@   e Zd ZdZ	dddddded fdd	Zd
d ZdddZdS )AdapterStatisticsz
    Statistics about an adapter. An adapter can work on the 5' end (front)
    or 3' end (back) of a read, and statistics for that are captured
    separately in EndStatistics objects.
    Nr"   Adapterfrontr#   backc                 C   s@   |j | _ || _t|| _|d u rt|| _nt|| _d| _d S Nr   )namer"   r!   r[   r\   reverse_complemented)r7   r"   r[   r\   r   r   r   r8   x   s   


zAdapterStatistics.__init__c                 C      d | j| j| jS )Nz-AdapterStatistics(name={}, front={}, back={}))r?   r^   r[   r\   r7   r   r   r   r@      s
   zAdapterStatistics.__repr__rA   c                 C   s4   |  j |j 7  _ |  j|j7  _|  j|j7  _| S r   )r[   r\   r_   r7   rA   r   r   r   rG      s   zAdapterStatistics.__iadd__r   )rA   rY   )r   r   r   rV   r   r8   r@   rG   r   r   r   r   rY   q   s    

rY   c                   @   sl   e Zd ZU ded< edeeef fddZedeeef fddZede	e	 fdd	Z
ed
d ZdS )MatchrZ   r"   rM   c                 C      d S r   r   ra   r   r   r   remainder_interval      zMatch.remainder_intervalc                 C   rd   r   r   ra   r   r   r   retained_adapter_interval   rf   zMatch.retained_adapter_intervalc                 C   rd   r   r   r7   readr   r   r   get_info_records   rf   zMatch.get_info_recordsc                 C   rd   r   r   rh   r   r   r   trimmed   rf   zMatch.trimmedN)r   r   r   __annotations__r   r   r   re   rg   r	   rj   rk   r   r   r   r   rc      s   
 rc   c                   @   s~   e Zd ZdZg dZdedededededed	d
defddZdd Zde	fddZ
ddedefddZdee fddZdS )SingleMatchzG
    Representation of a single adapter matched to a single string
    )
astartastoprstartrstopmatchesr0   r"   r+   rF   adjacent_basern   ro   rp   rq   rr   r0   r"   r#   r+   c	           	      C   sD   d| _ || _|| _|| _|| _|| _|| _|| _|| _|| | _	d S )Nr)   )
rs   rn   ro   rp   rq   rr   r0   r"   r+   rF   )	r7   rn   ro   rp   rq   rr   r0   r"   r+   r   r   r   r8      s   zSingleMatch.__init__c                 C       d | j| j| j| j| j| jS )NzLSingleMatch(astart={}, astop={}, rstart={}, rstop={}, matches={}, errors={})r?   rn   ro   rp   rq   rr   r0   ra   r   r   r   r@         zSingleMatch.__repr__rM   c                 C   sl   |j | j u o5| j|jko5| j|jko5| j|jko5| j|jko5| j|jko5| j|jko5| j|ju o5| j|jkS r   )	rB   rn   ro   rp   rq   rr   r0   r"   r+   rb   r   r   r   __eq__   s"   







zSingleMatch.__eq__Nwildcard_charc                    s$    fddt  jD }d|S )a4  
        Return a string that contains, for each wildcard character,
        the character that it matches. For example, if the adapter
        ATNGNA matches ATCGTA, then the string 'CT' is returned.

        If there are indels, this is not reliable as the full alignment
        is not available.
        c                    sF   g | ]} j j j|  kr! j| t jk r j j|  qS r   )r"   r+   rn   rp   len)r:   rS   r7   ry   r   r   
<listcomp>   s
    z)SingleMatch.wildcards.<locals>.<listcomp>r)   )rangerF   join)r7   ry   	wildcardsr   r{   r   r      s   	
zSingleMatch.wildcardsc              	   C   s   |j }|j}d| j| j| j|d| j || j| j || jd  | jjg}|rA||d| j || j| j || jd  g7 }|gS |g d7 }|gS )Nr)   r   )r)   r)   r)   )r+   	qualitiesr0   rp   rq   r"   r^   )r7   ri   rQ   r   infor   r   r   rj      s(   
zSingleMatch.get_info_recordsN)rx   )r   r   r   rV   	__slots__r   strr8   r@   boolrw   r   r	   rj   r   r   r   r   rm      s0    	
rm   c                   @   p   e Zd ZdZdd ZdefddZdeeef fddZ	deeef fd	d
Z
dd Zdd ZdefddZdS )RemoveBeforeMatchz.A match that removes sequence before the matchc                 C   rt   )NzRRemoveBeforeMatch(astart={}, astop={}, rstart={}, rstop={}, matches={}, errors={})ru   ra   r   r   r   r@     rv   zRemoveBeforeMatch.__repr__rM   c                 C   s   | j d| j S z
        Return the part of the read before this match if this is a
        'front' (5') adapter,
        return the part after the match if this is not a 'front' adapter (3').
        This can be an empty string.
        N)r+   rp   ra   r   r   r   rest	     zRemoveBeforeMatch.restc                 C      | j t| jfS )
        Return an interval (start, stop) that describes the part of the read that would
        remain after trimming
        )rq   rz   r+   ra   r   r   r   re     s   z$RemoveBeforeMatch.remainder_intervalc                 C   r   r   )rp   rz   r+   ra   r   r   r   rg        z+RemoveBeforeMatch.retained_adapter_intervalc                 C   s   t | jd S r   )slicerq   ra   r   r   r   
trim_slice     zRemoveBeforeMatch.trim_slicec                 C   s   || j d  S r   rq   rh   r   r   r   rk         zRemoveBeforeMatch.trimmed
statisticsc                 C   s    |j j| j | j  d7  < dS !Update AdapterStatistics in placer   N)r[   r0   rq   )r7   r   r   r   r   update_statistics#  s    z#RemoveBeforeMatch.update_statisticsNr   r   r   rV   r@   r   r   r   r   re   rg   r   rk   rY   r   r   r   r   r   r         	r   c                   @   r   )RemoveAfterMatchz-A match that removes sequence after the matchc                 C   rt   )NzQRemoveAfterMatch(astart={}, astop={}, rstart={}, rstop={}, matches={}, errors={})ru   ra   r   r   r   r@   +  rv   zRemoveAfterMatch.__repr__rM   c                 C   s   | j | jd S r   )r+   rq   ra   r   r   r   r   /  r   zRemoveAfterMatch.restc                 C   
   d| j fS )r   r   rp   ra   r   r   r   re   8  s   
z#RemoveAfterMatch.remainder_intervalc                 C   r   r]   r   ra   r   r   r   rg   ?     
z*RemoveAfterMatch.retained_adapter_intervalc                 C   s   t d | jS r   )r   rp   ra   r   r   r   r   B  r   zRemoveAfterMatch.trim_slicec                 C   s   |d | j  S r   r   rh   r   r   r   rk   F  r   zRemoveAfterMatch.trimmedr   c                 C   sx   | j | jd | j }|jjt| j | j  | j  d7  < z|jj|  d7  < W dS  ty;   d|jjd< Y dS w )r   r   r)   N)r+   rp   r\   r0   rz   r1   KeyError)r7   r   rs   r   r   r   r   I  s   &z"RemoveAfterMatch.update_statisticsNr   r   r   r   r   r   (  r   r   rM   c                 C   s    t | d }| d  d7  < |S )Nr   r   )r   )_startr^   r   r   r   _generate_adapter_nameS  s   r   c                   @   s<   e Zd ZdZdefddZedd Zedefdd	Zd
S )	Matchablez'Something that has a match_to() method.r^   c                 O   s
   || _ d S r   r^   )r7   r^   argskwargsr   r   r   r8   \  r   zMatchable.__init__c                 C   rd   r   r   ra   r   r   r   enable_debug_  rf   zMatchable.enable_debugr+   c                 C   rd   r   r   r7   r+   r   r   r   match_toc  rf   zMatchable.match_toN)	r   r   r   rV   r   r8   r   r   r   r   r   r   r   r   Y  s    
r   c                   @   s"   e Zd ZdZedefddZdS )rZ   zadapter with one componentrM   c                 C   rd   r   r   ra   r   r   r   create_statisticsl  rf   zAdapter.create_statisticsN)r   r   r   descriptionr   rY   r   r   r   r   r   rZ   h  s    rZ   c                       s   e Zd ZU dZdZeed< 						d#ded	ed
e	dedede
e def fddZde	dejfddZdd Zede	fddZd$ddZedd ZedefddZde	fdd Zdefd!d"Z  ZS )%r#   aw  
    This class can find a single adapter characterized by sequence, error rate,
    type etc. within reads.

    where --  A Where enum value. This influences where the adapter is allowed to appear within the
        read.

    sequence -- The adapter sequence as string. Will be converted to uppercase.
        Also, Us will be converted to Ts.

    max_errors -- Maximum allowed errors (non-negative float). If the values is less than 1, this is
        interpreted as a rate directly and passed to the aligner. If it is 1 or greater, the value
        is converted to a rate by dividing it by the length of the sequence.

        The error rate is the number of errors in the alignment divided by the length
        of the part of the alignment that matches the adapter.

    minimum_overlap -- Minimum length of the part of the alignment
        that matches the adapter.

    read_wildcards -- Whether IUPAC wildcards in the read are allowed.

    adapter_wildcards -- Whether IUPAC wildcards in the adapter are
        allowed.

    name -- optional name of the adapter. If not provided, the name is set to a
        unique number.
    Tr/   皙?   FNr+   
max_errorsmin_overlapread_wildcardsr-   r^   indelsc           
         s   |d u rt  n|| _t | j d| _| dd| _| js$td|dkr/|t	| j }|| _
t|t	| j| _td}|r[t| j|ks[| jD ]}	|	|vrZtd|	| jqK|oft| jtdk | _|| _|| _|  | _d S )	NFUr(   zAdapter sequence is emptyr   ZABCDGHKMNRSTUVWXYziCharacter {!r} in adapter sequence {!r} is not a valid IUPAC code. Use only characters ABCDGHKMNRSTUVWXY.ZACGT)r   r^   superr8   _debugupperreplacer+   rC   rz   r*   minr   	frozensetsetr   r?   r-   r   r   _aligneraligner)
r7   r+   r   r   r   r-   r^   r   ZiupacrT   rB   r   r   r8     s,   


zSingleAdapter.__init__flagsrM   c              	   C   s0   | j rdnd}tj| j| j|| j| j|| jdS )Nr   i )r   wildcard_refwildcard_query
indel_costr   )r   r   Alignerr+   r*   r-   r   r   )r7   r   r   r   r   r   _make_aligner  s   zSingleAdapter._make_alignerc                 C   s   dj dd| jjit| S )Nz<{cls}(name={name!r}, sequence={sequence!r}, max_error_rate={max_error_rate}, min_overlap={min_overlap}, read_wildcards={read_wildcards}, adapter_wildcards={adapter_wildcards}, indels={indels})>clsr   )r?   rB   r   varsra   r   r   r   r@     s   zSingleAdapter.__repr__c                 C      | j jS r   )r   r,   ra   r   r   r   r,        zSingleAdapter.effective_lengthc                 C   s   d| _ | j  dS )zg
        Print out the dynamic programming matrix after matching a read to an
        adapter.
        TN)r   r   r   ra   r   r   r   r     s   zSingleAdapter.enable_debugc                 C   rd   r   r   ra   r   r   r   r     rf   zSingleAdapter._alignerc                 C      dS )
        Attempt to match this adapter to the given string.

        Return a Match instance if a match was found;
        return None if no match was found given the matching criteria (minimum
        overlap length, maximum error rate).
        Nr   r   r   r   r   r         zSingleAdapter.match_toc                 C   
   t | jS r   )rz   r+   ra   r   r   r   __len__  r   zSingleAdapter.__len__c                 C   s
   t | | S r   )rY   ra   r   r   r   r     r   zSingleAdapter.create_statistics)r   r   FTNT)rM   N)r   r   r   rV   r/   r   rl   r   rX   r   r   r8   r   r   r   r@   rW   r,   r   r   r   r   r   rY   r   __classcell__r   r   r   r   r#   q  sF   
 !

	r#   c                       sB   e Zd ZdZdZ fddZdejfddZde	fd	d
Z
  ZS )r3   zA 5' adapterz
regular 5'c                    $   | dd| _t j|i | d S NZforce_anywhereFpop_force_anywherer   r8   r7   r   r   r   r   r   r8        zFrontAdapter.__init__rM   c                 C      |  | jr
tjjS tjjS r   )r   r   r   r   valuer   ra   r   r   r   r        zFrontAdapter._alignerr+   c                 C   :   | j |}| jrt| j j |du rdS t|| |dS z
        Attempt to match this adapter to the given read.

        Return a Match instance if a match was found;
        return None if no match was found given the matching criteria (minimum
        overlap length, maximum error rate).
        Nr"   r+   )r   locater   printdpmatrixr   r7   r+   	alignmentr   r   r   r        zFrontAdapter.match_to)r   r   r   rV   r   r8   r   r   r   r   r   r   r   r   r   r   r3     s    r3   c                       s:   e Zd ZdZdZ fddZdd Zdefdd	Z  Z	S )
BackAdapterzA 3' adapterz
regular 3'c                    r   r   r   r   r   r   r   r8   
  r   zBackAdapter.__init__c                 C   r   r   )r   r   r   r   r   r   ra   r   r   r   r     r   zBackAdapter._alignerr+   c                 C   r   r   )r   r   r   r   r   r   r   r   r   r   r     r   zBackAdapter.match_to)
r   r   r   rV   r   r8   r   r   r   r   r   r   r   r   r     s    r   c                   @   *   e Zd ZdZdZdd ZdefddZdS )	AnywhereAdapterz
    An adapter that can be 5' or 3'. If a match involves the first base of
    the read, it is assumed to be a 5' adapter and a 3' otherwise.
    zvariable 5'/3'c                 C      |  tjjS r   )r   r   r   r   ra   r   r   r   r   )  r   zAnywhereAdapter._alignerr+   c                 C   sb   | j | }| jrt| j j |du rdS |d dkr't|| |d}|S t|| |d}|S )r   N   r   r   )r   r   r   r   r   r   r   r   )r7   r+   r   matchr   r   r   r   ,  s   zAnywhereAdapter.match_toNr   r   r   rV   r   r   r   r   r   r   r   r   r   !  s
    r   c                   @   r   )	r4   zA non-internal 5' adapterznon-internal 5'c                 C   r   r   )r   r   r   r   ra   r   r   r   r   F  r   z NonInternalFrontAdapter._alignerr+   c                 C   R   | j |}| jrzt| j j W n	 ty   Y nw |d u r!d S t|| |dS Nr   )r   r   r   r   r   AttributeErrorr   r   r   r   r   r   I     z NonInternalFrontAdapter.match_toNr   r   r   r   r   r4   A  
    r4   c                   @   r   )	NonInternalBackAdapterzA non-internal 3' adapterznon-internal 3'c                 C   r   r   )r   r   r   r   ra   r   r   r   r   [  r   zNonInternalBackAdapter._alignerr+   c                 C   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   ^  r   zNonInternalBackAdapter.match_toNr   r   r   r   r   r   V  r   r   c                   @       e Zd ZdZdZdZdd ZdS )r5   zAn anchored 5' adapterzanchored 5'Fc                 C   2   | j stj| j| j| j| j| jdS | t	j
jS N)r   r   r   )r   r   ZPrefixComparerr+   r*   r-   r   r   r   r   r   r   ra   r   r   r   r   q     zPrefixAdapter._alignerNr   r   r   rV   r   r/   r   r   r   r   r   r5   k  
    r5   c                   @   r   )SuffixAdapterzAn anchored 3' adapterzanchored 3'Fc                 C   r   r   )r   r   ZSuffixComparerr+   r*   r-   r   r   r   r   r   r   ra   r   r   r   r     r   zSuffixAdapter._alignerNr   r   r   r   r   r   ~  r   r   c                   @   s   e Zd ZdZdededdfddZdd	 Zed
d Z	edd Z
dd Zedd Zdd Zdeeef fddZdeeef fddZdee fddZdS )LinkedMatchz.
    Represent a match of a LinkedAdapter
    front_match
back_matchr"   LinkedAdapterc                 C   s*   |d us
|d us
J || _ || _|| _d S r   )r   r   r"   )r7   r   r   r"   r   r   r   r8     s   
zLinkedMatch.__init__c                 C   r`   )Nz:<LinkedMatch(front_match={!r}, back_match={}, adapter={})>)r?   r   r   r"   ra   r   r   r   r@     s   zLinkedMatch.__repr__c                 C   s4   d}| j dur|| j j7 }| jdur|| jj7 }|S )zNumber of matching basesr   N)r   rr   r   )r7   mr   r   r   rr     s   

zLinkedMatch.matchesc                 C   s4   d}| j d ur|| j j7 }| jd ur|| jj7 }|S r]   )r   r0   r   )r7   er   r   r   r0     s   

zLinkedMatch.errorsc                 C   s(   | j r	| j |}| jr| j|}|S r   )r   rk   r   rh   r   r   r   rk     s
   zLinkedMatch.trimmedc                 C   r   r   )r   rs   ra   r   r   r   rs     r   zLinkedMatch.adjacent_basec                 C   sd   | j r|jj| j j | j j  d7  < | jr0t| jj| jj }|jj| | jj  d7  < dS dS r   )	r   r[   r0   rq   r   rz   r+   rp   r\   )r7   r   rF   r   r   r   r     s     zLinkedMatch.update_statisticsrM   c                 C   s   dd | j | jfD }t|S )Nc                 S   s   g | ]}|d ur|qS r   r   )r:   r   r   r   r   r|     s    z2LinkedMatch.remainder_interval.<locals>.<listcomp>)r   r   	remainder)r7   rr   r   r   r   re     s   zLinkedMatch.remainder_intervalc                 C   sN   | j r| j j}| j j}nd }}| jr| jj| }||fS t| j j}||fS r]   )r   rp   rq   r   rz   r+   )r7   startoffsetendr   r   r   rg     s   
z%LinkedMatch.retained_adapter_intervalc                 C   sr   g }| j df| jdffD ]*\}}|d u rq||d }| jjd u r$dn| jj| |d< || ||}q|S )Nz;1z;2r   none   )r   r   rj   r"   r^   rP   rk   )r7   ri   recordsr   Z
namesuffixrecordr   r   r   rj     s    
zLinkedMatch.get_info_recordsN)r   r   r   rV   r   r   r8   r@   rW   rr   r0   rk   rs   r   r   r   re   rg   r	   rj   r   r   r   r   r     s    
	

r   c                
       s~   e Zd ZdZdZdededededef
 fdd	Zd
d Z	dede
e fddZdefddZedd Zedd Z  ZS )r   z'A 5' adapter combined with a 3' adapterr   front_adapterback_adapterfront_requiredback_requiredr^   c                    sN   t  | || _|| _tj| _|d u rt n|| _|| _	| j| j	_|| _
d S r   )r   r8   r   r  r   r   wherer   r^   r   r   )r7   r   r   r   r  r^   r   r   r   r8     s   

zLinkedAdapter.__init__c                 C   s   | j   | j  d S r   )r   r   r   ra   r   r   r   r     s   
zLinkedAdapter.enable_debugr+   rM   c                 C   sd   | j |}| jr|du rdS |dur||  }| j|}|du r,| js*|du r,dS t||| S )z@
        Match the two linked adapters against a string
        N)r   r   r   r   r   r  r   )r7   r+   r   r   r   r   r   r      s   zLinkedAdapter.match_toc                 C   s   t | | j| jS r   )rY   r   r   ra   r   r   r   r     r   zLinkedAdapter.create_statisticsc                 C   s   | j jd | jj S )Nz...)r   r+   r   ra   r   r   r   r+     s   zLinkedAdapter.sequencec                 C   rd   r   r   ra   r   r   r   remove  rf   zLinkedAdapter.remove)r   r   r   rV   r   r#   r   r   r8   r   r   r   r   rY   r   rW   r+   r  r   r   r   r   r   r     s*    
r   c                       sX   e Zd ZdZdee f fddZdd Zdd Zd	d
 Z	de
dee fddZ  ZS )MultipleAdaptersz-
    Represent multiple adapters at once
    adaptersc                    s   t  jdd || _d S )NZmultiple_adaptersr   )r   r8   	_adapters)r7   r  r   r   r   r8     s   
zMultipleAdapters.__init__c                 C   s   | j D ]}|  qd S r   )r  r   )r7   ar   r   r   r   "  s   

zMultipleAdapters.enable_debugc                 C   s
   | j | S r   )r  )r7   itemr   r   r   __getitem__&  r   zMultipleAdapters.__getitem__c                 C   r   r   )rz   r  ra   r   r   r   r   )  r   zMultipleAdapters.__len__r+   rM   c                 C   sX   d}| j D ]$}||}|du rq|du s'|j|jks'|j|jkr)|j|jk r)|}q|S )z
        Find the adapter that best matches the sequence.

        Return either a Match instance or None if there are no matches.
        N)r  r   rr   r0   )r7   r+   
best_matchr"   r   r   r   r   r   ,  s   

zMultipleAdapters.match_to)r   r   r   rV   r   r   r8   r   r	  r   r   r   rm   r   r   r   r   r   r   r    s    r  c                       s   e Zd ZdZeeeeeef f Z	 fddZ
dd ZdefddZed	d
 ZedefddZedd Zedd Zdeee df fddZdefddZdefddZdd Z  ZS )IndexedAdaptersa  
    Represent multiple adapters of the same type at once and use an index data structure
    to speed up matching. This acts like a "normal" Adapter as it provides a match_to
    method, but is faster with lots of adapters.

    There are quite a few restrictions:
    - the error rate allows at most 2 mismatches
    - wildcards in the adapter are not allowed
    - wildcards in the read are not allowed

    Use the is_acceptable() method to check individual adapters.
    c                    s   t  jdd |std|D ]}| | q|| _t|| _|  \| _| _	t
dt| jdd t| jdkrD| jd | _| j| _n| j| _|  | _d	S )
z+All given adapters must be of the same typeZindexed_adaptersr   zAdapter list is emptyzString lengths in the index: %sTreverser   r   N)r   r8   rC   _acceptr  r  _multiple_adapters_make_index_lengths_indexloggerdebugsortedrz   _length_match_to_one_lengthr   _match_to_multiple_lengths_get_make_affix_make_affix)r7   r  r"   r   r   r   r8   O  s   

zIndexedAdapters.__init__c                 C   s   d | jj| jS )Nz{}(adapters={!r}))r?   rB   r   r  ra   r   r   r   r@   a  s   zIndexedAdapters.__repr__r+   c                 C   r   )z4Never called because it gets overwritten in __init__Nr   r   r   r   r   r   d  r   zIndexedAdapters.match_toc                 C   rd   r   r   ra   r   r   r   r  g  rf   zIndexedAdapters._get_make_affixrM   c                 C   rd   r   r   r7   r"   rF   rr   r0   r+   r   r   r   _make_matchk  rf   zIndexedAdapters._make_matchc                 C   sB   |j rtd|jrtdtt||j }|dkrtddS )z3Raise a ValueError if the adapter is not acceptablez#Wildcards in the read not supportedz&Wildcards in the adapter not supportedr   zError rate too highN)r   rC   r-   r   rz   r*   )r   r"   r;   r   r   r   r  o  s   zIndexedAdapters._acceptc                 C   s&   z|  | W dS  ty   Y dS w )z
        Return whether this adapter is acceptable for being used in an index

        Adapters are not acceptable if they allow wildcards, allow too many errors,
        or would lead to a very large index.
        FT)r  rC   r   r"   r   r   r   is_acceptablez  s   zIndexedAdapters.is_acceptableAdapterIndexc                 C   s   t dt| j t }t }d}| jD ]Y}|j}t|jt| }|j	r(t
jnt
j}|||D ]<\}}	}
||v r^|| \}}}|
|k rEq0||
kr]|s]t d|j|j|j|j|||
 d}n||	|
f||< |t| q0qt dt| t|dd|fS )Nz!Building index of %s adapters ...FzAdapters %s %r and %s %r are very similar. At %s allowed errors, the sequence %r cannot be assigned uniquely because the number of matches is %s compared to both adapters.Tz%Built an index containing %s strings.r  )r  r   rz   r  r9   r   r+   r   r*   r   r   Zedit_environmentZhamming_environmentwarningr^   addr  )r7   indexrK   Z
has_warnedr"   r+   r;   environmentsr0   rr   Zother_adapterZother_errorsZother_matchesr   r   r   r    s4   

zIndexedAdapters._make_indexc                 C   sd   |  | | j}d|v r| j|S z
| j| \}}}W n
 ty'   Y dS w | || j|||S )
        Match the adapters against a string and return a Match that represents
        the best match or None if no match was found
        rx   N)r  r   r  r  r   r  r   r  )r7   r+   affixr"   r   r   r   r   r   r    s   z$IndexedAdapters._match_to_one_lengthc              	   C   s   |  }d}d}d}d}d}| jD ]F}||k r n?| ||}|r/d|v r-| j|  S d}z
| j| \}	}
}W n	 tyB   Y qw ||ksO||krW|
|k rW|	}|
}|}|}q|dkr^dS | |||||S )r%  Nr   rN   i  Trx   F)r   r  r  r  r   r  r   r  )r7   r+   r&  Zbest_adapterZbest_lengthZbest_mZbest_eZcheck_nrF   r"   r   r   r   r   r   r    s8   
z*IndexedAdapters._match_to_multiple_lengthsc                 C   rd   r   r   ra   r   r   r   r     s   zIndexedAdapters.enable_debug)r   r   r   rV   r   r   r   r#   r   r  r8   r@   r   r   r  rm   r  classmethodr  r  r	   r  r  r  r   r   r   r   r   r   r  @  s$    



'r  c                       <   e Zd Ze fddZdd Zdd Zedd Z  Z	S )	IndexedPrefixAdaptersc                       t |ts	tdt |S )Nz%Only 5' anchored adapters are allowed)r2   r5   rC   r   r  r  r   r   r   r       
zIndexedPrefixAdapters._acceptc              
   C   s   t dt|jd|||||dS Nr   )rn   ro   rp   rq   rr   r0   r"   r+   )r   rz   r+   r  r   r   r   r    s   z!IndexedPrefixAdapters._make_matchc                 C      | j S r   )_make_prefixra   r   r   r   r       z%IndexedPrefixAdapters._get_make_affixc                 C   s   | d | S r   r   r$  nr   r   r   r.    r   z"IndexedPrefixAdapters._make_prefix)
r   r   r   r'  r  r  r  staticmethodr.  r   r   r   r   r   r)        r)  c                       r(  )	IndexedSuffixAdaptersc                    r*  )Nz%Only anchored 3' adapters are allowed)r2   r   rC   r   r  r  r   r   r   r    r+  zIndexedSuffixAdapters._acceptc              
   C   s*   t dt|jt|| t|||||dS r,  )r   rz   r+   r  r   r   r   r    s   
z!IndexedSuffixAdapters._make_matchc                 C   r-  r   )_make_suffixra   r   r   r   r    r/  z%IndexedSuffixAdapters._get_make_affixc                 C   s   | | d  S r   r   r0  r   r   r   r5    s   z"IndexedSuffixAdapters._make_suffix)
r   r   r   r'  r  r  r  r2  r5  r   r   r   r   r   r4    r3  r4  c                 C   sD   t  }| D ]}|j|jf}||v rtd|j|j |j||< qd S )NzZAdapter %r (%s) was specified multiple times! Please make sure that this is what you want.)r9   rB   r+   r  r   r   r^   )r  rJ   r"   keyr   r   r   warn_duplicate_adapters  s   r7  rr   c                 C   sB   | st dd}| D ]}| \}}||7 }q
|| }||| fS )z
    Determine which section of the read would not be trimmed. Return a tuple (start, stop)
    that gives the interval of the untrimmed part relative to the original read.

    matches must be non-empty
    zmatches must not be emptyr   )rC   re   )rr   r   r   Zmatch_startZ
match_stoprF   r   r   r   r   #  s   
r   )3rV   loggingenumr   collectionsr   typingr   r   r   r   r   r	   abcr
   r   r)   r   	getLoggerr  	Exceptionr   r   r    r!   rY   rc   rm   r   r   r   r   r   rZ   r#   r3   r   r   r4   r   r5   r   r   r   r  r  r)  r4  r7  r   r   r   r   r   r   <module>   sJ     E$X&+	x S6& !"