o
    DfQ                     @   sp  d Z ddlZddlmZ ddlZddlmZ ddlmZm	Z	m
Z
mZ ddlmZmZmZmZmZmZmZmZmZmZ ddlmZmZmZmZmZmZ dd	lmZm Z m!Z!m"Z"m#Z#m$Z$ d
e	e% de%de&fddZ'de	e% de	e% de	e% fddZ(G dd dZ)dede*fddZ+dede%de&de*fddZ,G dd dZ-de)d e&de&de*fd!d"Z.de)d e&de&de*fd#d$Z/dS )%z!
Routines for printing a report.
    N)StringIO)Counter)AnyOptionalListDict   )
EndStatisticsAdapterStatisticsFrontAdapterNonInternalFrontAdapterPrefixAdapterBackAdapterNonInternalBackAdapterSuffixAdapterAnywhereAdapterLinkedAdapter)QualityTrimmerNextseqQualityTrimmerAdapterCutterPairedAdapterCutterReverseComplementerPairedEndModifierWrapper)WithStatisticsTooShortReadFilterTooLongReadFilterNContentFilterCasavaFilterMaximumExpectedErrorsFilter	numeratordenominatorreturnc                 C   s   | d u s|sdS | | S )Ng         )r   r    r"   r"   X/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/cutadapt/report.pysafe_divide   s   r$   abc                 C   s    | d u r|S |d u r| S | | S Nr"   )r%   r&   r"   r"   r#   add_if_not_none   s
   r(   c                   @   s@  e Zd Zd+ddZdefddZded	ed
ee fddZd+ddZ	d+ddZ
edefddZedefddZedefddZedefddZedee fddZedefddZedefddZedefdd Zedefd!d"Zedefd#d$Zedefd%d&Zedefd'd(Zedefd)d*ZdS ),
Statisticsr!   Nc                 C   s   d| _ d| _d| _d| _d| _d| _d| _d| _d| _d| _	ddg| _
ddg| _t t g| _ddg| _ddg| _g g g| _dS )	
        Nr   )paireddid_quality_trimming	too_shorttoo_long
too_many_ntoo_many_expected_errorscasava_filteredreverse_complementednwrittentotal_bp
written_bpr   written_lengthswith_adaptersquality_trimmed_bpadapter_statsselfr"   r"   r#   __init__#   s    



zStatistics.__init__otherc                 C   s  |  j |j 7  _ |  j|j7  _| jd u r|j| _n
| j|jkr$td| jd u r.|j| _n
| j|jkr8tdt| j|j| _t| j|j| _t| j|j| _t| j	|j	| _	t| j
|j
| _
t| j|j| _dD ]}| j|  |j| 7  < | j|  |j| 7  < | j|  |j| 7  < | j|  |j| 7  < | j|  |j| 7  < | j| r|j| rt| j| t|j| krtdtt| j| D ]}| j| |  |j| | 7  < qqj|j| r| j| g ksJ |j| | j|< qj| S )Nz,Incompatible Statistics: paired is not equalz:Incompatible Statistics: did_quality_trimming is not equalr   r   z6Incompatible Statistics objects (adapter_stats length))r3   r4   r+   
ValueErrorr,   r(   r2   r-   r.   r/   r0   r1   r5   r6   r7   r8   r9   r:   lenrange)r<   r>   ijr"   r"   r#   __iadd__7   sJ   



"
zStatistics.__iadd__r3   	total_bp1	total_bp2c                 C   sj   || _ || jd< |du rd| _nd| _|| jd< |D ]}| | q| jdus)J |D ]}| | q+| S )z
        n -- total number of reads
        total_bp1 -- number of bases in first reads
        total_bp2 -- number of bases in second reads. None for single-end data.
        r   NFTr   )r3   r5   r+   _collect_writerr4   _collect_modifier)r<   r3   rF   rG   	modifiersZwriterswritermodifierr"   r"   r#   collect\   s   

zStatistics.collectc                 C   s   t |tr1|  j| 7  _| }| }dD ]}| j|  || 7  < | j|  || 7  < qt|drrt |jtrB|j	| _
d S t |jtrN|j	| _d S t |jtrZ|j	| _d S t |jtrf|j	| _d S t |jtrt|j	| _d S d S d S )Nr?   filter)
isinstancer   r4   Zwritten_readsr6   r7   hasattrrN   r   filteredr-   r   r.   r   r/   r   r0   r   r1   )r<   wr6   r7   rC   r"   r"   r#   rH   s   s(   

	zStatistics._collect_writerc                 C   s  t |tr"dD ]}| j|  |j7  < t|j|  | j|< qd S t |tr2d|jfd|j	fg}nd|fg}|D ]N\}}t |t
tfrN|j| j|< d| _q9t |trh| j|  |j7  < t|j | j|< q9t |tr| j|  |jj7  < t|jj | j|< |j| _q9d S )Nr?   r   r   T)rO   r   r8   listadapter_statisticsvaluesr:   r   Z
_modifier1Z
_modifier2r   r   Ztrimmed_basesr9   r,   r   r   Zadapter_cutterr2   )r<   mrC   Zmodifiers_listrL   r"   r"   r#   rI      s*   




zStatistics._collect_modifierc                 C   
   t | jS r'   )sumr5   r;   r"   r"   r#   total      
zStatistics.totalc                 C   rW   r'   )rX   r9   r;   r"   r"   r#   quality_trimmed   rZ   zStatistics.quality_trimmedc                 C   rW   r'   )rX   r6   r;   r"   r"   r#   total_written_bp   rZ   zStatistics.total_written_bpc                 C      t | j| jS r'   )r$   r4   r3   r;   r"   r"   r#   written_fraction      zStatistics.written_fractionc                    s    fdd j D S )Nc                    s   g | ]}t | jqS r"   )r$   r3   ).0vr;   r"   r#   
<listcomp>   s    z5Statistics.with_adapters_fraction.<locals>.<listcomp>)r8   r;   r"   r;   r#   with_adapters_fraction   s   z!Statistics.with_adapters_fractionc                 C   r]   r'   )r$   r[   rY   r;   r"   r"   r#   quality_trimmed_fraction   r_   z#Statistics.quality_trimmed_fractionc                 C   r]   r'   )r$   r\   rY   r;   r"   r"   r#   total_written_bp_fraction   r_   z$Statistics.total_written_bp_fractionc                 C   r]   r'   )r$   r2   r3   r;   r"   r"   r#   reverse_complemented_fraction   r_   z(Statistics.reverse_complemented_fractionc                 C   r]   r'   )r$   r-   r3   r;   r"   r"   r#   too_short_fraction   r_   zStatistics.too_short_fractionc                 C   r]   r'   )r$   r.   r3   r;   r"   r"   r#   too_long_fraction   r_   zStatistics.too_long_fractionc                 C   r]   r'   )r$   r/   r3   r;   r"   r"   r#   too_many_n_fraction   r_   zStatistics.too_many_n_fractionc                 C   r]   r'   )r$   r0   r3   r;   r"   r"   r#   !too_many_expected_errors_fraction   r_   z,Statistics.too_many_expected_errors_fractionc                 C   r]   r'   )r$   r1   r3   r;   r"   r"   r#   casava_filtered_fraction   r_   z#Statistics.casava_filtered_fraction)r!   N)__name__
__module____qualname__r=   r   rE   intr   rM   rH   rI   propertyrY   r[   r\   floatr^   r   rc   rd   re   rf   rg   rh   ri   rj   rk   r"   r"   r"   r#   r)   "   s@    
%

r)   rT   c                 C   s   | j }| j}| jrOd}d}tdt|| d D ]}t|| }|d||d |d 7 }|}q||krA|d|t|| 7 }n|d||t|| 7 }n	dt||  }d| d S )Nr   
z{}-{} bp: {}; z	{} bp: {}z{}-{} bp: {} zNo. of allowed errors:)Zeffective_lengthmax_error_rateZallows_partial_matchesrB   ro   format)rT   lengthZ
error_rateprevserrorsrr"   r"   r#   error_ranges   s   r{   end_statisticsr3   
gc_contentc           
         s   t  }| j}| j | j|d}tdddddd|d t|D ]D||tt| j  }| }t	  
 }d	 fd
dt|d D }	t|d|t| jtt| j |	d|d q| d S )a  
    Return a formatted histogram. Include the no. of reads expected to be
    trimmed by chance (assuming a uniform distribution of nucleotides in the reads).

    adapter_statistics -- EndStatistics object
    adapter_length -- adapter length
    n -- total no. of reads.
    )r}   rv   countexpectzmax.errzerror counts	)sepfilers   c                 3   s     | ]}t   | V  qd S r'   str)r`   ery   rv   r"   r#   	<genexpr>   s    zhistogram.<locals>.<genexpr>r   z{:.1F}rr   )r   lengthsry   Zrandom_match_probabilitiesprintsortedminrA   sequencemaxkeysjoinrB   ru   ro   rt   getvalue)
r|   r3   r}   siodZmatch_probabilitiesr   r~   Z
max_errorsZerrsr"   r   r#   	histogram   s(   	"	r   c                   @   sJ   e Zd Zdeeef fddZdd Zede	fddZ
defd	d
ZdS )AdjacentBaseStatisticsbasesc                 C   s   || _ d| _t| j  }|dkrd| _dS g | _dD ]-}|dkr#|nd}d| j |  | }| j|d| j |  | f |dkrH|dkrH|| _q|dk rRd| _dS dS )	r*   Nr   )ACGT r   z
none/otherg      ?g?   )r   	_warnbaserX   rU   
_fractionsappend)r<   r   rY   basetextfractionr"   r"   r#   r=   	  s    

zAdjacentBaseStatistics.__init__c                 C   s   d | jS )Nz AdjacentBaseStatistics(bases={}))ru   r   r;   r"   r"   r#   __repr__  s   zAdjacentBaseStatistics.__repr__r!   c                 C   s
   | j d uS r'   )r   r;   r"   r"   r#   should_warn  rZ   z"AdjacentBaseStatistics.should_warnc                 C   sz   | j sdS t }td|d | j D ]\}}td|||d q| jr9td|d td| j|d td|d | S )Nr   z!Bases preceding removed adapters:)r   z  {}: {:.1%}WARNING:z4    The adapter is preceded by "{}" extremely often.zD    The provided adapter sequence could be incomplete at its 5' end.)r   r   r   ru   r   r   r   )r<   r   r   r   r"   r"   r#   __str__#  s   zAdjacentBaseStatistics.__str__N)rl   rm   rn   r   r   ro   r=   r   rp   boolr   r   r"   r"   r"   r#   r     s    r   statstimec                    s  | j dkrdS |dkrd}t   fdd}tjdd dkr"d	}nd
}|d|d| | j  || j | d d  d}| jrG|td7 }n|td7 }| jdurW|d7 }| j	dur`|d7 }| j
duri|d7 }| jdurr|d7 }| jdur{|d7 }| jdur|d7 }|td7 }| jr|d7 }|d7 }| jr|d7 }| jr|d7 }|d7 }|d7 }| jr|d7 }|d7 }| jrd nd!}|j| |d"}|| d#}d$D ]t}| j| D ]k}	t|	jj }
t|	jj }|
| }|	j}|	j}t|tttfr|
dksJ t|tttfr|dksJ | jr |dkrd%nd&}nd'}|d(|d) |	jd( |  t|trN|d*|	jj|	jjt |	jjt |	jj|
| n|d+|	jj|j!t |	jj|d'd, | jduro|d-| n|  |dkr{|  qt|t"r||
d. ||d/ |  |t#|	j |d0 |t$|	j| j | |  |d1 |t$|	j| j | qt|tr|  |t#|	j |t#|	j |d2 |t$|	j| j | |  |d3 |t$|	j| j | qt|tttfr|  |t#|	j |d4 |t$|	j| j | qt|tttfsJ |  |t#|	j t%|	jj&}|p.|j'}|| |d4 |t$|	j| j | qq|rR|d5 |d6 |d7  ( ) S )8z Print report to standard output.r   zNo reads processed!gư>c                     s    |d< t | i | d S )Nr   )r   )argskwargsr   r"   r#   print_s9  s   zfull_report.<locals>.print_sN   )      u   µz>Finished in {:.2F} s ({:.0F} {}s/read; {:.2F} M reads/minute).g    .A<   z
=== Summary ===

a
          Total read pairs processed:      {o.n:13,d}
          Read 1 with adapter:           {o.with_adapters[0]:13,d} ({o.with_adapters_fraction[0]:.1%})
          Read 2 with adapter:           {o.with_adapters[1]:13,d} ({o.with_adapters_fraction[1]:.1%})
        z        Total reads processed:           {o.n:13,d}
        Reads with adapters:             {o.with_adapters[0]:13,d} ({o.with_adapters_fraction[0]:.1%})
        zgReverse-complemented:            {o.reverse_complemented:13,d} ({o.reverse_complemented_fraction:.1%})
z\{pairs_or_reads} that were too short:       {o.too_short:13,d} ({o.too_short_fraction:.1%})
zZ{pairs_or_reads} that were too long:        {o.too_long:13,d} ({o.too_long_fraction:.1%})
z^{pairs_or_reads} with too many N:           {o.too_many_n:13,d} ({o.too_many_n_fraction:.1%})
zz{pairs_or_reads} with too many exp. errors: {o.too_many_expected_errors:13,d} ({o.too_many_expected_errors_fraction:.1%})
zh{pairs_or_reads} failed CASAVA filter:      {o.casava_filtered:13,d} ({o.casava_filtered_fraction:.1%})
z    {pairs_or_reads} written (passing filters): {o.written:13,d} ({o.written_fraction:.1%})

    Total basepairs processed: {o.total:13,d} bp
    z"  Read 1: {o.total_bp[0]:13,d} bp
z"  Read 2: {o.total_bp[1]:13,d} bp
zZQuality-trimmed:           {o.quality_trimmed:13,d} bp ({o.quality_trimmed_fraction:.1%})
z,  Read 1: {o.quality_trimmed_bp[0]:13,d} bp
z,  Read 2: {o.quality_trimmed_bp[1]:13,d} bp
z\Total written (filtered):  {o.total_written_bp:13,d} bp ({o.total_written_bp_fraction:.1%})
z$  Read 1: {o.written_bp[0]:13,d} bp
z$  Read 2: {o.written_bp[1]:13,d} bp
ZPairsZReads)opairs_or_readsFr?   zFirst read: zSecond read: r   z===ZAdapterzZSequence: {}...{}; Type: linked; Length: {}+{}; 5' trimmed: {} times; 3' trimmed: {} timesz5Sequence: {}; Type: {}; Length: {}; Trimmed: {} times)endz ; Reverse-complemented: {} timesz)times, it overlapped the 5' end of a readz6times, it overlapped the 3' end or was within the readz"Overview of removed sequences (5')z,Overview of removed sequences (3' or within)z'Overview of removed sequences at 5' endz'Overview of removed sequences at 3' endzOverview of removed sequencesr   z<    One or more of your adapter sequences may be incomplete.z)    Please see the detailed output above.)*r3   r   sysversion_inforu   r+   textwrapdedentr2   r-   r.   r/   r0   r1   r,   r:   rX   Zfrontr   rU   backadapterrO   r   r   r   r   r   r   namer   r   rA   descriptionr   r{   r   r   adjacent_basesr   r   rstrip)r   r   r}   r   microreportr   warningwhich_in_pairrT   Ztotal_frontZ
total_backrY   r2   r   extraZ
base_statsr"   r   r#   full_report1  s   
 











Hr   c           
      C   s  |}|}dd }d| j | j|| j|| j|| j| j| jd | jd | jd g
}| j	r<|| jd | jd | jd g7 }d}dD ]}| j
| D ]}t|jtttfr]t|jjjr]d} nqGq@|red	|d< g d
}	| j	rr|	g d7 }	d|	d ddd |D  S )z:Create a minimal tabular report suitable for concatenationc                 S   s   | d u rdS | S )Nr   r"   )valuer"   r"   r#   none  s   zminimal_report.<locals>.noneOKr   r   Fr?   TWARN)
statusZin_readsZin_bpr-   r.   r/   Z	out_readsz
w/adaptersZqualtrim_bpZout_bp)zw/adapters2Zqualtrim2_bpZout2_bpr   rr   c                 s   s    | ]}t |V  qd S r'   r   )r`   xr"   r"   r#   r     s    z!minimal_report.<locals>.<genexpr>)r3   rY   r-   r.   r/   r4   r8   r9   r6   r+   r:   rO   r   r   r   r   r   r   r   r   r   )
r   r   r}   _r   fieldsr   r   rT   headerr"   r"   r#   minimal_report  sD   "r   )0__doc__r   ior   r   collectionsr   typingr   r   r   r   adaptersr	   r
   r   r   r   r   r   r   r   r   rJ   r   r   r   r   r   r   filtersr   r   r   r   r   r   ro   rq   r$   r(   r)   r   r{   r   r   r   r   r"   r"   r"   r#   <module>   s&    0  " 1") 