
    tf"                     d   d Z ddlZddlZ ej                  dd      Z ej                  dd      Zd Zi ddd	d	d
d
dddddd	dd
ddddddddddddddddddd d!d"d#d$d%d&Zi ddd	dd
dddddddddddddddddd!dd"d'd#d'd$d(d%d(Zd) Z	d* Z
d+ Zd, Zd- Zy).z>Tools for working with files in the samtools pileup -c format.    NPileupSubstitution)

chromosomeposreference_basegenotypeconsensus_qualitysnp_qualitymapping_qualitycoverage
read_basesbase_qualitiesPileupIndel)r   r   r   r   r   r	   r
   r   first_allelesecond_allelereads_firstreads_second
reads_diffc              #     K   t         d t         t         t        t        t        t        t         t         f
}t         d t         t         t        t        t        t        t         t         t        t        t        f}| D ]z  }|dd j                         }|d   dk(  r/	 t        t	        ||      D cg c]  \  }} ||       c}}  M	 t        t	        ||      D cg c]  \  }} ||       c}}  | yc c}}w # t
        $ r t        j                  d|z        w xY wc c}}w # t
        $ r t        j                  d|z        w xY ww)a/  iterate over ``samtools pileup -c`` formatted file.

    *infile* can be any iterator over a lines.

    The function yields named tuples of the type :class:`pysam.Pileup.PileupSubstitution`
    or :class:`pysam.Pileup.PileupIndel`.

    .. note::

       The parser converts to 0-based coordinates
    c                     t        |       dz
  S N   intxs    V/var/www/html/software/conda/envs/higlass/lib/python3.12/site-packages/pysam/Pileup.py<lambda>ziterate.<locals>.<lambda>0       Q!     c                     t        |       dz
  S r   r   r   s    r   r   ziterate.<locals>.<lambda>2   r   r   N   *zparsing error in line: `%s`)	strr   splitr   zip	TypeErrorpysamSamtoolsErrorr   )infile
conv_subst
conv_indellinedr   ys          r   iterater0   #   s*     +SsCc35J+S#ssCc3S:J  P"IOOQ43;P!SQ5G#HTQAaD#HIIP(C
A<N*ODAq1Q4*OPPP $I P))*G$*NOOP +P P))*G$*NOOPsZ   BE
C:)C4;C:E
D%D*D%1E
4C::"DE
D%%"EE
ACGTAACCGGTTUUUAGrGARCTr/   TCYACmCAMGTkTGKsSwW)CGGCATTArN   rP   c                 0    t         | j                            S )zencode genotypes like GG, GA into a one-letter code.
    The returned code is lower case if code[0] < code[1], otherwise
    it is uppercase.
    )ENCODE_GENOTYPEuppercodes    r   encodeGenotyperW   ^   s    
 4::<((r   c                     t         |    S )z|decode single letter genotypes like m, M into two letters.
    This is the reverse operation to :meth:`encodeGenotype`.
    )DECODE_GENOTYPErU   s    r   decodeGenotyperZ   f   s     4  r   c                 N  	
 d 	d 
	
fd}g g }}d}| D ]7  }	  |||      \  }}|j                  |       |dk7  s'|j                  |       9 d}|r
t               t        t        |            dk(  sJ d       |d	   }d
j	                  |      }||fS # t         $ r Y  Pw xY w)z*translate indel from vcf to pileup format.c                     t        t        |       t        |            }t        |      D ]  }| |   ||   k7  s| d| c S  | d| S )z'get common prefix of strings s1 and s2.Nminlenranges1s2nr   s       r   	getPrefixz0translateIndelGenotypeFromVCF.<locals>.getPrefixq   sP    BR!q 	A!u1~"1v	 "1vr   c                     t        t        |       t        |            }| d   |d   k7  ryt        d| dz
  d      D ]  }| |   ||   k7  s| |dz   d c S  | | d S )z&get common sufix of strings s1 and s2.r!    r   Nr]   ra   s       r   	getSuffixz0translateIndelGenotypeFromVCF.<locals>.getSuffixy   su    BR!b6RVrA262& 	"A!u1~!a%&z!	" 1"#wr   c                    | |k(  ryt        |      t        |       kD  r|j                  |       rd|t        |       d  z  t        |       dz
  fS |j                  |       rd|d t        |         z  dfS  ||       } ||       }t        |      t        |      z   t        |       z
  }|dk  r
t               d|t        |      t        |      |z
    z  t        |      dz
  fS t        |      t        |       k  r| j                  |      rd| t        |      d  z  t        |      dz
  fS | j                  |      rd| d t        |       z  dfS  ||       } ||       }t        |      t        |      z   t        |      z
  }|dk  r
t               d| t        |      t        |      |z
    z  t        |      fS J d       )N)r#   r   z-%sr   r!   r   z+%szsnp?)r_   
startswithendswith
ValueError)variantrefprefixsuffixsharedre   ri   s        r   getGenotypez2translateIndelGenotypeFromVCF.<locals>.getGenotype   s   c>s8c'l"~~g&s3w<=113w<!3CCCg&s>S\M22B66"30"30Vs6{2S\AA:$,&s3v;Vv1E/FGGVWXXXXG$!!#&ws3xy113s8a<??!!#&wyC11144"30"30Vs6{2SX=A:$,&ws6{S[65I3JKKSQW[XXf1r   Tr#   Fr   zmultiple offsets for indelr   /)rm   appendr_   setjoin)vcf_genotypesro   rs   	genotypesoffsetsis_errorrn   goffsetre   ri   s            @@r   translateIndelGenotypeFromVCFr~   m   s    #R RwIH  	#GS1IAv 	8NN6" ls7|!?#??!QZF#If#  		s   B	B$#B$c                    | j                   }| j                  }| j                  }|g| j                  z   }| |   }|d   }t	        |      dkD  rt        dt        |       z        |d   }|d   dk(  ry|D cg c]  }|dk7  s	|t        |          }}|j                  ddg      d   x}	}
| j                  j                  d	dg      d   }|j                  d
d      }t	        |      dkD  s/t        | j                  D cg c]  }t	        |       c}      dkD  r5t        ||      \  }}t        |||z   d||
|	|||dt	        |      z  ddd      S t        dj                  |            }d}d}t        |||||
|	||||
      S c c}w c c}w )z$convert vcf record to pileup record.rF   r   z%only single genotype per position, %sr   .Nrt   GQMQDPr#   <rg   )contigr   ro   altr_   rm   r$   r   getinfomaxr~   r   rW   rw   r   )vcfsampler   r   	referenceallellesdatary   r   r	   r   r
   r   r   r}   r   r   s                    r   
vcf2pileupr      s    J
''CI{SWW$Hv;D T
I
9~@CHMNN!I |s+4AaS#a&!AIA '+hhtaS&9!&<<K#hhll4!-a0Oxxa H
9~S#''!:Q#a&!:;a?8IN&:<#,&*##X. 	 ""'')"45
!*c9"*,="-"*J"0	2 	2= B ";s   6
FF>Fc              #      K   t        j                         }|j                  |        ||j                         vrt	        d      |j                         D ]  }t        ||      }|s|  yw)a  iterate over a vcf-formatted file.

    *infile* can be any iterator over a lines.

    The function yields named tuples of the type
    :class:`pysam.Pileup.PileupSubstitution` or
    :class:`pysam.Pileup.PileupIndel`.

    Positions without a snp will be skipped.

    This method is wasteful and written to support same legacy code
    that expects samtools pileup output.

    Better use the vcf parser directly.

    zsample %s not vcf fileN)r(   VCFconnect
getsamplesKeyErrorfetchr   )r*   r   r   rowresults        r   iterate_from_vcfr      sc     " ))+CKKS^^%%/00yy{ C(Ls   A$A.'A.)__doc__collectionsr(   
namedtupler   r   r0   rS   rY   rW   rZ   r~   r   r    r   r   <module>r      s   D  +[++,@
-BC  %k$$]&78"P@		3	S	"%s	#	S		%)3	04c	 	#	 S	 	#		 S		
 	#	
 S	 	#	 S	 S
S	  	
 
 D  D  D  D  D  D )!Vr72tr   