
    DUf<@                         d dl Z d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZ d dlmZ d ZddZd Zd ZddZdd	Zd
 Zd Z G d de          Zedk    r e             dS dS )    N)Fastawrap_sequence
FetchError
ucsc_split	bed_splitget_valid_filename)defaultdictc           
          t           j                             j                  \  }}|r
|dd          }t	          j         j                  j        } j        r fd}t           j         j
        t           j                   j          j        | j         j                   }t#                     \  }}|s|                                }d}|D ]} ||          \  }	}
}|	d k    r j        rB|
|||
z
  }nt)          ||	                   } j        d         |k    s j        d         |k     rb j        rCd                    d |	|
||fD                       }t/          |          }t1          |d          }n j        r j        }nt4          j        }	  j        rK|s" j        d	k    r|                    d
           d}|                    t=           ||	|
|                     n+t?           ||	|
|          D ]}|                    |           n1# t@          $ r$}tA          tC          |          dz             d }~ww xY w j        r|"                                 |#                                 d S )N   c                 ^    t          j        j                                      |            S N)recompileregexsearch)xargss    H/var/www/html/software/conda/lib/python3.11/site-packages/pyfaidx/cli.py<lambda>z write_sequence.<locals>.<lambda>   s$    bj&<&<&C&CA&F&F"F     )default_seqkey_functionstrict_bounds
split_charfilt_functionread_long_namesrebuildFr   .c              3   8   K   | ]}|t          |          V  d S r   )str).0es     r   	<genexpr>z!write_sequence.<locals>.<genexpr>'   s-      MM11MAMMMMMMr   w
nucleotidez name	start	end	A	T	C	G	N	others
Tz Try setting --lazy.
)$ospathsplitextfastar   r   r   r   invert_matchr   r   evalheader_functionlazy	delimiter
long_names
no_rebuildsplit_regionskeys
size_rangelensplit_filesjoinr   openoutsysstdout	transformwritetransform_sequencefetch_sequencer   r    close__exit__)r   _extr   r)   regions_to_fetchsplit_functionheaderregionnamestartendsequence_lenfilenameoutfileliner"   s   `                r   write_sequencerN   	   s   Wdj))FAs
 !""gJtz**1M GFFFF$*$*:dNbIcIcw{  xA  tA  NR  N\  ly  KO  KZ  hl  hw  dw  x  x  xE'4T':':$n ( ::<<F"    )>&11eS4<<? 	 S_"U{"5;//q!L00DOA4F4U4U 	!xxMMuc30GMMMMMH)(33H8S))GGX 	!hGGjG
	@~ ( "$.L"@"@MM"MNNN!F0udE3OOPPPP*4eSII ( (DMM$'''' 	@ 	@ 	@SVV&>>???	@ 	MMOOO	NNs   A=H
I$IIc           	   #   x  K   	 |j         j        |         j        }| j        r-||k    r'|%|#||         |dz
  |dz            }|j        j        }n||         ||         }nG# t          $ r: t          j        	                     dj
        di t                                 Y d S w xY w| j        r|j        }| j        r|j        }| j        rd S | j        rnJ|s|r'| j        s d                    d|j        dg          V  nd                    d|j        dg          V  t%          ||j                  D ]}|V  d S )Nr   z"warning: {name} not found in file
 >
 )faidxindexlencauto_strandreverse
complementKeyErrorr9   stderrr<   formatlocals	no_outputno_names	no_coordsr6   
fancy_namerG   r   seq)r   r)   rG   rH   rI   line_lensequencerM   s           r   r>   r>   >   s     
;$T*/ 	.0AcoT{37519#45H'2HHT{59-H   
E>EQQQQRRR  '&| $#~ } 6 	6S 	6$. 	6''3 3T:;;;;;;''3t455555h55  



 s   AA   A B$#B$c                    t          | j        d| j                  }t          |           \  }}|D ]} ||          \  }}}| j        rW|r|r||z
  }n7|s|st          ||                   }nt          ||         ||                   }|| j        z  ||         ||<   o| j        r-||         ||                                         ||         ||<   d S )NT)mutabler   )	r   r)   r.   r1   mask_with_default_seqr4   r   mask_by_case	lowercase)	r   r)   rC   rD   rF   rnamerH   rI   spans	            r   mask_sequencerl   [   s   $*dt~FFFE'4T':':$n" J J*N622uc% 		J 4 4U{ 43 45<((5<c	233&*T-=&=E%Ls## 	J&+El59&=&G&G&I&IE%Ls#J Jr   c                 R    | j         r| j         }t          }n| j        }t          }||fS r   )bedr   regionsr   )r   rC   rD   s      r   r1   r1   n   s3    x $8"<#n--r   c                 f   |j         j        |         j        }||         ||         }| j        r|j        }| j        r|j        }| j        rd S | j        dk    r*d                    |j        |j	        dz
  |j
                  S | j        dk    r)d                    |j        t          |                    S | j        dk    r.t          |                                          t          t                    }|                    fd	t#                    D                        |                    d
d          }|                    dd          }	|                    dd          }
|                    dd          }|                    dd          }d                    d |                                D                       } dj        d|j        |j	        |j
        dt+                      S | j        dk    r5d                    |j        |j	        |j
        t          |                    S d S )Nrn   z{name}	{start}	{end}
r   )rG   rH   rI   
chromsizesz{name}	{length}
)rG   lengthr%   c                 >    g | ]}|                     |          fS rS   )count)r!   csss     r   
<listcomp>z&transform_sequence.<locals>.<listcomp>   s(    777!a!%777r   Ar   TCGN|c                 \    g | ])\  }}d                      |t          |          f          *S ):)r6   r    )r!   kvs      r   rw   z&transform_sequence.<locals>.<listcomp>   s2    JJJTQ388QAK00JJJr   z5{sname}	{sstart}	{send}	{A}	{T}	{C}	{G}	{N}	{others}
)snamesstartsend
transposedz{name}	{start}	{end}	{seq}
)rG   rH   rI   rb   rS   )rT   rU   rV   rY   rX   r^   r;   r\   rG   rH   rI   r4   r    upperr	   intupdatesetpopr6   itemsr]   )r   r)   rG   rH   rI   rc   snucsrx   ry   rz   r{   r|   othersrv   s                 @r   r=   r=   x   s3   { &+HdE#IA L| I~ ~)00afAGaKVWV[0\\\	<	'	'#**s1vv*FFF	<	'	'VV\\^^37777s2ww777888HHS!HHS!HHS!HHS!HHS!JJTZZ\\JJJKKVOV  M]^]clmlsz{z  M  M  DJ  DL  DL  M  M  	M	<	'	'077QV17XYX]cfghcici7jjj 
(	'r   c                    ddl m} t          j        dd          }|                    dt
          d           |                    d	t
          d
d           |                    d          }|                    d          }|                    d          }|                    ddt          j        d          d           |                    ddt          j        d          d           |                    ddt
          dd           |                    dddd d!"           |                    d#d$dd d%"           |                    d&d'dd d("           |                    d)d*t          d d+,           |	                                }|                    d-d.dd d/"           |                    d0d1dd d2"           |                    d3d4dd d5"           |                    d6d7dd d8"           |                    d9d:dd d;"           |                    d<d=t          d d>,           |                    d?d@t
          d dA,           |                    dBdCt
          dDdE,           |                    dFdGt
          dHdIdJK           |                    dL          }|                    dMdNt
          dOdP,           |                    dQdRdd dS"           |	                                }|                    dTdUdd dV"           |                    dWdXdd dY"           |                    dZdd d["           |                    d\dd d]"           |                    d^d_|d`a           t          t          j                  dbk    r+| s)|                                 t          j        db           n,| r|                    |           }	n|                                }	|	j        rL|	j        rt          j                            dc           |	j        rt          j                            dd           |	j        s|	j        rt1          |	           d S t3          |	           d S )eNr   )__version__zFetch sequences from FASTA. If no regions are specified, all entries in the input file are returned. Input FASTA file must be consistently line-wrapped, and line wrapping of output is based on input line lengths.zPlease cite: Shirley MD, Ma Z, Pedersen BS, Wheelan SJ. (2015) Efficient "pythonic" access to FASTA files using pyfaidx. PeerJ PrePrints 3:e1196 https://dx.doi.org/10.7287/peerj.preprints.970v1)descriptionepilogr)   z
FASTA file)typehelpro   *z=space separated regions of sequence to fetch e.g. chr1:1-1000)r   nargsr   zinput optionszoutput optionszheader optionsz-bz--bedrz1bed file of regions (zero-based start coordinate)z-oz--outr$   z"output file name (default: stdout)z-iz--transform)rn   rq   r%   r   zItransform the requested regions into another format. default: %(default)s)r   choicesr   z-cz--complement
store_trueFz-complement the sequence. default: %(default)s)actiondefaultr   z-rz	--reversez*reverse the sequence. default: %(default)sz-yz--auto-strandzQreverse complement the sequence when start > end coordinate. default: %(default)sz-az--size-rangezZselected sequences are in the size range [low, high]. example: 1,1000 default: %(default)s)r   r   r   z-nz
--no-namesz5omit sequence names from output. default: %(default)sz-fz--long-nameszpoutput full (long) names from the input fasta headers. default: headers are truncated after the first whitespacez-tz--no-coordszOomit coordinates (e.g. chr:start-end) from output headers. default: %(default)sz-xz--split-fileszEwrite each region to a separate file (names are derived from regions)z-lz--lazyz>fill in --default-seq for missing ranges. default: %(default)sz-sz--default-seqzDdefault base for missing positions and masking. default: %(default)sz-dz--delimiterzjdelimiter for splitting names to multiple values (duplicate names will be discarded). default: %(default)sz-ez--header-functionzlambda x: x.split()[0]z]python function to modify header lines e.g: "lambda x: x.split("|")[0]". default: %(default)sz-uz--duplicates-actionstop)r   firstlastlongestshortestzQentry to take when duplicate sequence names are encountered. default: %(default)s)r   r   r   r   zmatching argumentsz-gz--regexz.*zNselected sequences are those matching regular expression. default: %(default)sz-vz--invert-matchzRselected sequences are those not matching 'regions' argument. default: %(default)sz-mz--mask-with-default-seqz<mask the FASTA file using --default-seq default: %(default)sz-Mz--mask-by-casezBmask the FASTA file by changing to lowercase. default: %(default)sz--no-outputz0do not output any sequence. default: %(default)sz--no-rebuildzMdo not rebuild the .fai index even if it is out of date. default: %(default)sz	--versionversionzprint pyfaidx version number)r   r   r   r   zQ--auto-strand and --complement are both set. Are you sure this is what you want?
zN--auto-strand and --reverse are both set. Are you sure this is what you want?
)pyfaidxr   argparseArgumentParseradd_argumentr    add_argument_groupFileTypeparse_size_rangeadd_mutually_exclusive_groupcheck_seq_lengthr4   r9   argv
print_helpexit
parse_argsrW   rY   r[   r<   rX   rg   rh   rl   rN   )
ext_argsr   parser_inputoutputrE   namesmatchermaskingr   s
             r   mainr      s   ######$  2H -rs s sF
c===
	3=|}}}&&77F&&'788F&&'788F
gH,=c,B,BI|}}}
gH,=c,B,BImnnn
m#?p  xC  D  D  D
n\5  XG  H  H  H
k,  UA  B  B  B
olE  Yl  m  m  m
n3CT  Yu  v  v  v//11E	t\,  UL  M  M  M	t^L%  WI  J  J  J
mL%  Wh  i  i  i
olE  Y`  a  a  a
h|U  RR  S  S  S
o4Dd  Z`  a  a  a
m#t  Kw  x  x  x
1E]  eD  E  E  E
3#v  XH  Ob  c  c  c''(<==GysD  HX  Y  Y  Y/e  [o  p  p  p1133G8W\  db  c  c  c/e  [_  `  `  `
lE  QC  D  D  D
|U  Ra  b  b  b
I{Qoppp
38}}a	 #  **  "" p? 	sJqrrr< 	pJnooo! T%6 dtr   c                 Z    | n't          |           dk    rt          j        d          | S )Nr   z/--default-seq value must be a single character!)r4   r   ArgumentTypeError)values    r   r   r      s0    }	Uq()Z[[[Lr   c                    | | S 	 |                      dd                               dd                              d          \  }}n"# t          t          t          f$ r t          w xY wt          |          t          |          fS )zK Size range argument should be in the form start,end and is end-inclusive. N rP   	,)replacesplit	TypeError
ValueError
IndexErrorr   )r   rH   rI   s      r   r   r      s    }]]3++33D"==CCCHH
ssz:.   JJC!!s   A A A&c                   x    e Zd ZdZddZd ZddZd Zedd            Z	ddZ
d	 Zd
 Zd Zd Zd Zd Zd ZdS )CounterzDict subclass for counting hashable objects.  Sometimes called a bag
    or multiset.  Elements are stored as dictionary keys and their counts
    are stored as dictionary values.
    Nc                 "     | j         |fi | dS )zCreate a new, empty Counter object.  And if given, count elements
        from an input iterable.  Or, initialize the count from another mapping
        of elements to their counts.
        N)r   )selfiterablekwdss      r   __init__zCounter.__init__   s$    
 	H%%%%%%%r   c                     dS )Nr   rS   )r   keys     r   __missing__zCounter.__missing__   s    qr   c                     |1t          |                                 t          d          d          S t          ||                                 t          d                    S )zList the n most common elements and their counts from the most
        common to the least.  If n is None, then list all element counts.
        Nr   T)r   rX   )r   )sorted	iteritems
itemgetternlargest)r   ns     r   most_commonzCounter.most_common   sS     9$..**
1tLLLL4>>++A????r   c              #   l   K   |                                  D ]\  }}t          d|          D ]}|V  dS )zIterator over elements repeating each as many times as its count.

        If an element's count has been set to zero or is a negative number,
        elements() will ignore it.

        N)r   repeat)r   elemrt   rA   s       r   elementszCounter.elements   sX        >>++ 	 	KD%D%((  



	 	r   c                      t          d          )Nz@Counter.fromkeys() is undefined.  Use Counter(iterable) instead.)NotImplementedError)clsr   r   s      r   fromkeyszCounter.fromkeys  s    !NP P 	Pr   c                 <   |t          |d          rR| r4| j        }|                                D ]\  }} ||d          |z   | |<   n:t                              | |           n| j        }|D ]} ||d          dz   | |<   |r|                     |           dS dS )zLike dict.update() but add counts instead of replacing them.

        Source can be an iterable, a dictionary, or another Counter instance.

        Nr   r   r   )hasattrgetr   dictr   )r   r   r   self_getr   rt   s         r   r   zCounter.update  s     x-- 
7 0#xH'/'9'9';'; ? ?e%-XdA%6%6%>T

? KKh////8$ 7 7D!)$!2!2Q!6DJJ 	KK	 	r   c                      t          |           S )zBLike dict.copy() but returns a Counter instance instead of a dict.)r   )r   s    r   copyzCounter.copy   s    t}}r   c                 H    || v rt                               | |           dS dS )zGLike dict.__delitem__() but does not raise KeyError for missing values.N)r   __delitem__)r   r   s     r   r   zCounter.__delitem__$  s.    4<<T4((((( <r   c                     | sd| j         j        z  S d                    t          dj        |                                                     }| j         j        d|dS )Nz%s()z, z%r: %rz({z}))	__class____name__r6   map__mod__r   )r   r   s     r   __repr__zCounter.__repr__)  s^     	4DN333		#h.0@0@0B0BCCDD!^444eee<<r   c                     t          |t                    st          S t                      }t          |           t          |          z  D ]}| |         ||         z   }|dk    r|||<   |S )z'Add counts from two counters.

        r   
isinstancer   NotImplementedr   r   otherresultr   newcounts        r   __add__zCounter.__add__8  o     %)) 	"!!IIE

* 	( 	(DDzE$K/H!||'tr   c                     t          |t                    st          S t                      }t          |           t          |          z  D ]}| |         ||         z
  }|dk    r|||<   |S )zF Subtract count, but keep only results with positive counts.

        r   r   r   s        r   __sub__zCounter.__sub__E  r   r   c                     t          |t                    st          S t          }t                      }t	          |           t	          |          z  D ]%} || |         ||                   }|dk    r|||<   &|S )zHUnion is the maximum of value in either of the input counters.

        r   )r   r   r   maxr   )r   r   _maxr   r   r   s         r   __or__zCounter.__or__R  s{     %)) 	"!!IIE

* 	( 	(DtDJd44H!||'tr   c                 &   t          |t                    st          S t          }t                      }t	          |           t	          |          k     r|| }} t          | j        |          D ]%} || |         ||                   }|dk    r|||<   &|S )z? Intersection is the minimum of corresponding counts.

        r   )r   r   r   minr4   filter__contains__)r   r   _minr   r   r   s         r   __and__zCounter.__and__`  s     %)) 	"!!t99s5zz!!%D4,e44 	( 	(DtDJd44H!||'tr   r   )r   
__module____qualname____doc__r   r   r   r   classmethodr   r   r   r   r   r   r   r   r   rS   r   r   r   r      s	        
& & & &  @ @ @ @	 	 	 P P P [P   *  ) ) )
= = =          r   r   __main__)NNr   )r   r9   os.pathr&   r   r   r   r   r   r   r   r   collectionsr	   rN   r>   rl   r1   r=   r   r   r   r   r   r   rS   r   r   <module>r     sR    



  				 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ # # # # # #2 2 2j   :J J J&. . .k k k k:5 5 5 5r  " " "M M M M Md M M M^ zDFFFFF r   