
    tf<@                         d dl Z d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZ d dlmZ d ZddZd Zd ZddZdd	Zd
 Zd Z G d de      Zedk(  r e        yy)    N)Fastawrap_sequence
FetchError
ucsc_split	bed_splitget_valid_filename)defaultdictc           
          t         j                  j                   j                        \  }}|r|dd  }t	        j
                   j                        j                  } j                  r fd}t         j                   j                  t         j                         j                    j                  | j                   j                          }t#               \  }}|s|j%                         }d}|D ]Z  } ||      \  }	}
}|	d k(  r j&                  r=|
|||
z
  }nt)        ||	         } j&                  d   |kD  s j&                  d   |k  r_ j*                  r4dj-                  d |	|
||fD              }t/        |      }t1        |d      }n) j2                  r j2                  }nt4        j6                  }	  j8                  rC|s" j8                  d	k(  r|j;                  d
       d}|j;                  t=         ||	|
|             n%t?         ||	|
|      D ]  }|j;                  |         j*                  sK|jE                          ] |jG                          y # t@        $ r}tA        tC        |      dz         d }~ww xY w)N   c                 b    t        j                  j                        j                  |        S N)recompileregexsearch)xargss    U/var/www/html/software/conda/envs/higlass/lib/python3.12/site-packages/pyfaidx/cli.py<lambda>z write_sequence.<locals>.<lambda>   s"    bjj&<&C&CA&F"F     )default_seqkey_functionstrict_bounds
split_charfilt_functionread_long_namesrebuildFr   .c              3   8   K   | ]  }|st        |        y wr   )str).0es     r   	<genexpr>z!write_sequence.<locals>.<genexpr>'   s     M11AMs   w
nucleotidez name	start	end	A	T	C	G	N	others
Tz Try setting --lazy.
)$ospathsplitextfastar   r   r   r   invert_matchr   r   evalheader_functionlazy	delimiter
long_names
no_rebuildsplit_regionskeys
size_rangelensplit_filesjoinr   openoutsysstdout	transformwritetransform_sequencefetch_sequencer   r    close__exit__)r   _extr   r)   regions_to_fetchsplit_functionheaderregionnamestartendsequence_lenfilenameoutfileliner"   s   `                r   write_sequencerN   	   s   WWdjj)FAs
!"gJJtzz*11MF$**$*:*:dNbNbIcw{  xA  xA  tA  NR  N\  N\  ly  KO  KZ  KZ  hl  hw  hw  dw  xE'4T':$n ::<F"  )&1eS4<?? S_"U{"5;/q!L0DOOA4F4UxxMuc30GMMH)(3H8S)GXXhhGjjG
	@~~$..L"@MM"MN!F0udE3OP*4eSI (DMM$'( MMOA B 
NN	  	@SV&>>??	@s    A4I&&	J/JJc           	   #     K   	 |j                   j                  |   j                  }| j                  r.||kD  r)|'|%||   |dz
  |dz    }|j                  j
                  }n||   || }| j
                  r|j
                  }| j                  r|j                  }| j                  ry | j                  rnQ|s|r-| j                  s!dj                  d|j                   dg       n dj                  d|j"                  dg       t%        ||j&                        D ]  }|  y # t        $ r: t        j                  j                   dj                  di t                      Y y w xY ww)Nr   z"warning: {name} not found in file
 >
 )faidxindexlencauto_strandreverse
complementKeyErrorr9   stderrr<   formatlocals	no_outputno_names	no_coordsr6   
fancy_namerG   r   seq)r   r)   rG   rH   rI   line_lensequencerM   s           r   r>   r>   >   sI    
;;$$T*//0AcoT{375195H''22HT{5-H &&||##~~}}S$..''3 3 3T:;;''3t455h5 
!  

E>EEQQRs+   E*A%D$ )B;E*$A E'$E*&E''E*c                 |   t        | j                  d| j                        }t        |       \  }}|D ]  } ||      \  }}}| j                  rD|r|r||z
  }n$|s|st        ||         }nt        ||   ||       }|| j                  z  ||   || _| j                  sl||   || j                         ||   ||  y )NT)mutabler   )	r   r)   r.   r1   mask_with_default_seqr4   r   mask_by_case	lowercase)	r   r)   rC   rD   rF   rnamerH   rI   spans	            r   mask_sequencerl   [   s    $**dt~~FE'4T':$n" J*62uc%%U{35<(5<c23&*T-=-=&=E%Ls#&+El5&=&G&G&IE%Ls#Jr   c                 r    | j                   r| j                   }t        }||fS | j                  }t        }||fS r   )bedr   regionsr   )r   rC   rD   s      r   r1   r1   n   sA    xx88" n--  <<#n--r   c                    |j                   j                  |   j                  }||   || }| j                  r|j                  }| j                  r|j                  }| j
                  ry | j                  dk(  r5dj                  |j                  |j                  dz
  |j                        S | j                  dk(  r&dj                  |j                  t        |            S | j                  dk(  rCt        |      j                         }t        t              }|j!                  t#        |      D 	cg c]  }	|	|j%                  |	      f c}	       |j'                  d	d
      }
|j'                  dd
      }|j'                  dd
      }|j'                  dd
      }|j'                  dd
      }dj)                  |j+                         D cg c]!  \  }}dj)                  |t        |      f      # c}}      } dj                  d|j                  |j                  |j                  dt-               S | j                  dk(  r<dj                  |j                  |j                  |j                  t        |            S y c c}	w c c}}w )Nrn   z{name}	{start}	{end}
r   )rG   rH   rI   
chromsizesz{name}	{length}
)rG   lengthr%   Ar   TCGN|:z5{sname}	{sstart}	{send}	{A}	{T}	{C}	{G}	{N}	{others}
)snamesstartsend
transposedz{name}	{start}	{end}	{seq}
)rG   rH   rI   rb   rS   )rT   rU   rV   rY   rX   r^   r;   r\   rG   rH   rI   r4   r    upperr	   intupdatesetcountpopr6   itemsr]   )r   r)   rG   rH   rI   rc   sssnucscrs   rt   ru   rv   rw   kvotherss                     r   r=   r=   x   s!   {{  &++HdE#ALL||II~~~~)00affAGGaKVWV[V[0\\	<	'#**s1v*FF	<	'V\\^3s2w7!a!%78HHS!HHS!HHS!HHS!HHS!TZZ\JTQ388QAK0JKVOVV  M]^]c]clmlslsz{zz  M  DJ  DL  M  	M	<	'077QVV177XYX]X]cfghci7jj 
( 8 Ks   2J&J
c                    ddl m} t        j                  dd      }|j	                  dt
        d       |j	                  d	t
        d
d       |j                  d      }|j                  d      }|j                  d      }|j	                  ddt        j                  d      d       |j	                  ddt        j                  d      d       |j	                  ddt
        dd       |j	                  dddd d!"       |j	                  d#d$dd d%"       |j	                  d&d'dd d("       |j	                  d)d*t        d d+,       |j                         }|j	                  d-d.dd d/"       |j	                  d0d1dd d2"       |j	                  d3d4dd d5"       |j	                  d6d7dd d8"       |j	                  d9d:dd d;"       |j	                  d<d=t        d d>,       |j	                  d?d@t
        d dA,       |j	                  dBdCt
        dDdE,       |j	                  dFdGt
        dHdIdJK       |j                  dL      }|j	                  dMdNt
        dOdP,       |j	                  dQdRdd dS"       |j                         }|j	                  dTdUdd dV"       |j	                  dWdXdd dY"       |j	                  dZdd d["       |j	                  d\dd d]"       |j	                  d^d_|d`a       t        t        j                        dbk(  r(| s&|j                          t        j                  db       n$| r|j!                  |       }	n|j!                         }		j"                  rV|	j$                  rt        j&                  j)                  dc       |	j*                  rt        j&                  j)                  dd       |	j,                  s|	j.                  rt1        |	       y t3        |	       y )eNr   )__version__zFetch sequences from FASTA. If no regions are specified, all entries in the input file are returned. Input FASTA file must be consistently line-wrapped, and line wrapping of output is based on input line lengths.zPlease cite: Shirley MD, Ma Z, Pedersen BS, Wheelan SJ. (2015) Efficient "pythonic" access to FASTA files using pyfaidx. PeerJ PrePrints 3:e1196 https://dx.doi.org/10.7287/peerj.preprints.970v1)descriptionepilogr)   z
FASTA file)typehelpro   *z=space separated regions of sequence to fetch e.g. chr1:1-1000)r   nargsr   zinput optionszoutput optionszheader optionsz-bz--bedrz1bed file of regions (zero-based start coordinate)z-oz--outr$   z"output file name (default: stdout)z-iz--transform)rn   rq   r%   r}   zItransform the requested regions into another format. default: %(default)s)r   choicesr   z-cz--complement
store_trueFz-complement the sequence. default: %(default)s)actiondefaultr   z-rz	--reversez*reverse the sequence. default: %(default)sz-yz--auto-strandzQreverse complement the sequence when start > end coordinate. default: %(default)sz-az--size-rangezZselected sequences are in the size range [low, high]. example: 1,1000 default: %(default)s)r   r   r   z-nz
--no-namesz5omit sequence names from output. default: %(default)sz-fz--long-nameszpoutput full (long) names from the input fasta headers. default: headers are truncated after the first whitespacez-tz--no-coordszOomit coordinates (e.g. chr:start-end) from output headers. default: %(default)sz-xz--split-fileszEwrite each region to a separate file (names are derived from regions)z-lz--lazyz>fill in --default-seq for missing ranges. default: %(default)sz-sz--default-seqzDdefault base for missing positions and masking. default: %(default)sz-dz--delimiterzjdelimiter for splitting names to multiple values (duplicate names will be discarded). default: %(default)sz-ez--header-functionzlambda x: x.split()[0]z]python function to modify header lines e.g: "lambda x: x.split("|")[0]". default: %(default)sz-uz--duplicates-actionstop)r   firstlastlongestshortestzQentry to take when duplicate sequence names are encountered. default: %(default)s)r   r   r   r   zmatching argumentsz-gz--regexz.*zNselected sequences are those matching regular expression. default: %(default)sz-vz--invert-matchzRselected sequences are those not matching 'regions' argument. default: %(default)sz-mz--mask-with-default-seqz<mask the FASTA file using --default-seq default: %(default)sz-Mz--mask-by-casezBmask the FASTA file by changing to lowercase. default: %(default)sz--no-outputz0do not output any sequence. default: %(default)sz--no-rebuildzMdo not rebuild the .fai index even if it is out of date. default: %(default)sz	--versionversionzprint pyfaidx version number)r   r   r   r   zQ--auto-strand and --complement are both set. Are you sure this is what you want?
zN--auto-strand and --reverse are both set. Are you sure this is what you want?
)pyfaidxr   argparseArgumentParseradd_argumentr    add_argument_groupFileTypeparse_size_rangeadd_mutually_exclusive_groupcheck_seq_lengthr4   r9   argv
print_helpexit
parse_argsrW   rY   r[   r<   rX   rg   rh   rl   rN   )
ext_argsr   parser_inputoutputrE   namesmatchermaskingr   s
             r   mainr      sU   #$$  2H -rsF
c=
	3=|}&&7F&&'78F&&'78F
gH,=,=c,BI|}
gH,=,=c,BImn
m#?p  xC  D
n\5  XG  H
k,  UA  B
olE  Yl  m
n3CT  Yu  v//1E	t\,  UL  M	t^L%  WI  J
mL%  Wh  i
olE  Y`  a
h|U  RR  S
o4Dd  Z`  a
m#t  Kw  x
1E]  eD  E
3#v  XH  Ob  c''(<=GysD  HX  Y/e  [o  p113G8W\  db  c/e  [_  `
lE  QC  D
|U  Ra  b
I{Qop
388}a	  *  "??JJqr<<JJno!!T%6%6dtr   c                 V    | 	 | S t        |       dk7  rt        j                  d      | S )Nr   z/--default-seq value must be a single character!)r4   r   ArgumentTypeError)values    r   r   r      s5    } L 
Uq(()Z[[Lr   c                     | | S 	 | j                  dd      j                  dd      j                  d      \  }}t        |      t        |      fS # t        t        t        f$ r t        w xY w)zK Size range argument should be in the form start,end and is end-inclusive.  rP   	,)replacesplit	TypeError
ValueError
IndexErrorr   )r   rH   rI   s      r   r   r      sp    }]]3+33D"=CCCH
s JC!! z:. s   4A A+c                   p    e Zd ZdZddZd ZddZd Zedd       Z	ddZ
d	 Zd
 Zd Zd Zd Zd Zd Zy)CounterzDict subclass for counting hashable objects.  Sometimes called a bag
    or multiset.  Elements are stored as dictionary keys and their counts
    are stored as dictionary values.
    Nc                 *     | j                   |fi | y)zCreate a new, empty Counter object.  And if given, count elements
        from an input iterable.  Or, initialize the count from another mapping
        of elements to their counts.
        N)r   )selfiterablekwdss      r   __init__zCounter.__init__   s    
 	H%%r   c                      y)Nr   rS   )r   keys     r   __missing__zCounter.__missing__   s    r   c                     |%t        | j                         t        d      d      S t        || j                         t        d            S )zList the n most common elements and their counts from the most
        common to the least.  If n is None, then list all element counts.
        r   T)r   rX   )r   )sorted	iteritems
itemgetternlargest)r   ns     r   most_commonzCounter.most_common   s=     9$..*
1tLL4>>+A??r   c              #   f   K   | j                         D ]  \  }}t        d|      D ]  }|   yw)zIterator over elements repeating each as many times as its count.

        If an element's count has been set to zero or is a negative number,
        elements() will ignore it.

        N)r   repeat)r   elemr   rA   s       r   elementszCounter.elements   s=       >>+ 	KD%D%( 
	s   /1c                     t        d      )Nz@Counter.fromkeys() is undefined.  Use Counter(iterable) instead.)NotImplementedError)clsr   r   s      r   fromkeyszCounter.fromkeys  s    !NP 	Pr   c                 &   |{t        |d      rM| r4| j                  }|j                         D ]  \  }} ||d      |z   | |<    n9t        j	                  | |       n"| j                  }|D ]  } ||d      dz   | |<    |r| j	                  |       yy)zLike dict.update() but add counts instead of replacing them.

        Source can be an iterable, a dictionary, or another Counter instance.

        Nr   r   r   )hasattrgetr   dictr   )r   r   r   self_getr   r   s         r   r   zCounter.update  s     x-#xxH'/'9'9'; ?e%-dA%6%>T
? KKh/88$ 7D!)$!2Q!6DJ7KK r   c                     t        |       S )zBLike dict.copy() but returns a Counter instance instead of a dict.)r   )r   s    r   copyzCounter.copy   s    t}r   c                 :    || v rt         j                  | |       yy)zGLike dict.__delitem__() but does not raise KeyError for missing values.N)r   __delitem__)r   r   s     r   r   zCounter.__delitem__$  s    4<T4( r   c                     | sd| j                   j                  z  S dj                  t        dj                  | j                                     }| j                   j                  d|dS )Nz%s()z, z%r: %rz({z}))	__class____name__r6   map__mod__r   )r   r   s     r   __repr__zCounter.__repr__)  sS    DNN3333		#h..0@0@0BCD!^^44e<<r   c                     t        |t              st        S t               }t        |       t        |      z  D ]  }| |   ||   z   }|dkD  s|||<    |S )z'Add counts from two counters.

        r   
isinstancer   NotImplementedr   r   otherresultr   newcounts        r   __add__zCounter.__add__8  `     %)!!IE
* 	(DDzE$K/H!|'t	( r   c                     t        |t              st        S t               }t        |       t        |      z  D ]  }| |   ||   z
  }|dkD  s|||<    |S )zF Subtract count, but keep only results with positive counts.

        r   r   r   s        r   __sub__zCounter.__sub__E  r   r   c                     t        |t              st        S t        }t               }t	        |       t	        |      z  D ]  } || |   ||         }|dkD  s|||<    |S )zHUnion is the maximum of value in either of the input counters.

        r   )r   r   r   maxr   )r   r   _maxr   r   r   s         r   __or__zCounter.__or__R  sg     %)!!IE
* 	(DDJd4H!|'t	( r   c                     t        |t              st        S t        }t               }t	        |       t	        |      k  r|| }} t        | j                  |      D ]  } || |   ||         }|dkD  s|||<    |S )z? Intersection is the minimum of corresponding counts.

        r   )r   r   r   minr4   filter__contains__)r   r   _minr   r   r   s         r   __and__zCounter.__and__`  s     %)!!t9s5z!%D4,,e4 	(DDJd4H!|'t	( r   r   )r   
__module____qualname____doc__r   r   r   r   classmethodr   r   r   r   r   r   r   r   r   rS   r   r   r   r      sZ    
&@	 P P*)
=r   r   __main__)NNr   )r   r9   os.pathr&   r   r   r   r   r   r   r   r   collectionsr	   rN   r>   rl   r1   r=   r   r   r   r   r   r   rS   r   r   <module>r     si     
  	 _ _ #2j:J&.k:5r"Md M^ zF r   