
    DUf0                        d dl Z d dlmZ d dlZd dlZd dlZd dlZddlm	Z	 ddl
mZmZ 	 d dlmZ n# e$ r  ee j        d          ZY nw xY w ee          ZdZd	Zd
ZdZdZd Zd Zd Zd Zd Zd Z ej                     ej        d ej         dd          d           ej        d ej         d          d           ej!        dddde"e           ej!        dd dd!e"e           ej!        d"d#dd$e"e           ej!        d%d&dd'e"e           ej!        d(d)d* ej#        d+d,g          d,d-           ej!        d.d/dd0d1           ej!        d2d3d4e"d5d-           ej!        d6d7d8dd0d1           ej!        d9d:e$d;d-           ej!        d<d=e$d>d-           ej!        d?d@e$A           ej!        dBdCdDE           ej!        dFdGdHe"A           ej!        dIdJdKe"A          dL                                                                                                                                                                                                             Z%dS )M    N   )
cmd_exists   )cli
get_logger)DEVNULLwbz6sort -k{C1},{C1} -k{P1},{P1}n -k{C2},{C2} -k{P2},{P2}nz6sort -k{C1},{C1} -k{C2},{C2} -k{P1},{P1}n -k{P2},{P2}nztabix -f -s{C1} -b{P1} -e{P1}z3pairix -f -s{C1} -d{C2} -b{P1} -e{P1} -u{P2} -v{P2}a  import sys
from signal import signal, SIGPIPE, SIG_DFL
signal(SIGPIPE, SIG_DFL)

instream = getattr(sys.stdin, "buffer", sys.stdin)
outstream = getattr(sys.stdout, "buffer", sys.stdout)

with open("{chromosomes_path}", "rb") as f:
    chrIDs = {{}}
    for i, line in enumerate(f, 1):
        chrom = line.split(b"\t")[0].strip()
        if chrom:
            chrIDs[chrom] = i

for line in instream:
    if line.startswith(b"{comment_char}"):
        continue
    parts = line.strip().split(b"{sep}")
    chrom1, chrom2 = parts[{c1}], parts[{c2}]
    if chrom1 in chrIDs and chrom2 in chrIDs:
        cid1, cid2 = chrIDs[chrom1], chrIDs[chrom2]
        pos1, pos2 = int(parts[{p1}]), int(parts[{p2}])
        if (cid1 > cid2) or ((cid1 == cid2) and (pos1 > pos2)):
            parts[{c1}], parts[{c2}] = parts[{c2}], parts[{c1}]
            parts[{p1}], parts[{p2}] = parts[{p2}], parts[{p1}]
        outstream.write(b"\t".join(parts) + b"\n")
c                  \    t          j        ddgt          t          t                    dk    S )Nsortz--parallel=1)stdinstdoutstderrr   )
subprocesscallr        M/var/www/html/software/conda/lib/python3.11/site-packages/cooler/cli/csort.py_has_parallel_sortr   ;   s3    ^$GGG	
 	
 	
 	r   c                 :    |dk    rt          j        d          |S )Nr   zField numbers are one-based)clickBadParameter)ctxparamvalues      r   _validate_fieldnumr   D   s"    zz !>???Lr   c                     |                      d          }|rZt          d          rdd| g}nIt          d          rdd| g}n4t          dt          j                   t          j        d           nd| g}|S )	aX  
    If input file appears gzipped based its extension, read using one of pigz
    or gzip for decompression. Otherwise assume uncompressed and use cat.

    Note
    ----
    Gzip decompression can't actually be parallelized, but pigz will create
    three extra threads that may help.
    See <https://github.com/madler/pigz/issues/36>.

    .gzpigzz-dcgzipzNo gzip decompressor found.filer   cat)endswithr   printsysr   exit)infileingzipread_cmds      r   make_read_commandr*   J   s     __U##F 	#f 	v.HH 	v.HH/cjAAAAHQKKKK6?Or   c           	      "   t          |           5 }t                              d           t          |d          D ]a\  }}|                    d          d                                         }|r-t                              |dz   t          |          z              b	 d d d            n# 1 swxY w Y   |d         dz
  }|d         dz
  }	|d         dz
  }
|d         dz
  }t                              | ||||	|
|	          }t          j
        d
|gS )Nz$Enumerating requested chromosomes...r   	r   C1C2P1P2)chromosomes_pathsepcomment_charc1c2p1p2-c)openloggerinfo	enumeratesplitstripstrFLIP_TEMPLATEformatr%   
executable)r1   r2   r3   fieldsfilinechromr4   r5   r6   r7   	flip_codes                r   make_flip_commandrI   e   s\   			 31:;;; A 	3 	3GAtJJt$$Q'--//E 3EDL3q661222	33 3 3 3 3 3 3 3 3 3 3 3 3 3 3 
	B		B		B		B$$)! %  I ND),,s   BB**B.1B.c                     dt           j        d<   | dk    r$t          j        t	          j        di |          }n)| dk    r#t          j        t          j        di |          }||z  }|S )NCLC_ALLtabixpairixr   )osenvironshlexr=   SORT_POSrA   SORT_BLK)indexrC   sort_optionssort_cmds       r   make_sort_commandrW      ss    BJx;x888899	(		;x888899HOr   c                     | dk    r$t          j        t          j        di |          }n)| dk    r#t          j        t	          j        di |          }|r|dgz  }g ||S )NrM   rN   -0r   )rQ   r=   	INDEX_TBXrA   	INDEX_PX2)rT   rC   
zero_basedoutfile	index_cmds        r   make_index_commandr_      s~    K	 0 : :6 : :;;			(		K	 0 : :6 : :;;	 dV	 Y   r   
pairs_pathT)exists
allow_dash
PAIRS_PATH)typemetavarr1   )ra   CHROMOSOMES_PATHz--chrom1z-c1z7chrom1 field number in the input file (starting from 1))requiredhelprd   callbackz--chrom2z-c2zchrom2 field numberz--pos1z-p1zpos1 field numberz--pos2z-p2zpos2 field numberz--indexz-iz+Select the preset sort and indexing optionsrM   rN   )rh   rd   defaultshow_defaultz--flip-onlyz9Only flip mates; no sorting or indexing. Write to stdout.F)rh   is_flagrj   rk   z--nprocz-pzNumber of processors   z--zero-basedrY   zRead positions are zero-basedz--sepz Data delimiter in the input filez\tz--comment-charz Comment character to skip header#z--sort-optionsz3Quoted list of additional options to `sort` command)rh   rd   z--outz-ozOutput gzip file)rh   z	--strand1z-s1z!strand1 field number (deprecated)z	--strand2z-s2z!strand2 field number (deprecated)c                    t           j        dk    rt          j        d          ddlm}m}m}  |||           dd|fD ]D}t          |          s3t          d| dt          j
        	           t          j        d
           E| }||dk    r(|s&t                              d           t          j        |}d}|                    d          rt          j        |          d         }|                    d          rt          j        |          d         }d}|dk    rd}nd}||z   |z   }n|}|t#          j        |          }nt'                      rd| dg}ng }||||d}t)          |          }t+          ||
||          }|r>t                              d           g }t                              d                    |                     |                    t5          j        ||dk    rt          j        ndt4          j                             t                              d                    |                     |                    t5          j        ||d         j        t          j                             |ddd         D ]5}|                                 |j         dk    rt          j        d
           6dS tC          |||          }ddg}tE          |||	|          }t                              d| d           t                              d| d           ||k    sJ tG          |d          5 } g }t                              d                    |                     |                    t5          j        ||dk    rt          j        ndt4          j                             t                              d            t                              d                    |                     |                    t5          j        ||d         j        t4          j                             |dk    rt                              d!           nt                              d"           t                              d                    |                     |                    t5          j        ||d         j        t4          j                             t                              d                    |                     |                    t5          j        ||d         j        |                      |ddd         D ]g}|                                 |j         dk    rFt                              d                    |j$                             t          j        d
           h	 ddd           n# 1 swxY w Y   t                              d#           t                              d$|            t                              d                    |                     t5          j        |          }|                                 |j         dk    rt          j        d
           dS dS )%a  
    Sort and index a contact list.

    Order the mates of each pair record so that all contacts are upper
    triangular with respect to the chromosome ordering given by the chromosomes
    file, sort contacts by genomic location, and index the resulting file.

    PAIRS_PATH : Contacts (i.e. read pairs) text file, optionally compressed.

    CHROMOSOMES_PATH : File listing desired chromosomes in the desired order.
    May be tab-delimited, e.g. a UCSC-style chromsizes file. Contacts mapping to
    other chromosomes will be discarded.

    **Notes**

    - csort can also be used to sort and index a text representation of
      a contact *matrix* in bedGraph-like format. In this case, substitute
      `pos1` and `pos2` with `start1` and `start2`, respectively.
    - Requires Unix tools: sort, bgzip + tabix or pairix.

    If indexing with Tabix, the output file will have the following properties:

    - Upper triangular: the read pairs on each row are assigned to side 1 or 2
      in such a way that (chrom1, pos1) is always "less than" (chrom2, pos2)
    - Rows are lexicographically sorted by chrom1, pos1, chrom2, pos2;
      i.e. "positionally sorted"
    - Compressed with bgzip [*]
    - Indexed using Tabix [*] on chrom1 and pos1.

    If indexing with Pairix, the output file will have the following properties:

    - Upper triangular: the read pairs on each row are assigned to side 1 or 2
      in such a way that (chrom1, pos1) is always "less than" (chrom2, pos2)
    - Rows are lexicographically sorted by chrom1, chrom2, pos1, pos2; i.e.
      "block sorted"
    - Compressed with bgzip [*]
    - Indexed using Pairix [+] on chrom1, chrom2 and pos1.

    [*] Tabix manpage: <http://www.htslib.org/doc/tabix.html>.
    [+] Pairix on Github: <https://github.com/4dn-dcic/pairix>

    ntzm"cooler csort" does not work on Windows. To ingest unsorted pairs data, see the "cooler cload pairs" command.r   )SIG_DFLSIGPIPEsignalr   bgzipzCommand z
 not foundr    r   N-z(Output name required when input is stdinr   z.txtz.txt.gzrN   z.blksrtz.possrtz--parallel=z--buffer-size=50%)r-   r/   r.   r0   zReordering pair mates... )r   r   r8   zInput: ''z	Output: 'r	   z1Reordering pair mates and sorting pair records...z.Sort order: block (chrom1, chrom2, pos1, pos2)z3Sort order: positional (chrom1, pos1, chrom2, pos2)zIndexing...z	Indexer: )%rO   namer   Abortrs   rq   rr   r   r$   r%   r   r&   r:   errorr#   opsplitextrQ   r=   r   r*   rI   r;   debugjoinappendr   Popenr   PIPEr   communicate
returncoderW   r_   r9   args)!r`   r1   rT   chrom1chrom2pos1pos2	flip_onlynprocr\   r2   r3   rU   outkwargsrq   rr   rs   toolr'   prefixext
sort_styler]   rC   r)   flip_cmdpipelineprV   	write_cmdr^   fouts!                                    r   csortr      s   d 
w$k:
 
 	

 0/////////
F7G %(  $ 	-T---CJ????HQKKK F
{S===LLCDDD+??5!! 	,[((+F??6"" 	[((+FCH"JJ"J:%+ {<00			 -e--/BC $fDAAF !((H !13fMMH R.///SXXh''(((#)S==ciid!  	
 	
 	
 	SXXh''(((XXb\-@TTT	
 	
 	
 $$B$ 	 	AMMOOO|q  	 	
 %UFLAAdO	&ufj'JJ	 	(v((()))****+++    '4   )	 DHLL(++,,,OO '-}}#))$%?     KKKLLLLL(++,,,OO HRL$7
       LMMMMQRRRKK**+++OO HRL$7
     LL),,---OO (2,2EdSSS   ddd^    <1$$LL!&!1!1222HQKKK I)	  )	  )	  )	  )	  )	  )	  )	  )	  )	  )	  )	  )	  )	  )	 X 	M"""'''(((CHHY''(((Y''	<1HQKKKKK s   I8W		WW)&rO   os.pathpathr|   rQ   r   r%   r   utilr    r   r   r   ImportErrorr9   devnull__name__r:   rR   rS   rZ   r[   r@   r   r   r*   rI   rW   r_   commandargumentPathoptionintChoicer?   r   r   r   r   <module>r      s   				            



               %""""""" % % %d2:t$$GGG% 
H		 DC+	A	<      6- - -4  ! ! ! zuz$???   ZUZt444>P   		B	   			   			   			   	6	w)	*	*   	D   t0sATX   	(   	+	   	+	   	>	  
 gt"4555k5'JQTUUUk5'JQTUUUB B VU VU 65              nB B Bs   1 A
	A
