
    DUf                         d dl Zd dlZd dlZddlmZ d dlZ ej        ej	                   	 ddZ
d	 Zd
 Z	 ddZ	 	 	 	 	 ddZdS )    N   )is_compatible_viewframe)level@B strand   c              #     K   |                                  j        d         }t          j        t          j        d||          |          }t          t          |dd         |dd                             }|D ]\  }}|                                  ||         }	|	d                             |          |	d<   |	d                             |          |	d<   |	|	d         dk    |	d         dk    z           }	|	j        d         dk    rQt          j        |	ddg         	                    t                              |	ddg<   |	                    d          V  t          j        d	||z  d
z  dd           dS )zXGenerates chunks of pixels from the cooler and adjusts their bin IDs to follow the view.r   Nr   bin1_idbin2_idTdropz
Processed d   z.2fz% pixels)pixelsshapenpappendarangelistzipmapsortastypeintreset_indexlogginginfo)
clr
binmapping	chunksizeorientation_colnprocnpixelschunksi0i1chunks
             T/var/www/html/software/conda/lib/python3.11/site-packages/cooltools/api/rearrange.py_generate_adjusted_chunksr)      s      jjll #GYryGY77AAF#fSbSk6!"":..//F  
@ 
@B

RU# +//
;;i +//
;;iuY'2-%	2Bb2HIJ;q>A,.Gy),-44S99- -E9i() ###.....>"W*S.>>>>????
@ 
@    c                 h    | d                                          | d<   | d         | d         z
  | d<   | S )Nlengthendstart)cumsum)chromdfs    r(   _adjust_start_endr1   !   s9    X&--//GENu~(99GGNr*   c                     | j         d d d                             d          } | d         | d         z
  }| d                                         | d         z
  | d<   | d         |z   | d<   | S )Nr
   Tr   r-   r.   )ilocr   max)regdfls     r(   
_flip_binsr7   '   sp    Jttt((d(33EeuW~%A5\%%''%,6E'N>A%E%LLr*   	new_chromc                   	 t          t          |d                                         ||                                                             }t          t          |d                                         ||         dk                                                        	|                     dg          } t	          j        | |d                              dg          }|                    dd	                              	fd
                              d          }t	          j	        ||d          }|d         
                    |          |d<   |d         |d         z
  |d<   |                    dd	                              t                                        ddg          }t          j        d           d | d                             t                     D             }|                    d t          |d                             t                     |j                  D                        t          j        d           |                    dg          }||fS )aZ  
    Rearranges the input `bins_old` based on the information in the `view_df` DataFrame.

    Parameters
    ----------
    bins_old : bintable
        The original bintable to rearrange.
    view_df : viewframe
        Viewframe with new order of genomic regions. Can have an additional column for
        the new chromosome name (`new_chrom_col`), and an additional column for the
        strand orientation (`orientation_col`, '-' will invert the region).
    new_chrom_col : str, optional
        Column name in the view_df specifying new chromosome name for each region,
        by default 'new_chrom'. If None, then the default chrom column names will be used.
    orientation_col : str, optional
        Column name in the view_df specifying strand orientation of each region,
        by default 'strand'. The values in this column can be "+" or "-".
        If None, then all will be assumed "+".


    Returns
    -------
    bins_new : bintable
        The rearranged bintagle with the new chromosome names and orientations.
    bin_mapping : dict
        Mapping of original bin IDs to new bin IDs
    name-old_id)namesF)drop_unassignedview_region)subset)
group_keysc                 @    | j                  rt          |           n| S )N)r:   r7   )xflipdicts    r(   <lambda>z rearrange_bins.<locals>.<lambda>W   s    (16*:AA r*   Tr   )view_dfdf_view_colchromr-   r.   r,   )columnszRearranged binsc                     i | ]}|d S )r
    ).0r<   s     r(   
<dictcomp>z"rearrange_bins.<locals>.<dictcomp>g   s    KKK&62KKKr*   c                     i | ]\  }}||	S rK   rK   )rL   r<   new_ids      r(   rM   z"rearrange_bins.<locals>.<dictcomp>i   s.     	
 	
 	
 F	
 	
 	
r*   zCreated bin mapping)dictr   to_numpyr   bfassign_viewdropnagroupbyapplysort_bedframer   r1   r   r   r   r   r   updateindex)
bins_oldrF   new_chrom_colr!   	chromdictbins_subsetbins_invertedbins_newbin_mappingrD   s
            @r(   rearrange_binsra   /   sb   < S1133W]5K5T5T5V5VWWXXIGFO$$&&)AS)H(R(R(T(TUU H ##8*#55H.7EJJJQQ R  K 	Me<<	AAAA	B	B	$		 
 !  H
 !/33I>>HW!%8G+<<HXU33	 	!	!	x/	0	0 
 L"###KKHX,>,E,Ec,J,JKKKK	
 	
"%hx&8&?&?&D&Dhn"U"U	
 	
 	
   L&'''}}hZ}00H[  r*   逖 wc           
         |                                 }	 t          |g d         | dd          }n"# t          $ r}	t          d          |	d}	~	ww xY w|| j        d         }|ud}||j        v rjt          j        d	           ||j        v rMd
t          j	        
                    dt          j        t          j                  j                   }||j        v M||j        vr|d         |j        dd|f<   |ud}||j        v rt          j        d           ||j        v rMdt          j	        
                    dt          j        t          j                  j                   }||j        v M||j        vrd|j        dd|f<   t          j        |                    |                              d                     st          d          |                                 dd         }
t'          |
|||          \  }}t          j        d           t)          j        ||t-          | ||          ||t/          d                     t          j        d|            dS )a  Reorder cooler following a genomic view.

    Parameters
    ----------
    clr : cooler.Cooler
        Cooler object
    view_df : viewframe
        Viewframe with new order of genomic regions. Can have an additional column for
        the new chromosome name (`new_chrom_col`), and an additional column for the
        strand orientation (`orientation_col`, '-' will invert the region).
    out_cooler : str
        File path to save the reordered data
    new_chrom_col : str, optional
        Column name in the view_df specifying new chromosome name for each region,
        by default 'new_chrom'. If None, then the default chrom column names will be used.
    orientation_col : str, optional
        Column name in the view_df specifying strand orientation of each region,
        by default 'strand'. The values in this column can be "+" or "-".
        If None, then all will be assumed "+".
    assembly : str, optional
        The name of the assembly for the new cooler. If None, uses the same as in the
        original cooler.
    chunksize : int, optional
        The number of pixels loaded and processed per step of computation.
    mode : str, optional
        (w)rite or (a)ppend to the output cooler file. Default: w
    )rH   r.   r-   r:   FT)check_sortingraise_errorsz0view_df is not a valid viewframe or incompatibleNzgenome-assemblyr8   zknew_chrom_col is not provided, but new_chrom column exists. Pre-existing new_chrom column will not be used.
new_chrom_r   rH   r   zgorientation_col is not provided, but strand column exists. Pre-existing strand column will not be used.strand_+c                 `    t          j        t          j        | j                  dk              S )Nr   )r   alldiffrY   )rC   s    r(   rE   z"rearrange_cooler.<locals>.<lambda>   s"    rvbgag>N>NRS>S7T7T r*   z#New chromosomes are not consecutive)r[   r!   zCreating a new cooler)r    g    eA)assemblymodemergebufzCreated a new cooler at )copyr   	Exception
ValueErrorr   rI   r   warnr   randomrandintiinfoint32r4   locrk   rU   rV   binsra   coolercreate_coolerr)   r   )r   rF   
out_coolerr[   r!   rm   r    rn   _erZ   r_   r`   s                r(   rearrange_coolerr   s   s   L llnnGT#5556	
 
 
  T T TKLLRSST 8-. #GO++LC    7?22O!2!21bhrx6H6H6L!M!MOO   7?22 GO++(/(8AAA}$% "go--L@   00V	(9(9!RXbh=O=O=S(T(TVVO 00go--*-AAA&'6&&,,-T-TUU  @ >???xxzz!!!}H*'  Hk L()))
!	
 	
 	

 S    L8J8899999s   2 
AAA)r   r   r   )r8   r   )r8   r   Nrb   rc   )bioframerR   rz   numpyr   
lib.checksr   r   basicConfigINFOr)   r1   r7   ra   r   rK   r*   r(   <module>r      s             0 0 0 0 0 0   ', ' ' ' ' KL@ @ @ @,     CKA! A! A! A!P 	f: f: f: f: f: f:r*   