
    DUf              	          d Z ddlmZ ddlZddlZddlZddlm	Z	m
Z
mZ ddlmZmZmZ ddlmZ ddlZdd	Z	 	 	 ddZd ZefdZ G d d          Z G d d          Z G d d          Zedddddddefd            ZdS )a  
Collection of classes and functions used for snipping and creation of pileups
(averaging of multiple small 2D regions)
The main user-facing function of this module is `pileup`, it performs pileups using
snippers and other functions defined in the module.  The concept is the following:

- First, the provided features are annotated with the regions from a view (or simply
  whole chromosomes, if no view is provided). They are assigned to the region that
  contains it, or the one with the largest overlap.
- Then the features are expanded using the `flank` argument, and aligned to the bins
  of the cooler
- Depending on the requested operation (whether the normalization to expected is
  required), the appropriate snipper object is created
- A snipper can `select` a particular region of a genome-wide matrix, meaning it
  stores its sparse representation in memory. This could be whole chromosomes or
  chromosome arms, for example
- A snipper can `snip` a small area of a selected region, meaning it will extract
  and return a dense representation of this area
- For each region present, it is first `select`ed, and then all features within it are
  `snip`ped, creating a stack: a 3D array containing all snippets for this region
- For features that are not assigned to any region, an empty snippet is returned
- All per-region stacks are then combined into one, which then can be averaged to create
  a single pileup
- The order of snippets in the stack matches the order of features, this way the stack
  can also be used for analysis of any subsets of original features

This procedure achieves a good tradeoff between speed and RAM. Extracting each
individual snippet directly from disk would be extremely slow due to slow IO.
Extracting the whole chromosomes into dense matrices is not an option due to huge
memory requirements. As a warning, deeply sequenced data can still require a
substantial amount of RAM at high resolution even as a sparse matrix, but typically
it's not a problem.
    )partialN   )is_compatible_viewframeis_cooler_balancedis_valid_expected)assign_view_automake_cooler_viewpool_decorator)LazyToeplitzbedc                    |                                  } |dk    r| ddg         | ddg<   | d         | d         z   dz  | d<   t          j        | d         |z            ||z  z
                      t                    | d<   t          j        | d         |z            ||z  z   d	z                       t                    | d
<   | d         |z  | d<   | d
         |z  | d<   nz|dk    rs| g d         | g d<   | d         | d         z   dz  | d<   | d         | d         z   dz  | d<   t          j        | d         |z            ||z  z
                      t                    | d<   t          j        | d         |z            ||z  z   d	z                       t                    | d<   | d         |z  | d<   | d         |z  | d<   t          j        | d         |z            ||z  z
                      t                    | d<   t          j        | d         |z            ||z  z   d	z                       t                    | d<   | d         |z  | d<   | d         |z  | d<   | S )a  Short summary.

    Parameters
    ----------
    features_df : pd.DataFrame
        Dataframe with feature coordinates.
    flank : int
        Flank size to add to the central bin of each feature.
    resolution : int
        Size of the bins to use.
    format : str
        "bed" or "bedpe" format: has to have 'chrom', 'start', 'end'
        or 'chrom1', 'start1', 'end1', 'chrom2', 'start2', 'end1' columns, repectively.

    Returns
    -------
    pd.DataFrame
        DataFrame with features with new columns
           "center",  "orig_start"   "orig_end"
        or "center1", "orig_start1", "orig_end1",
           "center2", "orig_start2", "orig_rank_end2", depending on format.

    r   startend
orig_startorig_endr   centerlo   hibedpe)start1end1start2end2)orig_start1	orig_end1orig_start2	orig_end2r   r   center1r   r   center2lo1hi1lo2hi2)copynpfloorastypeint)features_dfflank
resolutionformats       S/var/www/html/software/conda/lib/python3.11/site-packages/cooltools/api/snipping.pyexpand_align_featuresr/   3   s   0 ""$$K2=w>N2O\:./!,W!5E8J!Ja OHH[*Z7885J;NN
&++ 	D H[*Z7885J;NNQRR
&++ 	D  +40:=G(.;E	7		 <<<= 	DDD	
 #.h"7+f:M"MQR!RI"-h"7+f:M"MQR!RI H[+j899EZ<OO
&++ 	E H[+j899EZ<OORSS
&++ 	E !,E 2Z ?H)%0:=F H[+j899EZ<OO
&++ 	E H[+j899EZ<OORSS
&++ 	E !,E 2Z ?H)%0:=F    Fc                    || z  dk    st          d          t          |t          j                  r
|s|j        }nd}t          j        |          }t          j        |          }t          |j                  dk    r|dddf         }|dddf         }n|x}}t          j	        ||k              rt          d          ||z
  }	||z
  }
|	| z  
                    t                    }|
| z  
                    t                    }|| z  }||z
  }||z   dz   }t          j        |          }||d<   || z  |d	<   || z  |d
<   |
                    t                    |d<   |
                    t                    |d<   |S )a@  
    Convert genomic loci into bin spans on a fixed bin-segmentation of a
    genomic region. Window limits are adjusted to align with bin edges.

    Parameters
    -----------
    binsize : int
        Bin size (resolution) in base pairs.
    chroms : 1D array-like
        Column of chromosome names.
    centers_bp : 1D or nx2 array-like
        If 1D, center points of each window. If 2D, the starts and ends.
    flank_bp : int
        Distance in base pairs to extend windows on either side.
    region_start_bp : int, optional
        If region is a subset of a chromosome, shift coordinates by this amount.
        Default is 0.

    Returns
    -------
    DataFrame with columns:
        'chrom'        - chromosome
        'start', 'end' - window limits in base pairs
        'lo', 'hi'     - window limits in bins

    r   z/Flanking distance must be divisible by binsize.Nr   r   z Found interval with end > start.)indexchromr   r   r   r   )
ValueError
isinstancepdSeriesr2   r&   asarraylenshapeanyr(   r)   	DataFrame)binsizechroms
centers_bpflank_bpregion_start_bpignore_indexr2   left_bpright_bpleftrightleft_bin	right_bin	flank_binr   r   windowss                    r.   make_bin_aligned_windowsrK   r   s   D w!##JKKK&")$$ \ ZFJ''J
:!!QQQT"aaad#''(	vg !! =;<<<_$D&Ew&&s++H((--IG#I	I	B	Y		"Bl'''GGGG|GG'\GENIIcNNGDMIIcNNGDMNr0   c                    |\  }}t          |          dk    rd|v r|d         j        }|d         j        }||z
                      t                    }|                                |                                k    s
J d            t          j        t          |          |d         |d         ft          j                  }n|d         j        }	|d         j        }
|d         j        }|d	         j        }|
|	z
                      t                    }||z
                      t                    }|                                |                                k    s
J d            |                                |                                k    s
J d            t          j        t          |          |d         |d         ft          j                  }||d
         j        fS t          |          dk    r|\  }}n|x}}d|v r|d         j        }|d         j        }||}}n4|d         j        }|d         j        }|d         j        }|d         j        } | ||          }t          t          t          ||||          t          ||||                              }t          j        |          }||d
         j        fS )Nr   r   r   r   z,Pileup accepts only windows of the same sizer!   r"   r#   r$   _rankr   r   r   r   r   r   )r9   valuesr(   r)   maxminr&   fullnanlistmapr   zipstack)data_select	data_snipargsupportfeature_groupr   r   srV   r!   r"   r#   r$   s1s2region1region2e1e2datas                       r.   _extract_stackrd      s    G]
7||qm##t$+Bt$+Bb  %%A5577aeegg%%%'U%%%GS//1qt<bfEEEE&-C&-C&-C&-C)##C((B)##C((B6688rvvxx''')W'''6688rvvxx''')W'''GS//A1>GGEmG,333 7||q"##' -7#*5!(RB8$+6")8$+6");w((DWYgw??RRQSATATUUVVEHUOOE-(///r0   c           	      @   | d                                                                          rt          j        d           |                                 } | d                             d          | d<   t          t          |                     | d<   t           |t          t          ||          |                     dd                     \  }}t          j        |d          }t          j        |          }t          j        |          }||d	d	d	d	f         }|S )
a  
    Creates a stackup of snippets (a 3D array) by selecting each region present in the
    `features` (using the `data_select` function) and then extracting all snippets from
    the region (using `data_snip`).
    Handles on-diagonal and off-diagonal cases.

    Internal, so assumes correctly formatted input created by `pileup`.

    Parameters
    ----------
    features : DataFrame
        Table of features. Requires columns ['chrom', 'start', 'end'].
        Or ['chrom1', 'start1', 'end1', 'chrom1', 'start2', 'end2'].
        start, end are bp coordinates.
        lo, hi are bin coordinates.

    data_select : callable
        Callable that takes a region as argument and returns
        the data, mask and bin offset of a support region

    data_snip : callable
        Callable that takes data, mask and a 2D bin span (lo1, hi1, lo2, hi2)
        and returns a snippet from the selected support region

    map : callable
        Callable that works like builtin `map`.

    regionzJSome features do not have view regions assigned! Some snips will be empty. rM   F)sortr   axisN)isnullr;   warningswarnr%   fillnaranger9   rU   r   rd   groupbyr&   concatenateargsort)featuresrW   rX   rT   cumul_stack	orig_rankidxs          r.   _pileuprw      s"   :   ""&&(( 
X	
 	
 	
 }}H!(+22
 HX c(mm,,HW !	NK;;XE22

 

K .1555Ky))I
*Y

Cc111aaai(Kr0   c                   "    e Zd ZddZd Zd ZdS )CoolerSnipperNr   c                 <   |t          |          }n7	 t          ||dd          }n"# t          $ r}t          d          |d}~ww xY w|                    d          | _        || _        | j        j        | _        i | _        i | _	        d| _
        |i n|| _        | j                            dd           d| j        v rN| j        d         du rd| _        n>| j        d         d	u s| j        d         d| _        n| j        d         | _        nd| _        || _        dS )
a  Class for generating snips with "observed" data from a cooler

        Parameters
        ----------
        clr : cooler.Cooler
            Cooler object with data to use
        cooler_opts : dict, optional
            Options to pass to the clr.matrix() method, by default None
            Can be used to choose the cooler weight name, e.g.
            cooler_opts={balance='non-standard-weight'}, or use unbalanced data with
            cooler_opts={balance=False}
        view_df : pd.DataFrame, optional
            Genomic view to constrain the analysis, by default None and uses all
            chromosomes present in the cooler
        min_diag : int, optional
            This number of short-distance diagonals is ignored, by default 2
        NTcheck_sortingraise_errors0view_df is not a valid viewframe or incompatiblenamesparsebalanceweightF)r	   r   	Exceptionr4   	set_indexview_dfclrr=   offsetsdiag_indicatorspadcooler_opts
setdefaultclr_weight_namemin_diag)selfr   r   r   r   _es          r.   __init__zCoolerSnipper.__init__   s`   ( ?&s++GG
+"&!%	       F 
 ((00x'!!,!422+##Hd333(((	*d22'/$$ +u44#I.6'+$$'+'7	'B$$#+D  s   ( 
AAAc                    | j         j        |         }| j         j        |         }| j                            |          | j                            |d                   z
  | j        |<   | j                            |          | j                            |d                   z
  | j        |<    | j        j        di | j                            ||          }| j        rt          j
        | j                                        | j                                     |          j                  | _        t          j
        | j                                        | j                                     |          j                  | _        nt          j        | j                                        d                             |          j                                      t"                    | _        t          j        | j                                        d                             |          j                                      t"                    | _        | j        d         r|                                }| j        Y| j                            |          \  }}t          j        ||z
  t          j                  }t/          | |          | j        |<   |S )#  Select a portion of the cooler for snipping based on two regions in the view

        In addition to returning the selected portion of the data, stores necessary
        information about it in the snipper object for future snipping

        Parameters
        ----------
        region1 : str
            Name of a region from the view
        region2 : str
            Name of another region from the view.

        Returns
        -------
        CSR matrix
            Sparse matrix of the selected portion of the data from the cooler
        r   r   r   Ndtype )r   locr   offsetr   matrixr   fetchr   r&   isnanbinsrN   _isnan1_isnan2
zeros_liker(   booltocsrr   extentarangeint32r   r   	r   r_   r`   region1_coordsregion2_coordsr   r   r   diagss	            r.   selectzCoolerSnipper.select[  sR   $ )'2)'2 $ ? ?$(//1C
 C
 !
W !% ? ?$(//1C
 C
 !
W !444#344::N
 
  	8 45;;NKKR DL 8 45;;NKKR DLL =(..~>>E fTll L =(..~>>E fTll L H% 	$\\^^F=$X__^44FBIb2gRX666E,8%,G,GD )r0   c                 |   |\  }}}}| j         |         }	| j         |         }
| j        }||z  |	z
  ||z  |	z
  }}||z  |
z
  ||z  |
z
  }}|dk    sJ |dk    sJ |j        \  }}||z
  ||z
  }}d}dx}x}x}}|dk     r| }d}|dk     r| }d}||k    r
|||z
  z
  }d}||k    r
|||z
  z
  }d}|rbt          |d          }t	          ||          }t          |d          }t	          ||          }t          j        ||ft
          j                  }ns|||||f                                         	                    d          }t
          j        || j
        ||         ddf<   t
          j        |dd| j        ||         f<   | j        1| j        |         ||||f         | j        k     }t
          j        ||<   |S )a  Extract a snippet from the matrix

        Returns a NaN-filled array for out-of-bounds regions. Fills in NaNs based on the
        cooler weight, if using balanced data. Fills NaNs in all diagonals below min_diag

        Parameters
        ----------
        matrix : SCR matrix
            Output of the .select() method
        region1 : str
            Name of a region from the view corresponding to the matrix
        region2 : str
            Name of the other regions from the view corresponding to the matrix
        tup : tuple
            (start1, end1, start2, end2) coordinates of the requested snippet in bp

        Returns
        -------
        np.array
            Requested snippet.
        r   FNTfloat)r   r=   r:   rO   rP   r&   rQ   rR   toarrayr(   r   r   r   r   )r   r   r_   r`   tupr]   ra   r^   rb   offset1offset2r=   r!   r"   r#   r$   mndmdnout_of_boundspad_left	pad_right
pad_bottompad_topi0i1j0j1snippetDs                                  r.   snipzCoolerSnipper.snip  s   , BB,w',w','MW,rW}.GS'MW,rW}.GSaxxxxaxxxx|1sC#IB6:::9:zG77J M77tH M77C!GnG M77cAgI M 	7S!BS!BS!BS!Bgr2h//GG SWc#g-.6688??HHG02GDLS)111,-02GAAAt|CG,,-=$$W-c#gs3w.>?$-OAGAJr0   )NNr   __name__
__module____qualname__r   r   r   r   r0   r.   ry   ry     sH        9! 9! 9! 9!v1 1 1f? ? ? ? ?r0   ry   c                   *    e Zd Z	 	 	 	 ddZd Zd ZdS )ObsExpSnipperNr   balanced.avgc                    || _         || _        || _        |t          |          }n7	 t	          ||dd          }n"# t
          $ r}t          d          |d}~ww xY w	 t          |d||| j        gd          }n"# t
          $ r}t          d          |d}~ww xY w|                    d          | _	        | j         j
        | _
        i | _        i | _        d| _        |i n|| _        | j                            d	d           d
| j        v rN| j        d
         du rd| _        n>| j        d
         du s| j        d
         d| _        n| j        d
         | _        nd| _        || _        dS )a  Class for generating expected-normalised snips from a cooler

        Parameters
        ----------
        clr : cooler.Cooler
            Cooler object with data to use
        expected : pd.DataFrame
            Dataframe containing expected interactions in the cooler
        cooler_opts : dict, optional
            Options to pass to the clr.matrix() method, by default None
            Can be used to choose the cooler weight name, e.g.
            cooler_opts={balance='non-standard-weight'}, or use unbalanced data with
            cooler_opts={balance=False}
        view_df : pd.DataFrame, optional
            Genomic view to constrain the analysis, by default None and uses all
            chromosomes present in the cooler
        min_diag : int, optional
            This number of short-distance diagonals is ignored, by default 2
        expected_value_col : str, optional
            Name of the column in the expected dataframe that contains the expected
            interaction values, by default "balanced.avg"
        NTr{   r~   cisverify_coolerexpected_value_colsr}   provided expected is not validr   r   r   r   F)r   expectedexpected_value_colr	   r   r   r4   r   r   r   r=   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   s	            r.   r   zObsExpSnipper.__init__  s   >  "4?&s++GG
+"&!%	       F 
	F!!+% "	 	 	AA  	F 	F 	F=>>AE	F ((00x'!!,!422+##Hd333(((	*d22'/$$ +u44#I.6'+$$'+'7	'B$$#+D  -   = 
AAA A< <
BBBc                 v   ||k    st          d          | j        j        |         }| j        j        |         }| j                            |          | j                            |d                   z
  | j        |<   | j                            |          | j                            |d                   z
  | j        |<    | j        j        d	i | j                            ||          }| j        d         r|	                                }| j
        rt          j        | j                                        | j
                                     |          j                  | _        t          j        | j                                        | j
                                     |          j                  | _        nt          j        | j                                        d                             |          j                                      t&                    | _        t          j        | j                                        d                             |          j                                      t&                    | _        t)          | j                            ddg                              ||f          | j                 j                  | _        | j        Y| j                            |          \  }}t          j        ||z
  t          j                  }t)          | |          | j        |<   |S )
r   z3ObsExpSnipper is implemented for cis contacts only.r   r   r   r_   r`   Nr   r   )r4   r   r   r   r   r   r   r   r   r   r   r&   r   r   rN   r   r   r   r(   r   r   r   rp   	get_groupr   	_expectedr   r   r   r   r   r   s	            r.   r   zObsExpSnipper.select'  s   $ '!!RSSS)'2)'2 $ ? ?$(//1C
 C
 !
W !% ? ?$(//1C
 C
 !
W !444#344::N
 
 H% 	$\\^^F 	8 45;;NKKR DL 8 45;;NKKR DLL =(..~>>E fTll L =(..~>>E fTll L &M!!9i"899Y)**4+BD
 

 =$X__^44FBIb2gRX666E,8%,G,GD )r0   c                    |\  }}}}| j         |         }	| j         |         }
| j        }||z  |	z
  ||z  |	z
  }}||z  |
z
  ||z  |
z
  }}|dk    sJ |dk    sJ |j        \  }}||z
  ||z
  }}d}dx}x}x}}|dk     r| }d}|dk     r| }d}||k    r
|||z
  z
  }d}||k    r
|||z
  z
  }d}|rat          |d          }t	          ||          }t          |d          }t	          ||          }t          j        ||ft
          j                  S |||||f                                         	                    d          }t
          j        || j
        ||         ddf<   t
          j        |dd| j        ||         f<   | j        ||||f         }| j        1| j        |         ||||f         | j        k     }t
          j        ||<   ||z  S )a  Extract an expected-normalised snippet from the matrix

        Returns a NaN-filled array for out-of-bounds regions. Fills in NaNs based on the
        cooler weight, if using balanced data. Fills NaNs in all diagonals below min_diag

        Parameters
        ----------
        matrix : SCR matrix
            Output of the .select() method
        region1 : str
            Name of a region from the view corresponding to the matrix
        region2 : str
            Name of the other regions from the view corresponding to the matrix
        tup : tuple
            (start1, end1, start2, end2) coordinates of the requested snippet in bp

        Returns
        -------
        np.array
            Requested snippet.
        r   FNTr   )r   r=   r:   rO   rP   r&   rQ   rR   r   r(   r   r   r   r   r   ) r   r   r_   r`   r   r]   ra   r^   rb   r   r   r=   r!   r"   r#   r$   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   s                                    r.   r   zObsExpSnipper.snipa  s8   , BB,w',w','MW,rW}.GS'MW,rW}.GSaxxxxaxxxx|1sC#IB6:::9:zG77J M77tH M77C!GnG M77cAgI M 	7S!BS!BS!BS!B7B8RV,,, SWc#g-.6688??HHG02GDLS)111,-02GAAAt|CG,,-N3s7CG+,=$$W-c#gs3w.>?$-OAGAJ{r0   )NNr   r   r   r   r0   r.   r   r     s_        
 )T! T! T! T!l8 8 8tA A A A Ar0   r   c                   $    e Zd Z	 ddZd Zd ZdS )ExpectedSnipperNr   r   c                    || _         || _        || _        |t          |          }n7	 t	          ||dd          }n"# t
          $ r}t          d          |d}~ww xY w	 t          |d||| j        gd          }n"# t
          $ r}t          d          |d}~ww xY w|                    d          | _	        | j         j
        | _
        i | _        i | _        || _        dS )	a  Class for generating expected snips

        Parameters
        ----------
        clr : cooler.Cooler
            Cooler object to which the data corresponds
        expected : pd.DataFrame
            Dataframe containing expected interactions in the cooler
        view_df : pd.DataFrame, optional
            Genomic view to constrain the analysis, by default None and uses all
            chromosomes present in the cooler
        min_diag : int, optional
            This number of short-distance diagonals is ignored, by default 2
        expected_value_col : str, optional
            Name of the column in the expected dataframe that contains the expected
            interaction values, by default "balanced.avg"
        NTr{   r~   r   r   r   r   )r   r   r   r	   r   r   r4   r   r   r   r=   r   r   r   )r   r   r   r   r   r   r   r   s           r.   r   zExpectedSnipper.__init__  s9   (  "4?&s++GG
+"&!%	       F 
	F!!+% "	 	 	AA  	F 	F 	F=>>AE	F ((00x'! r   c                    ||k    st          d          | j        j        |         }| j        j        |         }| j                            |          | j                            |d                   z
  | j        |<   | j                            |          | j                            |d                   z
  | j        |<   t          j        | j                            |                    | _	        t          j        | j                            |                    | _
        t          | j                            ddg                              ||f          | j                 j                  | _        | j        Y| j                            |          \  }}t          j        ||z
  t          j                  }t          | |          | j        |<   | j        S )a4  Select a portion of the expected matrix for snipping based on two regions
        in the view

        In addition to returning the selected portion of the data, stores necessary
        information about it in the snipper object for future snipping

        Parameters
        ----------
        region1 : str
            Name of a region from the view
        region2 : str
            Name of another region from the view.

        Returns
        -------
        CSR matrix
            Sparse matrix of the selected portion of the data from the cooler
        z5ExpectedSnipper is implemented for cis contacts only.r   r_   r`   Nr   )r4   r   r   r   r   r   r&   diffr   r   r   r   r   rp   r   r   rN   r   r   r   r   r   )r   r_   r`   r   r   r   r   r   s           r.   r   zExpectedSnipper.select  s   & '!!TUUU)'2)'2 $ ? ?$(//1C
 C
 !
W !% ? ?$(//1C
 C
 !
W 88998899%M!!9i"899Y)**4+BD
 

 =$X__^44FBIb2gRX666E,8%,G,GD )~r0   c                    |\  }}}}| j         |         }	| j         |         }
| j        }||z  |	z
  ||z  |	z
  }}||z  |
z
  ||z  |
z
  }}|dk    sJ |dk    sJ ||z
  ||z
  }}|dk     s|dk     s|| j        k    s|| j        k    r!t	          j        ||ft          j                  S |||||f         }| j        1| j        |         ||||f         | j        k     }t          j        ||<   |S )a}  Extract an expected snippet

        Returns a NaN-filled array for out-of-bounds regions.
        Fills NaNs in all diagonals below min_diag

        Parameters
        ----------
        exp : SCR matrix
            Output of the .select() method
        region1 : str
            Name of a region from the view corresponding to the matrix
        region2 : str
            Name of the other regions from the view corresponding to the matrix
        tup : tuple
            (start1, end1, start2, end2) coordinates of the requested snippet in bp

        Returns
        -------
        np.array
            Requested snippet.
        r   )	r   r=   r   r   r&   rQ   rR   r   r   )r   expr_   r`   r   r]   ra   r^   rb   r   r   r=   r!   r"   r#   r$   r   r   r   r   s                       r.   r   zExpectedSnipper.snip  s#   , BB,w',w','MW,rW}.GS'MW,rW}.GSaxxxxaxxxxsC#IB77cAggtvtv7B8RV,,,c#gs3w&'=$$W-c#gs3w.>?$-OAGAJr0   )Nr   r   r   r   r0   r.   r   r     sK        JX:! :! :! :!x( ( (T' ' ' ' 'r0   r   r   i autor   r   c
                    h d                     |j                  rd}
n.h d                     |j                  rd}
nt          d          |t          |           }n7	 t	          || dd          }n"# t
          $ r}t          d	          |d}~ww xY wt          ||          }|t          ||| j        |

          }n|	                                }|
dk    rW|d         | j        z  
                    t                    |d<   |d         | j        z  
                    t                    |d<   n|d         | j        z  
                    t                    |d<   |d         | j        z  
                    t                    |d<   |d         | j        z  
                    t                    |d<   |d         | j        z  
                    t                    |d<   |dvr:	 t          | |d          }n&# t
          $ r}t          d| d          |d}~ww xY w|dk    rH|dvrDt          |                                 d|          j                                      dd          }n|dv rd}|g d                             | j        d !          }t          t'          |d"         j        |                    }|d#                             |          |d$<   |
dk    rV|ddg                             |d$                             d%          d%!          
                    t                    |ddg<   n|ddg                             |d$                             d%          d%!          
                    t                    |ddg<   |ddg                             |d$                             d%          d%!          
                    t                    |ddg<   |t1          | |d&|i|'          }nt3          | ||d&|i||(          }t5          ||j        |j        |	)          }|
dk    r)t;          j        |t;          j        |d*+                    }|S ),a  
    Pileup features over the cooler.

    Parameters
    ----------
    clr : cooler.Cooler
        Cooler with Hi-C data
    features_df : pd.DataFrame
        Dataframe in bed or bedpe format: has to have 'chrom', 'start', 'end'
        or 'chrom1', 'start1', 'end1', 'chrom2', 'start2', 'end2' columns.
    view_df : pd.DataFrame
        Dataframe with the genomic view for this operation (has to match the
        expected_df, if provided)
    expected_df : pd.DataFrame
        Dataframe with the expected level of interactions at different
        genomic separations
    expected_value_col : str
        Name of the column in expected used for normalizing.
    flank : int
        How much to flank the center of the features by, in bp
    min_diag: str or int
        All diagonals of the matrix below this value are ignored. 'auto'
        tries to extract the value used during the matrix balancing,
        if it fails defaults to 2
    clr_weight_name : str
        Value of the column that contains the balancing weights
    force : bool
        Allows start>end in the features (not implemented)
    nproc : int, optional
        How many processes to use for calculation. Ignored if map_functor is passed.
    map_functor : callable, optional
        Map function to dispatch the matrix chunks to workers.
        If left unspecified, pool_decorator applies the following defaults: if nproc>1 this defaults to multiprocess.Pool;
        If nproc=1 this defaults the builtin map.

    Returns
    -------
        np.ndarray: a stackup of all snippets corresponding to the features, with shape
        (n, D, D), where n is the number of snippets and (D, D) is the shape of each
        snippet

    >   r   r3   r   r   >   r   chrom1chrom2r   r   r   zUnknown feature_df formatNTr{   r~   )r-   r   r   r   r   r   r!   r   r"   r   r#   r   r$   )NF)r}   z#provided cooler is not balanced or z is missingr   zbins/ignore_diagsr   )r3   r   r   r   ri   r   rf   region_offsetr   r   )r   r   r   )r   r   r   r   )rT   )r   r   r   )axes) issubsetcolumnsr4   r	   r   r   r   r/   r=   r%   r(   r)   r   dictopenattrsgetapplyr   rU   rN   replacesubtractrn   ry   r   rw   r   r   r&   fmax	transpose)r   r*   r   expected_dfr   r+   r   r   nprocmap_functorfeature_typer   r   region_offsetsregion_offsets_dictsnipperrV   s                    r.   pileupr   5  s   p !  ))+*=>> 6	A	A	A	J	J
 
 6 4555"3''	X'"!	  AA  	X 	X 	XOPPVWW	X #;88K +L
 
 
 "&&((5  !,W!5!C K KC P PK!,U!3ck!A I I# N NK"-h"7#+"E!M!Mc!R!RK"-f"5"C!K!KC!P!PK"-h"7#+"E!M!Mc!R!RK"-f"5"C!K!KC!P!PKm++	"3dKKKAA 	 	 	RoRRR 	
 6o]BB

#<?#<#<=CDDHHA
 
 
M	)	) 6667==cjq=QQNs76?#9>JJKK#.x#8#@#@AT#U#UK ut%XO,33A66    VC[[ 	T4L!! 'XO,33A66    VC[[ 	UEN# 'XO,33A66    VC[[ 	UEN# "O4	
 
 
  "O41
 
 
 K;OOOEur|E	BBBCCLs0   !A5 5
B?BB%G8 8
HHH)r   )r   r   F)__doc__	functoolsr   rl   numpyr&   pandasr6   
lib.checksr   r   r   
lib.commonr   r	   r
   lib.numutilsr   r/   rK   rd   rT   rw   ry   r   r   r   r   r0   r.   <module>r      s     B                        
 L K K K K K K K K K ' ' ' ' ' ' < < < <F C C C CL*0 *0 *0Z 36 7 7 7 7tn n n n n n n nbR R R R R R R RjN N N N N N N N`  %

h h h h h hr0   