
    DUfJ                         d dl Zd dlZd dlZd dlZddlmZ ddlm	Z	m
Z
 ddlmZmZ d dlZddZ	 ddZd	 Zd
 Z	 	 	 	 	 ddZdddddddefdZ	 	 	 	 	 ddZdS )    N   )numutils)is_compatible_viewframeis_cooler_balanced)make_cooler_viewalign_track_with_coolerc                 $   g }|D ]}t          j        |          t          j        |          z  }||dk    r4t          j                            ||         ||                   d         }n|dk    r3t          j                            ||         ||                   d         }n|dk    rht          j                            ||         ||                   d         }t          j        |          |z  |z  t          j        ||                   z  }ne|dk    r=t          j	        ||         ||                   t          j
        ||                   z  }n"t          d                    |                    |                    |           t          t          |                    D ](}t          j        ||                   ||         z  ||<   )|7t          j        t          j        |                     }	| |	         ||	         }} | |fS )a@  
    Flip `eigvecs` to achieve a positive correlation with `phasing_track`.

    Parameters
    ----------
    sort_metric : str
        If provided, re-sort `eigenvecs` and `eigvals` in the order of
        decreasing correlation between phasing_track and eigenvector, using the
        specified measure of correlation. Possible values:
        'pearsonr' - sort by decreasing Pearson correlation.
        'var_explained' - sort by decreasing absolute amount of variation in
        `eigvecs` explained by `phasing_track` (i.e. R^2 * var(eigvec)).
        'MAD_explained' - sort by decreasing absolute amount of Median Absolute
        Deviation from the median of `eigvecs` explained by `phasing_track`
        (i.e. COMED(eigvec, phasing_track) * MAD(eigvec)).
        'spearmanr' - sort by decreasing Spearman correlation.
    N	spearmanrr   pearsonrvar_explainedMAD_explainedzUnknown sorting metric: {})npisfinitescipystatsr
   r   signvarr   COMEDMAD
ValueErrorformatappendrangelenargsortabs)
eigvalseigvecsphasing_tracksort_metriccorrseigvecmaskcorriidxs
             T/var/www/html/software/conda/lib/python3.11/site-packages/cooltools/api/eigdecomp.py_phase_eigsr(      s   & E  {6""R[%?%??+"<"<;((t)<fTlKKANDDJ&&;''d(;VD\JJ1MDDO++;''d(;VD\JJ1MD74==4'$.t1E1EEDDO++>-"5vd|DDx|tH H DD 9@@MMNNNT 3w<<   4 4WU1X&&3

 j"&--(("3<G       c           
      ,    t          j                    d t          j                    <                        d          dk    } j        d         |dz   k    s|                                |dz   k    rXt          j        d t          |          D                       t          j         fdt          |          D                       fS |r-t          | dz   |          D ]}t          j         d|           t          j         |          \  }}	}	}	|rC|dk     r=t          j	        |dt          j
        ||d	d	f         d	d	|f         |                    }|dz  }d|| d	d	f<   d|d	d	| f<   t          j        ||d
          \  }
}|
t          j        t          j        |
dz  d                    d	d	d	f         z  }
|
t          j        t          j        |                    d	d	d	f         z  }
|t          ||
||          \  }}
||
fS )a  
    Compute compartment eigenvector on a dense cis matrix.

    Note that the amplitude of compartment eigenvectors is weighted by their
    corresponding eigenvalue

    Parameters
    ----------
    A : 2D array
        balanced dense contact matrix
    n_eigs : int
        number of eigenvectors to compute
    phasing_track : 1D array, optional
        if provided, eigenvectors are flipped to achieve a positive correlation
        with `phasing_track`.
    ignore_diags : int
        the number of diagonals to ignore
    clip_percentile : float
        if >0 and <100, clip pixels with diagonal-normalized values
        higher than the specified percentile of matrix-wide values.
    sort_metric : str
        If provided, re-sort `eigenvecs` and `eigvals` in the order of
        decreasing correlation between phasing_track and eigenvector, using the
        specified measure of correlation. Possible values:
        'pearsonr' - sort by decreasing Pearson correlation.
        'var_explained' - sort by decreasing absolute amount of variation in
        `eigvecs` explained by `phasing_track` (i.e. R^2 * var(eigvec))
        'MAD_explained' - sort by decreasing absolute amount of Median Absolute
        Deviation from the median of `eigvecs` explained by `phasing_track`
        (i.e. COMED(eigvec, phasing_track) * MAD(eigvec)).
        'spearmanr' - sort by decreasing Spearman correlation.
        This option is designed to report the most "biologically" informative
        eigenvectors first, and prevent eigenvector swapping caused by
        translocations. In reality, however, sometimes it shows poor
        performance and may lead to reporting of non-informative eigenvectors.
        Off by default.


    Returns
    -------
    eigenvalues, eigenvectors

    .. note:: ALWAYS check your EVs by eye. The first one occasionally does
              not reflect the compartment structure, but instead describes
              chromosomal arms or translocation blowouts.

    r   axisr*   c                 &    g | ]}t           j        S  )r   nan.0r%   s     r'   
<listcomp>zcis_eig.<locals>.<listcomp>y   s    444bf444r)   c                 h    g | ].}t          j        j        d                    t           j        z  /S )r   )r   onesshaper0   )r2   r%   As     r'   r3   zcis_eig.<locals>.<listcomp>z   s0    JJJqbgagaj))BF2JJJr)      g      ?d   NTmask_zero_rowsr   )r   arrayr   sumr6   r   r   set_diagobserved_over_expectedclip
percentileget_eigsqrtnansumr   r(   )r7   n_eigsr   ignore_diagsclip_percentiler    r#   dOE_r   r   s   `           r'   cis_eigrK   @   s<   d 	AAr{1~~o55a5==1Dwqz\A%%%|a7G)G)GH44eFmm44455HJJJJE&MMJJJKK
 	

  )}q(,77 	) 	)Aaa((((1!T::KB1a R?S00WRBM"T111W+aaag*>PPQQ #IB BuaaaxLBqqq4%xL'F4HHHGWrwryAA666774@@Grwrvg''400G  &wTTGr)   c                 <   t          j        | |         |          }| |         }||||k    <   || |<   t          j        | d          }|t          j        | d          dk             }t          j        ||          }|dk    ||k     z  }	d| |	d d f<   d| d d |	f<   | S )Nr   r,   )r   rA   r=   )
r7   	transmaskperc_topperc_bottomlimtdatamargmarg_nz
min_cutoffdropmasks
             r'   _filter_heatmaprV      s    
-)h
/
/CiLEE%#+AiL 6!!D26!!$$$q()Gw44JqTJ./HAhkNAaaakNHr)   c                     |                     t          j                  }t          j        t          j        | d                    dk    }d|d d |f<   d||d d f<   t          j        | |           | S )Nr   r,   g|=r   )astyper   uint8r   r=   r   fake_cis)r7   cismaskss      r'   	_fake_cisr]      sr    nnRX&&G
rvaa   !!U*AGAAAqDMGAqqqDMa!!!Hr)   X@r8   Fc                 :   t          j        |           } | j        d         | j        d         k    rt          d          | j        d         }|d         dk    r6|d         |k    r*t          j        t          j        |          dk              s"t          d                    |                    t          |dd         |dd                   }g }	t          |          D ]/\  }
\  }}d| ||||f<   |		                    |
g||z
  z             0t          j        |	          }	|	dddf         |	dddf         k    }t          j
        | d          dk    }| }t           j                            ||          }d| |ddf<   d| dd|f<   t          | ||z  ||          } t          j
        | d          dk    }| }t           j                            ||          }d| |ddf<   d| dd|f<   t          | |           } t          j        |           d         } t          | |           } t          j        |           d         } t          j        | |                   }| |z
  |z  }d||ddf<   d|dd|f<   t          j        ||d	          \  }}|t          j        t          j
        |d
z  d                    dddf         z  }|t          j        t          j        |                    dddf         z  }|t+          ||||          \  }}||fS )a  
    Compute compartmentalization eigenvectors on trans contact data

    Parameters
    ----------
    A : 2D array
        balanced whole genome contact matrix
    partition : sequence of int
        bin offset of each contiguous region to treat separately (e.g.,
        chromosomes or chromosome arms)
    n_eigs : int
        number of eigenvectors to compute; default = 3
    perc_top : float (percentile)
        filter - clip trans blowout contacts above this cutoff; default = 99.95
    perc_bottom : float (percentile)
        filter - remove bins with trans coverage below this cutoff; default=1
    phasing_track : 1D array, optional
        if provided, eigenvectors are flipped to achieve a positive correlation
        with `phasing_track`.
    sort_metric : str
        If provided, re-sort `eigenvecs` and `eigvals` in the order of
        decreasing correlation between phasing_track and eigenvector, using the
        specified measure of correlation. Possible values:
        'pearsonr' - sort by decreasing Pearson correlation.
        'var_explained' - sort by decreasing absolute amount of variation in
        `eigvecs` explained by `phasing_track` (i.e. R^2 * var(eigvec))
        'MAD_explained' - sort by decreasing absolute amount of Median Absolute
        Deviation from the median of `eigvecs` explained by `phasing_track`
        (i.e. COMED(eigvec, phasing_track) * MAD(eigvec)).
        'spearmanr' - sort by decreasing Spearman correlation.
        This option is designed to report the most "biologically" informative
        eigenvectors first, and prevent eigenvector swapping caused by
        translocations. In reality, however, sometimes it shows poor
        performance and may lead to reporting of non-informative eigenvectors.
        Off by default.


    Returns
    -------
    eigenvalues, eigenvectors

    .. note:: ALWAYS check your EVs by eye. The first one occasionally does
          not reflect the compartment structure, but instead describes
          chromosomal arms or translocation blowouts.


    r   r8   zA is not symmetriczANot a valid partition. Must be a monotonic sequence from 0 to {}.Nr,   Tr:   r   )r   r<   r6   r   alldiffr   zip	enumerateextendrD   logical_andouterrV   r]   r   iterative_correction_symmetricmeanrB   rC   r   r(   )r7   	partitionrE   rN   rO   r   r    n_binsextentspart_idsni0i1is_trans
is_bad_binis_good_binis_validAbarOr   r   s                        r'   	trans_eigrw      sT   p 	AwqzQWQZ-...WQZF!imv55"&ASASVWAW:X:X5"F6NN
 
 	
 )CRC.)ABB-00GH )) ) )8B"R%B,rBw((((x!!H4 HT111W$55H 11%%%*J+K~##K==HAj!!!mAaaam8h.+FFA11%%%*J+K~##K==HAj!!!mAaaam 	!hYA/2215A!hYA/2215A 71X;D	
TTAAj!!!mAaaam'6$GGGGWrwryAA666774@@Grwrvg''400G &wTTGr)   weightgX@c	                 .    |t                     }n7	 t          | dd          }	n"# t          $ r}
t          d          |
d}
~
ww xY w	 t	           d          }	n&# t          $ r}
t          d d          |
d}
~
ww xY w,                     d                               d	d
          n                                 dd         }t           |dd          t          j
        ||                              dgd          }|j        dd|j        f         }d t                    D             }|D ]}t          j        ||<   |                                }d t                    D             }|D ]}t          j        ||<    fd} |||j                  }|D ]W\  }}}t          j        ||          j        }|j        |j        ||f<   t          j        ||          j        }||j        ||f<   X||fS )a  
    Compute compartment eigenvector for a given cooler `clr` in a number of
    symmetric intra chromosomal regions defined in view_df (cis-regions), or for each
    chromosome.

    Note that the amplitude of compartment eigenvectors is weighted by their
    corresponding eigenvalue. Eigenvectors can be oriented by passing a binned
    `phasing_track` with the same resolution as the cooler.


    Parameters
    ----------
    clr : cooler
        cooler object to fetch data from
    phasing_track : DataFrame
        binned track with the same resolution as cooler bins, the fourth column is
        used to phase the eigenvectors, flipping them to achieve a positive correlation.
    view_df : iterable or DataFrame, optional
        if provided, eigenvectors are calculated for the regions of the view only,
        otherwise chromosome-wide eigenvectors are computed, for chromosomes
        specified in phasing_track.
    n_eigs : int
        number of eigenvectors to compute
    clr_weight_name : str
        name of the column with balancing weights to be used.
    ignore_diags : int, optional
        the number of diagonals to ignore. Derived from cooler metadata
        if not specified.
    clip_percentile : float
        if >0 and <100, clip pixels with diagonal-normalized values
        higher than the specified percentile of matrix-wide values.
    sort_metric : str
        If provided, re-sort `eigenvecs` and `eigvals` in the order of
        decreasing correlation between phasing_track and eigenvector, using the
        specified measure of correlation. Possible values:
        'pearsonr' - sort by decreasing Pearson correlation.
        'var_explained' - sort by decreasing absolute amount of variation in
        `eigvecs` explained by `phasing_track` (i.e. R^2 * var(eigvec))
        'MAD_explained' - sort by decreasing absolute amount of Median Absolute
        Deviation from the median of `eigvecs` explained by `phasing_track`
        (i.e. COMED(eigvec, phasing_track) * MAD(eigvec)).
        'spearmanr' - sort by decreasing Spearman correlation.
        This option is designed to report the most "biologically" informative
        eigenvectors first, and prevent eigenvector swapping caused by
        translocations. In reality, however, sometimes it shows poor
        performance and may lead to reporting of non-informative eigenvectors.
        Off by default.
    map : callable, optional
        Map functor implementation.
    Returns
    -------
    eigvals, eigvec_table -> DataFrames with eigenvalues for each region and
    a table of eigenvectors filled in the `bins` table.
    .. note:: ALWAYS check your EVs by eye. The first one occasionally does
              not reflect the compartment structure, but instead describes
              chromosomal arms or translocation blowouts. Possible mitigations:
              employ `view_df` (e.g. arms) to avoid issues with chromosomal arms,
              consider blacklisting regions with translocations during balancing.
    NT)check_sortingraise_errorsz0view_df is not a valid viewframe or incompatibler{   #provided cooler is not balanced or  is missingzbins/rF   r   view_dfclr_weight_namemask_clr_bad_binsdrop_track_naview_regionr   )subsetr-   c                     g | ]
}d |dz    S )Er8   r/   r1   s     r'   r3   zeigs_cis.<locals>.<listcomp>  s"    999ak!a%kk999r)   c                     g | ]
}d |dz    S eigvalr8   r/   r1   s     r'   r3   zeigs_cis.<locals>.<listcomp>  s%    >>>1&q1u&&>>>r)   c                     | dd         }                     	                              |          }#t          j        |          }|d         j        }nd}t          |
|          \  }}|||fS )a  
        perform eigen decomposition for a given region
        assuming safety checks are done outside of this
        function.
        Parameters
        ----------
        region: tuple-like
            tuple of the form (chroms,start,end,*)
        Returns
        -------
        _region, eigvals, eigvecs -> ndarrays
            array of eigenvalues and an array eigenvectors
        Nr*   balancevalue)rE   rF   r   rG   r    )matrixfetchbioframeselectvaluesrK   )region_regionr7   phasing_track_regionphasing_track_region_valuesr   r   rG   clrr   rF   rE   r   r    s          r'   _eachzeigs_cis.<locals>._each  s     !*JJJ//55g>> $#+?='#J#J *>w*G*N''*.'"%5+#
 
 
 ((r)   )r   r   	Exceptionr   r   _load_attrsgetbinsr   r   assign_viewdropnaloccolumnsr   r   r0   copyr   r   indexT)r   r   r   rE   r   rF   rG   r    maprJ   er   eigvec_tableeigvec_columnsev_coleigvals_tableeigval_columnseval_colr   resultsr   _eigvals_eigvecsr&   s   `` `````                r'   eigs_cisr   #  s   P "3''	X'"!	  AA  	X 	X 	XOPPVWW	XsO$GGG   N/NNN
 
	  	1112266~qIII  88::aaa=D /+"
 
 
 'g66==m_[\=]]L#AAAt|O4L995==999N  & &!vV LLNNM>>f>>>N" ) )"$&h!) !) !) !) !) !) !) !) !) !) !)J c%((G (/ : :#8olG44:08
n,-omW55;19#~-..,&&s,   / 
AA		AA% %
B/BBc                 \    	 t           |d          }n&# t          $ r}t          d| d          |d }~ww xY wd }	|	t                     }	nt	          d          |Dt
          j         fd j        D             t           	                                          f         }|d         }
|d         } 
                    |	          |
||
|f         } 	                                |
|         }|+t          | |	|dd
          }|d         j        |
|         }nd }t          ||f|||d|\  }}|                                }t          |          D ]\  }}||d|dz    <   t!          j        t          j        |          d t'          |          D                       }||fS )NTr|   r}   r~   z2views are not currently implemented for eigs_transc                 :    g | ]}                     |          S r/   )offset)r2   chromr   s     r'   r3   zeigs_trans.<locals>.<listcomp>  s%    ;;;5SZZ;;;r)   r   r`   r   r   r   )rE   r   r    r   r8   c                     g | ]
}d |dz    S r   r/   r1   s     r'   r3   zeigs_trans.<locals>.<listcomp>  s%    999a!!a%!!999r)   )datar   )r   r   r   r   NotImplementedErrorr   r_
chromnamesr   r   r   r   r   rw   r   rd   pd	DataFrame
atleast_2dr   )r   r   rE   rj   r   r    kwargsrJ   r   r   lohir7   r   phasing_track_valuesr   r   r   r%   r"   s   `                   r'   
eigs_transr     s%   sO$GGG   N/NNN
 
	 G"3''!"VWWWE;;;;CN;;;S__L
	 
1B	2B

?
++BrE2b5L9A88::beD /+"
 
 
  -W5<RUC# 	 *   GW 99;;Lw'' + +	6$*[Q[[!!l]7##995==999  G L  s    
949)N)r*   Nr   r   N)r*   r^   r8   NF)Nr*   Nrx   N)numpyr   r   scipy.statspandasr   libr   
lib.checksr   r   
lib.commonr   r   r   r(   rK   rV   r]   rw   r   r   r   r/   r)   r'   <module>r      sb                      D D D D D D D D B B B B B B B B 0 0 0 0h UYU U U Up  "   n n n nf m' m' m' m'd A! A! A! A! A! A!r)   