
    DUf_                         d dl Z d dlZd dlZd dlZddlmZ d dlZ	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 ddZ	dd gfd	Z
dd
Zd fdZd Zd ZddZd Zd Zd Zd Zd Zdg fdZdS )    N   )	eigdecompAnyAny_vs_MixedTFc                    t          | t          j                  r(t          | j                  dk    rt          d          n;	 |
rt          d           t          j        |           } n#  t          d          xY w| ||||f         } ||||||f         }|
|||         }|||dk    r|
rt          d           t          j	        t          |                     t          j
        z  }t          j        | |          d	         d
         }t          j        |          }d } |||                   ||<   |g}nt          d          |
rt          d           t          ||          }|
rt          d           t          | ||||	          \  }}|d	         |d         z  dz  }t          j        ||z            t          j        |          z  }|||||fS )a  
    Compute the contrast in M between different sets of pixels specified by
    modality, modality_params or by I, if given.

    Returns the contrast in diagonals with offset s=0..len(M), and the
    weighted average over all s. If 'normalize' is True, contrast is
    normalized to be in [-1,1].

    Parameters
    ----------
    M: 2d array
        Typically a contact matrix.

    modality: string, (optional, default: "AnyAny_vs_Mixed")
        Specifies the contrast modality, potentially together ``modality_params``.
        See :func:`constrast.indicatormat` for valid modalities.

    modality_params: list, (optional if modality=='AnyAny_vs_mixed' or None)
        Parameters required by 'modality'. See :func:`contrast.indicatormat()`
        for valid modalities and required parameters.

        Typical format: ``[bin_types, Type]``, where
            bin_types: 1d array
                types of the bins, e.g. [0,0,1,1,1,0,...]
                Note: the numerical values identifying the types are
                inconsequential they are only used to classify the loci into
                groups.
            Type: number
                one of the values in bin_types: singles out a specific type

        If None, modality_params = [EV>np.nanmean(EV)],
            with EV computed here: eigdecomp.cis_eig(M, phasing_track=phasing_track)[1][0]

    I: indicator matrix, optional
        Specifies the two sets of pixels from which to compute contrast in M.
        If given, this overrides ``modality`` and ``modality_params``.
        If None, computed here as ``contrast.indicatormat(modality, modality_params)``.

    normalize: boolean, optional
        If True, contr_diags is normalized as follows
            contr_norm[s]
                = (contr[s]-1)/(contr[s]+1)
                = (set1_avintens[s] - set2_avintens[s]) / (set1_avintens[s] + set2_avintens[s]).
        The weighted mean, contr, is then computed from contr_norm[s].

    ignore_diags: integer, optional
        Number of diagonals in M to be ignored.

    exclude_nans_from_paircounts: boolean, optional
        If True, pixels with NaN in M are not counted towards valid pixels.

    i0, i1: integers, optional
        Will use (trimmed by len(M)):
            M[i0:i1,i0:i1]
            v[i0:i1],
            phasing_track[i0:i1]
        Note: EV is computed from restriced M if modality_params is None

    phasing_track: 1D numpy array, optional
        len(phasing_track) must be len(M)
        to flip EVs
        only used if modality_params is None

    Returns
    -------
    contr_diags: 1D numpy array, length M
        Contrast in diagonals with offsets s=0..len(M)

    contr: float
        Weighted mean of contrast_diags.
        np.nansum(contrast*p)/np.nansum(p)
        with weights (#set1_pixels(s)*#set2_pixels(s))**.5

    I: 2D array
        The computed (or supplied) indicator matrix.

    modality_params: list
        The computed (or supplied) modality_params useful if modality_params
        was computed here.

    additional_info: list
        [
            set1_intens: 1D array,
                nansum(diag(I*M,s)), i.e. total intensity in set1 for all s
            set1_pixels:  1D array,
                nansum(diag(I)) i.e. number of valid pixels in set1 for all s
            set2_intens: 1D array,
                nansum(diag((1-I)*M)), i.e. total intensity in set2 for all s
            set2_pixels:  1D array,
                nansum(1-diag(I)) i.e. number of valid pixels in set2 for all s
        ]

       z,M was an array, but dimensionality was not 2z... loading M ...zcmap could not be loadedNr   z... getting types from EV...)phasing_trackr   r   c                 2    | t          j        |           k    S Nnpnanmeanxs    W/var/www/html/software/conda/lib/python3.11/site-packages/cooltools/sandbox/contrast.py<lambda>z contrast_diags.<locals>.<lambda>   s    q2:a==/@     zmodality_params=None only allowed with modality='AnyAny_vs_Mixed'. For all other modalityes modality_params has to be given (a list). See contrast.indicatormatrix() for valid modalities and required modality_params.z%... constructing indicator matrix ...z... computing contrast ...)ignore_diagsexclude_nans_from_paircounts	normalize   g      ?)
isinstancer   ndarraylenshape
ValueErrorprintjoblibloadonesnanr   cis_eigisfiniteindicatormatcontrast_diags_indicatormatrixnansum)Mmodalitymodality_paramsIr   r   i0i1r   r   verbose	bin_typesEVval_indsget_bin_identitiescontr_diagsadd_infopcontrs                      r   contrast_diagsr5      s8   V !RZ   	9qw<<1KLLL 	9 +)***AAA	97888 	
"R%B,A}beRUlO %be, 	y",,, :8999GCFFOObf4	&qFFFqI!L;r??%@%@"&8&8H&F&F	(##,+ 0    	;9:::?33  ,*+++:		!%A  K 	hqk!		A IkAo&&15Eq/8;;s   %A* *A;c           	      8   d}t          j        | | j        |d          s't          j        d                    |                     t          j        |          }|t          j        |                    }d |D             }	t          j        |	          rt          d|	          | j
        |j
        k    st          d          t          |           }
t          j        |                              t                    }|r!t           j        |t          j        |           <   t          j        |
          t           j        z  }t          j        |
          t           j        z  }t          j        |
          t           j        z  }t          j        |
          t           j        z  }|r=t#          d           t          j        d	|
d
                              t&                    }t)          ||
          D ]}|r9||v r5t#          d                    t'          ||
z  dz                                 t          j        ||          }t          j        | |          }t          j        |          ||<   t          j        d|z
            ||<   t          j        ||z            ||<   t          j        |d|z
  z            ||<   t          j        d          5  ||z  }||z  }|du r||z
  ||z   z  }n||z  }ddd           n# 1 swxY w Y   ||||g}||fS )a  
    Computes, for all upper diagonals, the 'contrast' in M, namely the ratio
    of average intensities (values) in M in two sets of pixels.

    More formally, for diagonals with offsets s=0..len(M):
        contr[s] = <diag(M,s)>_set1 / <diag(M,s)>_set2)

    If normalize is True, the contrast is normalized as follows:
        contr_norm[s]
            = (conts[s]-1)/(contr[s]+1)
            = (<diag(M,s)>_set1 - <diag(M,s)>_set2) /
              (<diag(M,s)>_set1 + <diag(M,s)>_set2)

    The normalized contrast is between -1 and 1.

    The two sets of pixels are specified by the indicator matrix I as follows:
        set1:    all pixels (i,j) with I[i,j]=1
        set2:    all pixels (i,j) with I[i,j]=0
        neither: all pixels (i,j) with I[i,j]=nan

    If exclude_nans_from_paircounts==True, pixels M(i,j)=nan entail I(i,j)=nan
    (such that those pixels don't count towards neither set1 nor set2).

    Parameters
    ----------
    M: 2D numpy array
        the data, eg. a Hi-C contact matrix

    I: 2D numpy array
        indicator matrix specifying the two sets of pixels
        can contain only
        - zeros (for set1),
        - ones (for set2),
        - NANs (neither)
        otherwise a value error is raised
        must have same shape as M

    ignore_diags: int
        the entries [0:ignore_diags] will be NANs in all returns

    exclude_nans_from_paircounts: boolean
        if True, I(i,j) gets nans for all pixels (i,j) where M is nan

    normalize: boolean, optional
        if True, contrast is normalized as follows:
        contr_norm[s] = (contr[s]-1)/(contr[s]+1)
                      = (set1_avintens[s] - set2_avintens[s]) /
                        (set1_avintens[s] + set2_avintens[s])

    Returns
    -------
    contr_diags: 1D array
        ratio of mean values in M of set1 and set2, by diagonal s

    additional_info: list
        [
            set1_intens: 1D array,
                nansum(diag(I*M,s)), i.e. total intensity in set1 for all s
            set1_pixels:  1D array,
                nansum(diag(I)) i.e. number of valid pixels in set1 for all s
            set2_intens: 1D array,
                nansum(diag((1-I)*M)), i.e. total intensity in set2 for all s
            set2_pixels:  1D array,
                nansum(1-diag(I)) i.e. number of valid pixels in set2 for all s
        ]

    Notes
    -----
    * Lower halves of M and I are ignored but a warning is thrown if M is not
      symmetric.
    * The bare contrast can be computed from the normalized one as
      contr = (contr_norm + 1)/(contr_norm - 1).

    Examples
    --------
    M = [3 2 1        I = [1 1 0
         . 1 1             . 1 0
         . . 1]            . . nan]

    The resulting below quantities are by diagonal with increasing offset
    s=0..len(M):

    set1_intens    = [4, 2, 0]
    set1_pixels    = [2, 1, 0]
    =>
    set1_avintens  = [2, 2, nan]

    set2_intens    = [0, 1, 1]
    set1_pixels    = [0, 1, 1]
    =>
    set2_avintens  = [nan, 1, 1]
    =>
    contrast_diags_indicatormatrix(M, I, ignore_diats=0)
        = set1_avintens / set2_avintens
        = [nan, 2, nan]
          (indeed, only 1st off-diagonal has both types, others have to be nans)

    contrast_diags_indicatormatrix(M, I, ignore_diats=0, normalize=True)
        = (set1_avintens - set2_avintens) / (set1_avintens + set2_avintens)
        = [nan, 1/3, nan]
          indeed, only 1st off-diagonal has both types, others have to be nans

    g:0yE>T)atol	equal_nanz9M is not symmetric to within {}, I'm using the upper halfc                     g | ]}|d v|	S ))g        g      ? ).0is     r   
<listcomp>z2contrast_diags_indicatormatrix.<locals>.<listcomp>1  s"    ;;;q
':':1':':':r   zKI is not an indicator matrix: it contains value(s) other than {0,1,np.nan}:z M and I must have the same shapez... ... starting to loop over sr      z... ... {}% doned   r   ignore)invalidN)r   allcloseTwarningswarnformatuniqueisnananyr   r   r   copyastypefloatr    zerosr   linspaceintrangediagr%   errstate)r&   r)   r   r   r   r,   tolIunanIuillegalvalsLIuseset1_pixelsset1_intensset2_pixelsset2_intensreport_progres_pointssdiagIdiagMset1_avintensset2_avintensr4   additional_infos                           r   r$   r$      s;   b C;q!#C4888 
JQQRUVV	
 	
 	

 
1B}E;;e;;;K	vk 
<
 
 	
 7ag;<<<AA 71::U##D $ #FRXa[[ (1++&K(1++&K(1++&K(1++&K B/000 "Aq" 5 5 < <S A A<## 
8 
8 	?q111$++CA,<,<==>>>a  15))A1u9--A55=11A5AI#677A	X	&	&	& 	2 	2#k1#k1"]2}}7TUEE!M1E	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 #KkJO/!!s    LLLc                 D   t          |           }g }t          t          |                    D ]q}t          j        |          }t          |          D ]6}t          j        t          j        | |          ||         k              ||<   7|                    |           r|S )a  
    count occurrances of each v in vals in all diagonals of I with offsets s=0..len(I)

    Returns
    -------
    list of 1D arrays of floats, one for each v in vals, each of len=len(I)
        default: count 1s and 0s in each diagonal:
                 return [S1,S0] with:
                 S1: nansum(diag(I,s)==1) for each offset s=0..len(I)
                 S0: nansum(diag(I,s)==0) for each offset s=0..len(I)
    )r   rP   r   rM   r%   rQ   append)r)   valsrW   
diagcountsr<   Sr^   s          r   rg   rg   p  s     	AAJ3t99  HQKKq 	7 	7A9RWQ]]d1g566AaDD!r   c                 R   t          j        |           } t          j        | t          j        |                              }at          j                  t          j        t          j                                     t          j        fd|D                       }n|}|S )a  
    Find unique and finite values (bin types) in v.
    If vals is not None, restrict to types in vals (floats and ints are
    considered identical).

    Parameters
    ----------
    v: sequence of numbers
        a vector of "types" used to construct indicator matrices

    vals: sequence of numbers, optional
        default: unique and finite values in v

    Returns
    -------
    valid_types: 1d array

    Nc                 D    g | ]}|v t          j        |          |S r:   )r   r"   )r;   trf   s     r   r=   zget_types.<locals>.<listcomp>  s+    !S!S!Sa4iiBKPQNNi!iiir   )r   asarrayrG   r"   )vrf   typesvalid_typess    `  r   	get_typesrp     s    & 	
1AIaA'((Ez$ybk$//011j!S!S!S!SU!S!S!STTr   c                 2    | t          j        |           k    S r
   r   r   s    r   r   r     s    QA5F r   c                     t          j        t          |                     t           j        z  }t          j        |           } || |                   ||<   |S )z
    discretizes a (quasi)continuous track (eg. an EV)
    using the supplied lambda function
    by default: split by nanmean

    returns: discretized track, non-finite values are left intact
    )r   r   r   r    r"   )rm   r0   v_discr/   s       r   discretize_trackrt     sI     WSVV__rv%F{1~~H))!H+66F8Mr   c                 @    t          j        |           } | dz
  | dz   z  S Nr   r   rl   rm   s    r   r   r     "    

1AEa!er   c                 @    t          j        |           } d| z   d| z
  z  S rv   rw   rx   s    r   normalize_invr{     ry   r   c                 .   t           t          t          t          t          t
          d}d}|                                D ]
}||z   dz   }| t          d|z              dS | |                                vrt          | dz   |z              ||          | }|S )a  
    get indicatormatrix I with a given modality by calling
    the function specified by 'modality'

    Parameters
    ----------
    modality: string, optional
        valid modalities are listed below in dict 'valid_modalities'
        if modality is None: print a list of valid modalities and return nothing
    params: list, optional
        have to match the parameters of the called indicator matrix functions

    Returns
    -------
    I: 2d array
        the computed indicator matrix

    )r   TypeType_vs_MixedTypeType_vs_TypeOtherTypeType_vs_NontypeNontypeTypeType_vs_RestSegments_vs_Rest 
Nz,valid modalities are:
=====================
z1 is not a valid modality. Valid modalities are: 
)	indicatormat_AnyAny_vs_Mixedindicatormat_TypeType_vs_Mixed"indicatormat_TypeType_vs_TypeOther'indicatormat_TypeType_vs_NontypeNontypeindicatormat_TypeType_vs_Restindicatormat_Segments_vs_Restkeysr   r   )r'   paramsvalid_modalitiesvalid_modalities_nameskr)   s         r   r#   r#     s    , 8;!C&M99   ""$$ C C!7!!;d!B>AWWXXX',,....BC$%
 
 	
 	#"F+AHr   c                    t          j        |           } | j        }t          |           }t          j        |          t           j        z  }d|t          j        |            <   t          j        ||          }|D ]J}t          j        |          t           j        z  }d|| |k    <   t          j        ||          }d||dk    <   K|S )a  
    Parameters
    ----------
    v: sequence of numbers
        a vector of "types" used to construct indicator matrices
        types are the unique and finite values in v

    Returns
    -------
    I: 2d array
        indicator matrix with the following properties
        I[i,j]=1    if v[i]==v[j],               i.e. thistype-to-thistype for any type
        I[i,j]=nan  if v[i]==nan or v[j]==nan,   i.e. excluding invalid bins
        I[i,j]=0    otherwise,                   i.e. thistype-to-othertype for valid types

    r   r   )r   rl   sizerp   rM   r    rH   outer)rm   rW   rn   v_auxr)   thistype
I_thistypes          r   r   r      s    " 	
1A	AaLLE HQKK"& EE28A;;,
A   bf$ a8mX5
 


  	
!O	
 	
 Hr   c                 B   t          j        |           } | j        }t          j        | |k              st	          j        d           t          |           }t          j        |          t           j        z  }d|| |k    <   t          j	        ||          }d||dk    <   |S )a  
    Parameters
    ----------
    v: sequence of numbers
        a vector of "types" used to construct indicator matrices

    Type: number
        type for which to compute indicator matrix with properties

    Returns
    -------
    I: 2d array:
        indicator matrix with the following properties
        I[i,j]=1    if v[i]==v[j]==val,    i.e. thistype-to-thistype
        I[i,j]=0    if v[i]~=v[j]==val,    i.e. sometype-to-othertype
        I[i,j]=nan  otherwise              i.e. othertype-to-othertype and invalid bins

    Type not found in vr   r   )
r   rl   r   rI   rD   rE   r   rM   r    r   rm   TyperW   I_AnyAny_vs_Mixedr   r)   s         r   r   r   )  s    & 	
1A	A6!t) -+,,,4	  HQKK"& EE!t)
A !A1Hr   c                    t          j        |           } | j        }t          j        | |k              st	          j        d           	 	 t          j        |          t           j        z  }d|t          j        |            <   t          j	        ||          }t          j        |          }d|| |k    <   t          j	        ||          }	t          j        |          t           j        z  }d|| |k    <   t          j	        ||          }
t          j        ||f          t           j        z  }d||	dk    <   d||
dk    <   t           j        |t          j        |          <   |S )a)  
    Parameters
    -----------
    v: sequence of numbers
        a vector of "types" used to construct indicator matrices

    Type: number
        type for which to compute indicator matrix with properties

    Returns
    -------
    I: 2d array:
        indicator matrix with the following properties
        I[i,j]=1    if v[i]==v[j]==val,         i.e. thistype-to-thistype
        I[i,j]=0    if v[i]==val xor v[j]==val  i.e. thistype-to-othertype
        I[i,j]=nan  otherwise                   i.e. othertype-to-othertype and invalid bins

    r   r   r   )r   rl   r   rI   rD   rE   r   rM   r    r   ndindexr"   rH   r   )rm   r   rW   r   r   r)   r<   j	I_invalid	I_striped
I_TypeTypes              r   r   r   N  s6   & 	
1A	A6!t) -+,,,(bf$rx{{lHUE**	 

a4iHUE**	 bf$a4iXeU++
HaVrv%)q.*/!#"(9

Hr   c                    t          j        |           } | j        }t          j        | |k              st	          j        d           t          |           d         }t          j        |          t           j        z  }d|| |k    <   t          j	        ||          }t          j        |          t           j        z  }d|| |k    t          j
        |           z  <   t          j	        ||          }d||dk    <   |S )a*  
    Parameters
    -----------
    v: sequence of numbers
        a vector of "types" used to construct indicator matrices

    Type: number
        type for which to compute indicator matrix with properties

    Returns
    -------
    I: 2d array:
        indicator matrix with the following properties
        I[i,j]=1    if v[i]==v[j]==val,         i.e. thistype-to-thistype
        I[i,j]=0    if v[i]!=val and v[j]!=val  i.e. anything not involving type
        I[i,j]=nan  otherwise                   i.e. type-to-othertype and invalid bins

    r   r   r   )r   rl   r   rI   rD   rE   r   rM   r    r   r"   )rm   r   rW   r   r   r)   I_auxs          r   r   r     s    & 	
1A	A6!t) -+,,, 5Q77:HQKK"& EE!t)
AHQKK"& E*+E19A
&'HUE""EAeqjMHr   c                 j   t          j        |           } | j        }t          j        | |k              st	          j        d           t          |           d         }t          j        |          t           j        z  }d|t          j	        |           <   d|| |k    <   t          j
        ||          }|S )a2  
    Parameters
    -----------
    v: sequence of numbers
        a vector of "types" used to construct indicator matrices

    Type: number
        type for which to compute indicator matrix with properties

    Returns
    -------
    I: 2d array
        indicator matrix with the following properties
        I[i,j]=1    if v[i]==v[j]==val,          i.e. thistype-to-thistype
        I[i,j]=nan  if v[i]==nan or v[j]==nan,   i.e. excluding invalid bins
        I[i,j]=0    otherwise,                   i.e. thistype-to-othertype and anytype-to-anyother

    r   r   r   )r   rl   r   rI   rD   rE   r   rM   r    r"   r   r   s         r   r   r     s    ( 	
1A	A6!t) -+,,, 5Q77: HQKK"& EE"+a..E!t)
AHr   c                 n   || d         d         }t          j        |          }|g k    r!t           j        |t          j        |          <   t          j        ||          }g }| D ]F\  }}t          ||          }|                    ||f           |||||f         dz   |||||f<   Gd||dk    <   |S )al  
    Parameters
    ----------
    segments: list of tuples
        [(s0,e0),(s1,e1), ...]
        where (s,e) are the start and end points of segments
        (endpoints are exclusive)
        Note: endpoints are trimmed to L

    L: integer, optional
        default: last endpoint

    bad_bins: list of integers, optional
        bad bins, rows and comuns get np.nan throughout

    Returns
    -------
    I: 2d array
        indicator matrices with the following properties
        I[i,j]=1    if i and j within same segment
        I[i,j]=nan  if i in bad_bins or j in bad_bins
        I[i,j]=0    otherwise

    Nr   )r   rM   r    rl   r   minre   )segmentsrW   bad_binsr   r)   segments_trimmedr^   es           r   r   r     s    2 	yRLOHQKKE2~~&(fbj""#
A & &11IIA'''!QqSkAo!A#qs(Aa!eHHr   )
r   NNr   TNNNFF)r   TTFr
   )NN)rD   osnumpyr   r   r   r   coolerr5   r$   rg   rp   rt   r   r{   r#   r   r   r   r   r   r   r:   r   r   <module>r      s    				             
!%f< f< f< f<X !%q" q" q" q"r 1v    ,   @ ,G+F        / / / /v& & &R" " "J= = =@% % %P# # #L /3R ( ( ( ( ( (r   