
    t]e!                         d Z ddlZddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZ  G d dee          ZdS )z(Metrics to perform pairwise computation.    N)distance_matrix)BaseEstimator)check_consistent_length)unique_labels)check_is_fitted   )_ParamsValidationMixin)
StrOptionsc                   ~    e Zd ZU dZ edh          dgej        gej        gdZee	d<   dddddZ
d	 ZddZd Zd
S )ValueDifferenceMetrica{  Class implementing the Value Difference Metric.

    This metric computes the distance between samples containing only
    categorical features. The distance between feature values of two samples is
    defined as:

    .. math::
       \delta(x, y) = \sum_{c=1}^{C} |p(c|x_{f}) - p(c|y_{f})|^{k} \ ,

    where :math:`x` and :math:`y` are two samples and :math:`f` a given
    feature, :math:`C` is the number of classes, :math:`p(c|x_{f})` is the
    conditional probability that the output class is :math:`c` given that
    the feature value :math:`f` has the value :math:`x` and :math:`k` an
    exponent usually defined to 1 or 2.

    The distance for the feature vectors :math:`X` and :math:`Y` is
    subsequently defined as:

    .. math::
       \Delta(X, Y) = \sum_{f=1}^{F} \delta(X_{f}, Y_{f})^{r} \ ,

    where :math:`F` is the number of feature and :math:`r` an exponent usually
    defined equal to 1 or 2.

    The definition of this distance was propoed in [1]_.

    Read more in the :ref:`User Guide <vdm>`.

    .. versionadded:: 0.8

    Parameters
    ----------
    n_categories : "auto" or array-like of shape (n_features,), default="auto"
        The number of unique categories per features. If `"auto"`, the number
        of categories will be computed from `X` at `fit`. Otherwise, you can
        provide an array-like of such counts to avoid computation. You can use
        the fitted attribute `categories_` of the
        :class:`~sklearn.preprocesssing.OrdinalEncoder` to deduce these counts.

    k : int, default=1
        Exponent used to compute the distance between feature value.

    r : int, default=2
        Exponent used to compute the distance between the feature vector.

    Attributes
    ----------
    n_categories_ : ndarray of shape (n_features,)
        The number of categories per features.

    proba_per_class_ : list of ndarray of shape (n_categories, n_classes)
        List of length `n_features` containing the conditional probabilities
        for each category given a class.

    n_features_in_ : int
        Number of features in the input dataset.

        .. versionadded:: 0.10

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.10

    See Also
    --------
    sklearn.neighbors.DistanceMetric : Interface for fast metric computation.

    Notes
    -----
    The input data `X` are expected to be encoded by an
    :class:`~sklearn.preprocessing.OrdinalEncoder` and the data type is used
    should be `np.int32`. If other data types are given, `X` will be converted
    to `np.int32`.

    References
    ----------
    .. [1] Stanfill, Craig, and David Waltz. "Toward memory-based reasoning."
       Communications of the ACM 29.12 (1986): 1213-1228.

    Examples
    --------
    >>> import numpy as np
    >>> X = np.array(["green"] * 10 + ["red"] * 10 + ["blue"] * 10).reshape(-1, 1)
    >>> y = [1] * 8 + [0] * 5 + [1] * 7 + [0] * 9 + [1]
    >>> from sklearn.preprocessing import OrdinalEncoder
    >>> encoder = OrdinalEncoder(dtype=np.int32)
    >>> X_encoded = encoder.fit_transform(X)
    >>> from imblearn.metrics.pairwise import ValueDifferenceMetric
    >>> vdm = ValueDifferenceMetric().fit(X_encoded, y)
    >>> pairwise_distance = vdm.pairwise(X_encoded)
    >>> pairwise_distance.shape
    (30, 30)
    >>> X_test = np.array(["green", "red", "blue"]).reshape(-1, 1)
    >>> X_test_encoded = encoder.transform(X_test)
    >>> vdm.pairwise(X_test_encoded)
    array([[0.  ,  0.04,  1.96],
           [0.04,  0.  ,  1.44],
           [1.96,  1.44,  0.  ]])
    autoz
array-liken_categorieskr_parameter_constraints   r   c                0    || _         || _        || _        d S Nr   )selfr   r   r   s       9lib/python3.11/site-packages/imblearn/metrics/pairwise.py__init__zValueDifferenceMetric.__init__   s    (    c           	      x   |                                   t          ||           |                     ||dt          j                  \  }}t          | j        t                    r*| j        dk    r|                    d          dz   | _	        njt          | j                  | j        k    r-t          dt          | j                   d| j         d	          t          j        | j        d
          | _	        t          |          fd| j	        D             | _        t!          | j                  D ]V}t#                    D ]D\  }}t          j        |||k    |f         | j	        |                   | j        |         dd|f<   EWt          j        d          5  t!          | j                  D ]k}| j        |xx         | j        |                             d                              dd          z  cc<   t          j        | j        |         d
           l	 ddd           n# 1 swxY w Y   | S )a  Compute the necessary statistics from the training set.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features), dtype=np.int32
            The input data. The data are expected to be encoded with a
            :class:`~sklearn.preprocessing.OrdinalEncoder`.

        y : ndarray of shape (n_features,)
            The target.

        Returns
        -------
        self : object
            Return the instance itself.
        Tresetdtyper   r   )axisr   zRThe length of n_categories is not consistent with the number of feature in X. Got z elements in n_categories and z in X.F)copyc                 n    g | ]1}t          j        |t                    ft           j                   2S )shaper   )npemptylenfloat64).0n_catclassess     r   
<listcomp>z-ValueDifferenceMetric.fit.<locals>.<listcomp>   sE     !
 !
 !
 HE3w<<0
CCC!
 !
 !
r   )	minlengthNignore)invalid)_validate_paramsr   _validate_datar#   int32
isinstancer   strmaxn_categories_r%   n_features_in_
ValueErrorarrayr   proba_per_class_range	enumeratebincounterrstatesumreshape
nan_to_num)r   Xyfeature_idx	klass_idxklassr)   s         @r   fitzValueDifferenceMetric.fit   s   " 	1%%%""1at28"DD1d'-- 	I$2Cv2M2M!"A!2D4$%%)<<< 36t7H3I3I 484G     "$$*;%!H!H!HD""!
 !
 !
 !
+!
 !
 !
 !!455 	 	K$-g$6$6   	5CE;a5j+-."0=D D D%k2111i<@@ [*** 	N 	N$T%899 N N%k222)+6:::BBJJ2qQQ222 d3K@uMMMMM	N	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N s   !BH//H36H3Nc                    t          |            |                     |dt          j                  }|j        d         }|0|                     |dt          j                  }|j        d         }n|}t          j        ||ft          j                  }t          | j                  D ]c}| j	        |         |dd|f                  }|| j	        |         |dd|f                  }n|}|t          ||| j                  | j        z  z  }d|S )am  Compute the VDM distance pairwise.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features), dtype=np.int32
            The input data. The data are expected to be encoded with a
            :class:`~sklearn.preprocessing.OrdinalEncoder`.

        Y : ndarray of shape (n_samples, n_features), dtype=np.int32
            The input data. The data are expected to be encoded with a
            :class:`~sklearn.preprocessing.OrdinalEncoder`.

        Returns
        -------
        distance_matrix : ndarray of shape (n_samples, n_samples)
            The VDM pairwise distance.
        Fr   r   Nr!   )p)r   r0   r#   r1   r"   zerosr&   r:   r6   r9   r   r   r   )	r   rA   Yn_samples_Xn_samples_YdistancerC   proba_feature_Xproba_feature_Ys	            r   pairwisezValueDifferenceMetric.pairwise   s   $ 	bh??gaj=##AU"(#CCA'!*KK%K8;"<BJOOO !455 	 	K"3K@111k>ARSO}"&"7"DQqqq+~EV"W"1DFKKKtvUHH r   c                 
    ddiS )Nrequires_positive_XT )r   s    r   
_more_tagsz ValueDifferenceMetric._more_tags   s    !4
 	
r   r   )__name__
__module____qualname____doc__r
   numbersIntegralr   dict__annotations__r   rF   rP   rT   rS   r   r   r   r      s         d dL $VH--|<$ $D    (.a     
9 9 9v& & & &P
 
 
 
 
r   r   )rX   rY   numpyr#   scipy.spatialr   sklearn.baser   sklearn.utilsr   sklearn.utils.multiclassr   sklearn.utils.validationr   baser	   utils._param_validationr
   r   rS   r   r   <module>re      s    . .
      ) ) ) ) ) ) & & & & & & 1 1 1 1 1 1 2 2 2 2 2 2 4 4 4 4 4 4 ) ) ) ) ) ) 0 0 0 0 0 0W
 W
 W
 W
 W
2M W
 W
 W
 W
 W
r   