
    t]eF                        d Z ddlZddlZddlZddlZddlZddlZddlm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ 	 dd
lmZmZ n# eef$ r ddlmZ ddlmZ Y nw xY wddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z&m'Z'm(Z( ddl)m*Z* ddl+m,Z,m-Z- ddl.m/Z/m0Z0m1Z1 ddl2m3Z3 ddl4m5Z5m6Z6  eej7                  Z8 e&e$j9        e,e-           G d dee                      Z:dS )z9Bagging classifier trained on balanced bootstrap samples.    N)clone)BaggingClassifier)_parallel_decision_function)_partition_estimators)DecisionTreeClassifier)parse_version)check_is_fitted)Paralleldelayed)r
   )r      )_ParamsValidationMixin)Pipeline)RandomUnderSampler)BaseUnderSampler)Substitutioncheck_sampling_strategycheck_target_type)available_if)_n_jobs_docstring_random_state_docstring)
HasMethodsInterval
StrOptions)_fit_context   )_bagging_parameter_constraints_estimator_has)sampling_strategyn_jobsrandom_statec                       e Zd ZdZe ed          k    r ej        ej	                  Z	n ej        e
          Z	e	                     eej        ddd           eh d          eegdg ed	g          d
gd           	 	 dddddddddd
d
dd
dd fdZ fdZ e            fdZed             Zed             Z ed           fd            Zd fd	Z e ed                    d             Z fdZ  xZ!S ) BalancedBaggingClassifieru  A Bagging classifier with additional balancing.

    This implementation of Bagging is similar to the scikit-learn
    implementation. It includes an additional step to balance the training set
    at fit time using a given sampler.

    This classifier can serves as a basis to implement various methods such as
    Exactly Balanced Bagging [6]_, Roughly Balanced Bagging [7]_,
    Over-Bagging [6]_, or SMOTE-Bagging [8]_.

    Read more in the :ref:`User Guide <bagging>`.

    Parameters
    ----------
    estimator : estimator object, default=None
        The base estimator to fit on random subsets of the dataset.
        If None, then the base estimator is a decision tree.

        .. versionadded:: 0.10

    n_estimators : int, default=10
        The number of base estimators in the ensemble.

    max_samples : int or float, default=1.0
        The number of samples to draw from X to train each base estimator.

        - If int, then draw ``max_samples`` samples.
        - If float, then draw ``max_samples * X.shape[0]`` samples.

    max_features : int or float, default=1.0
        The number of features to draw from X to train each base estimator.

        - If int, then draw ``max_features`` features.
        - If float, then draw ``max_features * X.shape[1]`` features.

    bootstrap : bool, default=True
        Whether samples are drawn with replacement.

        .. note::
           Note that this bootstrap will be generated from the resampled
           dataset.

    bootstrap_features : bool, default=False
        Whether features are drawn with replacement.

    oob_score : bool, default=False
        Whether to use out-of-bag samples to estimate
        the generalization error.

    warm_start : bool, default=False
        When set to True, reuse the solution of the previous call to fit
        and add more estimators to the ensemble, otherwise, just fit
        a whole new ensemble.

    {sampling_strategy}

    replacement : bool, default=False
        Whether or not to randomly sample with replacement or not when
        `sampler is None`, corresponding to a
        :class:`~imblearn.under_sampling.RandomUnderSampler`.

    {n_jobs}

    {random_state}

    verbose : int, default=0
        Controls the verbosity of the building process.

    sampler : sampler object, default=None
        The sampler used to balanced the dataset before to bootstrap
        (if `bootstrap=True`) and `fit` a base estimator. By default, a
        :class:`~imblearn.under_sampling.RandomUnderSampler` is used.

        .. versionadded:: 0.8

    base_estimator : estimator object, default=None
        The base estimator to fit on random subsets of the dataset.
        If None, then the base estimator is a decision tree.

        .. deprecated:: 0.10
           `base_estimator` was renamed to `estimator` in version 0.10 and
           will be removed in 0.12.

    Attributes
    ----------
    estimator_ : estimator
        The base estimator from which the ensemble is grown.

        .. versionadded:: 0.10

    base_estimator_ : estimator
        The base estimator from which the ensemble is grown.

        .. deprecated:: 1.2
           `base_estimator_` is deprecated in `scikit-learn` 1.2 and will be
           removed in 1.4. Use `estimator_` instead. When the minimum version
           of `scikit-learn` supported by `imbalanced-learn` will reach 1.4,
           this attribute will be removed.

    n_features_ : int
        The number of features when `fit` is performed.

        .. deprecated:: 1.0
           `n_features_` is deprecated in `scikit-learn` 1.0 and will be removed
           in version 1.2. When the minimum version of `scikit-learn` supported
           by `imbalanced-learn` will reach 1.2, this attribute will be removed.

    estimators_ : list of estimators
        The collection of fitted base estimators.

    sampler_ : sampler object
        The validate sampler created from the `sampler` parameter.

    estimators_samples_ : list of ndarray
        The subset of drawn samples (i.e., the in-bag samples) for each base
        estimator. Each subset is defined by a boolean mask.

    estimators_features_ : list of ndarray
        The subset of drawn features for each base estimator.

    classes_ : ndarray of shape (n_classes,)
        The classes labels.

    n_classes_ : int or list
        The number of classes.

    oob_score_ : float
        Score of the training dataset obtained using an out-of-bag estimate.

    oob_decision_function_ : ndarray of shape (n_samples, n_classes)
        Decision function computed with out-of-bag estimate on the training
        set. If n_estimators is small it might be possible that a data point
        was never left out during the bootstrap. In this case,
        ``oob_decision_function_`` might contain NaN.

    n_features_in_ : int
        Number of features in the input dataset.

        .. versionadded:: 0.9

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 0.9

    See Also
    --------
    BalancedRandomForestClassifier : Random forest applying random-under
        sampling to balance the different bootstraps.

    EasyEnsembleClassifier : Ensemble of AdaBoost classifier trained on
        balanced bootstraps.

    RUSBoostClassifier : AdaBoost classifier were each bootstrap is balanced
        using random-under sampling at each round of boosting.

    Notes
    -----
    This is possible to turn this classifier into a balanced random forest [5]_
    by passing a :class:`~sklearn.tree.DecisionTreeClassifier` with
    `max_features='auto'` as a base estimator.

    See
    :ref:`sphx_glr_auto_examples_ensemble_plot_comparison_ensemble_classifier.py`.

    References
    ----------
    .. [1] L. Breiman, "Pasting small votes for classification in large
           databases and on-line", Machine Learning, 36(1), 85-103, 1999.

    .. [2] L. Breiman, "Bagging predictors", Machine Learning, 24(2), 123-140,
           1996.

    .. [3] T. Ho, "The random subspace method for constructing decision
           forests", Pattern Analysis and Machine Intelligence, 20(8), 832-844,
           1998.

    .. [4] G. Louppe and P. Geurts, "Ensembles on Random Patches", Machine
           Learning and Knowledge Discovery in Databases, 346-361, 2012.

    .. [5] C. Chen Chao, A. Liaw, and L. Breiman. "Using random forest to
           learn imbalanced data." University of California, Berkeley 110,
           2004.

    .. [6] R. Maclin, and D. Opitz. "An empirical evaluation of bagging and
           boosting." AAAI/IAAI 1997 (1997): 546-551.

    .. [7] S. Hido, H. Kashima, and Y. Takahashi. "Roughly balanced bagging
           for imbalanced data." Statistical Analysis and Data Mining: The ASA
           Data Science Journal 2.5‐6 (2009): 412-426.

    .. [8] S. Wang, and X. Yao. "Diversity analysis on imbalanced data sets by
           using ensemble models." 2009 IEEE symposium on computational
           intelligence and data mining. IEEE, 2009.

    Examples
    --------
    >>> from collections import Counter
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.model_selection import train_test_split
    >>> from sklearn.metrics import confusion_matrix
    >>> from imblearn.ensemble import BalancedBaggingClassifier
    >>> X, y = make_classification(n_classes=2, class_sep=2,
    ... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
    ... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
    >>> print('Original dataset shape %s' % Counter(y))
    Original dataset shape Counter({{1: 900, 0: 100}})
    >>> X_train, X_test, y_train, y_test = train_test_split(X, y,
    ...                                                     random_state=0)
    >>> bbc = BalancedBaggingClassifier(random_state=42)
    >>> bbc.fit(X_train, y_train)
    BalancedBaggingClassifier(...)
    >>> y_pred = bbc.predict(X_test)
    >>> print(confusion_matrix(y_test, y_pred))
    [[ 23   0]
     [  2 225]]
    z1.3r   r   right)closed>   not majoritynot minorityallautomajoritybooleanfit_resampleN)r   replacementsampler
   g      ?TFr(   
deprecated)max_samplesmax_features	bootstrapbootstrap_features	oob_score
warm_startr   r,   r   r    verboser-   base_estimatorc                    t          j        t                      j                  }d|i}d|j        v r||d<   n|| _         t                      j        di |||||||||||d
 |	| _        |
| _        || _        d S )Nr7   	estimator)
n_estimatorsr0   r1   r2   r3   r4   r5   r   r    r6    )	inspect	signaturesuper__init__
parametersr9   r   r,   r-   )selfr9   r:   r0   r1   r2   r3   r4   r5   r   r,   r   r    r6   r-   r7   bagging_classifier_signatureestimator_params	__class__s                     :lib/python3.11/site-packages/imblearn/ensemble/_bagging.pyr?   z"BalancedBaggingClassifier.__init__  s    ( (/'89I'J'J$,n=6AAA,5[))&DN 	
 	
	
%#%1!%	
 	
 	
 	
 	
 "3&    c                 @    t                                          |          }t           j        t                    rT j        j        dk    rD fdt           j        | j        j                                                  D              _	        n j         _	        |S )Nbypassc                 j    i | ]/\  }}t          j        j        |k              d          d          |0S )r   )npwhereclasses_).0keyvaluerA   s      rE   
<dictcomp>z9BalancedBaggingClassifier._validate_y.<locals>.<dictcomp>Q  sJ     ' ' 'C #-..q1!4e' ' 'rF   )
r>   _validate_y
isinstancer   dictsampler__sampling_typer   items_sampling_strategy)rA   y	y_encodedrD   s   `  rE   rQ   z%BalancedBaggingClassifier._validate_yK  s    GG''**	t-t44	=,88' ' ' '"9*M0# # %''' ' 'D## '+&<D#rF   c                    | j         | j        dvrt          d          | j         t          | j                   }nG| j        dvr/t	          j        dt                     t          | j                  }nt          |          }| j        j        dk    r | j        	                    | j
                   t          d| j        fd|fg          | _        	 | j        | _        dS # t          $ r Y dS w xY w)	zZCheck the estimator and the n_estimator attribute, set the
        `estimator_` attribute.N)Nr/   zEBoth `estimator` and `base_estimator` were set. Only set `estimator`.zX`base_estimator` was renamed to `estimator` in version 0.10 and will be removed in 0.12.rH   )r   r-   
classifier)r9   r7   
ValueErrorr   warningswarnFutureWarningrT   rU   
set_paramsrW   r   
_estimatorbase_estimator_AttributeError)rA   defaultr7   s      rE   _validate_estimatorz-BalancedBaggingClassifier._validate_estimator]  s    >%';;;W   >%"4>22NN (<<<M+  
 #4#677NN"7^^N='833M$$t7N$OOO"',)GH
 
	#'?D    	 	 	DD	s   C! !
C/.C/c                     | j         S )z$Estimator used to grow the ensemble.)ra   rA   s    rE   
estimator_z$BalancedBaggingClassifier.estimator_  s     rF   c                 D    t          j        dt                     | j        S )z-Number of features when ``fit`` is performed.z`n_features_` was deprecated in scikit-learn 1.0. This attribute will not be accessible when the minimum supported version of scikit-learn is 1.2.)r]   r^   r_   n_features_in_rg   s    rE   n_features_z%BalancedBaggingClassifier.n_features_  s-     	 		
 	
 	
 ""rF   )prefer_skip_nested_validationc                 p    |                                   t                                          ||          S )a+  Build a Bagging ensemble of estimators from the training set (X, y).

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        y : array-like of shape (n_samples,)
            The target values (class labels in classification, real numbers in
            regression).

        Returns
        -------
        self : object
            Fitted estimator.
        )_validate_paramsr>   fit)rA   XrX   rD   s      rE   ro   zBalancedBaggingClassifier.fit  s/    ( 	ww{{1a   rF   c                     t          |           | j        t          | j                  | _        nt          | j                  | _        t                                          ||| j        d           S )N)r,   )sample_weight)	r   r-   r   r,   rT   r   r>   _fitr0   )rA   rp   rX   r0   	max_depthrr   rD   s         rE   rs   zBalancedBaggingClassifier._fit  so    ! <. ,  DMM "$,//DM ww||Aq$"2$|GGGrF   decision_functionc                 @    t                                           ddgddd          t           j         j                  \  }} t          | j                   fdt          |          D                       }t          |           j        z  }|S )a  Average of the decision functions of the base classifiers.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        score : ndarray of shape (n_samples, k)
            The decision function of the input samples. The columns correspond
            to the classes in sorted order, as they appear in the attribute
            ``classes_``. Regression and binary classification are special
            cases with ``k == 1``, otherwise ``k==n_classes``.
        csrcscNF)accept_sparsedtypeforce_all_finitereset)r   r6   c           	   3      K   | ][} t          t                    j        |         |d z                     j        |         |d z                               V  \dS )r   N)r   r   estimators_estimators_features_)rM   irp   rA   startss     rE   	<genexpr>z>BalancedBaggingClassifier.decision_function.<locals>.<genexpr>  s       F
 F
  1G/00 VAE]!:;)&)fQUm*CD F
 F
 F
 F
 F
 F
rF   )	r	   _validate_datar   r:   r   r
   r6   rangesum)rA   rp   r   _all_decisions	decisionsr   s   ``    @rE   ru   z+BalancedBaggingClassifier.decision_function  s    $ 	  %."   
 
 2$2CT[QQ6EEEE F
 F
 F
 F
 F
 F
 6]]F
 F
 F
 
 
 &&)::	rF   c                     t                                                      }d}d}d}||v r|||         |<   n||i||<   |S )N_xfail_checkscheck_estimators_nan_infz9Fails because the sampler removed infinity and NaN values)r>   
_more_tags)rA   tagstags_keyfailing_testreasonrD   s        rE   r   z$BalancedBaggingClassifier._more_tags  sV    ww!!##"1Lt+1DN<((*F3DNrF   )Nr.   )NNN)"__name__
__module____qualname____doc__sklearn_versionr   copydeepcopyr   _parameter_constraintsr   updater   numbersRealr   rS   callabler   r?   rQ   r   re   propertyrh   rk   r   ro   rs   r   r   ru   r   __classcell__)rD   s   @rE   r"   r"   +   sU       Y Yx --....!./@/W!X!X!./M!N!N!! q!G<<<
VVVWW	" &;"
N#344d;		
 		
   *
   ##* * * * * * *X    $ +A*@*B*B        F   X
 # # X# \666! ! ! ! 76!,H H H H H H  \..!45566+ + 76+Z	 	 	 	 	 	 	 	 	rF   r"   );r   r   r<   r   r]   numpyrJ   sklearnsklearn.baser   sklearn.ensembler   sklearn.ensemble._baggingr   sklearn.ensemble._baser   sklearn.treer   sklearn.utilsr   sklearn.utils.validationr	   sklearn.utils.parallelr
   r   ImportErrorModuleNotFoundErrorjoblibsklearn.utils.fixesbaser   pipeliner   under_samplingr   under_sampling.baser   utilsr   r   r   utils._available_ifr   utils._docstringr   r   utils._param_validationr   r   r   utils.fixesr   _commonr   r   __version__r   _sampling_strategy_docstringr"   r;   rF   rE   <module>r      s   ? ?                . . . . . . A A A A A A 8 8 8 8 8 8 / / / / / / ' ' ' ' ' ' 4 4 4 4 4 4,888888888() , , ,++++++++, * ) ) ) ) )       / / / / / / 2 2 2 2 2 2 L L L L L L L L L L . . . . . . I I I I I I I I F F F F F F F F F F & & & & & & C C C C C C C C- 344 &C(  
     68I   
  s   A A%$A%