
    _nd=                        d Z ddlZddlmZmZ ddlmZ ddlZddl	m
Z
 ddlmZ ddlmZ ddlmZ d	d
lmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddlmZ  ej        ej                  j         Z!d Z"ddZ#d Z$d Z% G d dee          Z&dS )z<
A Theil-Sen Estimator for Multiple Linear Regression Model
    N)IntegralReal)combinations)linalg)binom)get_lapack_funcs)effective_n_jobs   )LinearModel   )RegressorMixin)check_random_state)Interval)delayedParallel)ConvergenceWarningc                 p   | |z
  }t          j        t          j        |dz  d                    }|t          k    }t	          |                                | j        d         k               }||         }||         ddt           j        f         }t          j        t          j        ||z  d                    }|t          k    r>t          j        | |ddf         |z  d          t          j        d|z  d          z  }nd}d}t          dd||z  z
            |z  t          d||z            |z  z   S )u	  Modified Weiszfeld step.

    This function defines one iteration step in order to approximate the
    spatial median (L1 median). It is a form of an iteratively re-weighted
    least squares method.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training vector, where `n_samples` is the number of samples and
        `n_features` is the number of features.

    x_old : ndarray of shape = (n_features,)
        Current start vector.

    Returns
    -------
    x_new : ndarray of shape (n_features,)
        New iteration step.

    References
    ----------
    - On Computation of Spatial Median for Robust Data Mining, 2005
      T. Kärkkäinen and S. Äyrämö
      http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf
    r   r
   axisr   Ng      ?        )npsqrtsum_EPSILONintshapenewaxisr   normmaxmin)Xx_olddiff	diff_normmaskis_x_old_in_Xquotient_normnew_directions           ?lib/python3.11/site-packages/sklearn/linear_model/_theil_sen.py_modified_weiszfeld_stepr*      s>   6 u9DtQwQ///00I D

QWQZ/00M:D$2:.IKti'7a @ @ @AAMxqqqqzI5A>>>	MB
 B
 B
 
  	C}}4455E
c==0
1
1E
9	:    ,  MbP?c                    | j         d         dk    r*dt          j        |                                 d          fS |dz  }t          j        | d          }t          |          D ]4}t          | |          }t          j        ||z
  dz            |k     r n1|}5t          j	        d
                    |          t                     ||fS )	u	  Spatial median (L1 median).

    The spatial median is member of a class of so-called M-estimators which
    are defined by an optimization problem. Given a number of p points in an
    n-dimensional space, the point x minimizing the sum of all distances to the
    p other points is called spatial median.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training vector, where `n_samples` is the number of samples and
        `n_features` is the number of features.

    max_iter : int, default=300
        Maximum number of iterations.

    tol : float, default=1.e-3
        Stop the algorithm if spatial_median has converged.

    Returns
    -------
    spatial_median : ndarray of shape = (n_features,)
        Spatial median.

    n_iter : int
        Number of iterations needed.

    References
    ----------
    - On Computation of Spatial Median for Robust Data Mining, 2005
      T. Kärkkäinen and S. Äyrämö
      http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf
    r
   T)keepdimsr   r   r   zYMaximum number of iterations {max_iter} reached in spatial median for TheilSen regressor.)max_iter)r   r   medianravelmeanranger*   r   warningswarnformatr   )r!   r0   tolspatial_median_oldn_iterspatial_medians         r)   _spatial_medianr<   Q   s    D 	wqzQ")AGGII55555AIC+++// 
 
1!5GHH6%61<==CCE!/vxv((		
 	
 	
 >!!r+   c                 <    ddd|z  z  | |z
  dz   z  |z   dz
  | z  z
  S )a  Approximation of the breakdown point.

    Parameters
    ----------
    n_samples : int
        Number of samples.

    n_subsamples : int
        Number of subsamples to consider.

    Returns
    -------
    breakdown_point : float
        Approximation of breakdown point.
    r
   g      ? )	n_samplesn_subsampless     r)   _breakdown_pointrA      sG    " 	
A$%\)AA)EF 	r+   c                    t          |          }| j        d         |z   }|j        d         }t          j        |j        d         |f          }t          j        ||f          }t          j        t          ||                    }t          d||f          \  }	t          |          D ]D\  }
}| |ddf         |dd|df<   ||         |d|<    |	||          d         d|         ||
<   E|S )a  Least Squares Estimator for TheilSenRegressor class.

    This function calculates the least squares method on a subset of rows of X
    and y defined by the indices array. Optionally, an intercept column is
    added if intercept is set to true.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Design matrix, where `n_samples` is the number of samples and
        `n_features` is the number of features.

    y : ndarray of shape (n_samples,)
        Target vector, where `n_samples` is the number of samples.

    indices : ndarray of shape (n_subpopulation, n_subsamples)
        Indices of all subsamples with respect to the chosen subpopulation.

    fit_intercept : bool
        Fit intercept or not.

    Returns
    -------
    weights : ndarray of shape (n_subpopulation, n_features + intercept)
        Solution matrix of n_subpopulation solved least square problems.
    r
   r   )gelssN)	r   r   r   emptyoneszerosr   r   	enumerate)r!   yindicesfit_intercept
n_featuresr@   weightsX_subpopulationy_subpopulationlstsqindexsubsets               r)   _lstsqrR      s   6 &&Mm+J=#Lha(*566Gg|Z899OhL* = =??O
_o,NOOHU"7++ Q Qv-.vqqqy\=>>)*)*6&@@CKZKPNr+   c                       e Zd ZU dZdgdg eeddd          gdeg eeddd          g eeddd          gd	gdegd
gd	Zee	d<   dddddddddd	dZ
d Zd ZdS )TheilSenRegressoraR  Theil-Sen Estimator: robust multivariate regression model.

    The algorithm calculates least square solutions on subsets with size
    n_subsamples of the samples in X. Any value of n_subsamples between the
    number of features and samples leads to an estimator with a compromise
    between robustness and efficiency. Since the number of least square
    solutions is "n_samples choose n_subsamples", it can be extremely large
    and can therefore be limited with max_subpopulation. If this limit is
    reached, the subsets are chosen randomly. In a final step, the spatial
    median (or L1 median) is calculated of all least square solutions.

    Read more in the :ref:`User Guide <theil_sen_regression>`.

    Parameters
    ----------
    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to false, no intercept will be used in calculations.

    copy_X : bool, default=True
        If True, X will be copied; else, it may be overwritten.

    max_subpopulation : int, default=1e4
        Instead of computing with a set of cardinality 'n choose k', where n is
        the number of samples and k is the number of subsamples (at least
        number of features), consider only a stochastic subpopulation of a
        given maximal size if 'n choose k' is larger than max_subpopulation.
        For other than small problem sizes this parameter will determine
        memory usage and runtime if n_subsamples is not changed. Note that the
        data type should be int but floats such as 1e4 can be accepted too.

    n_subsamples : int, default=None
        Number of samples to calculate the parameters. This is at least the
        number of features (plus 1 if fit_intercept=True) and the number of
        samples as a maximum. A lower number leads to a higher breakdown
        point and a low efficiency while a high number leads to a low
        breakdown point and a high efficiency. If None, take the
        minimum number of subsamples leading to maximal robustness.
        If n_subsamples is set to n_samples, Theil-Sen is identical to least
        squares.

    max_iter : int, default=300
        Maximum number of iterations for the calculation of spatial median.

    tol : float, default=1e-3
        Tolerance when calculating spatial median.

    random_state : int, RandomState instance or None, default=None
        A random number generator instance to define the state of the random
        permutations generator. Pass an int for reproducible output across
        multiple function calls.
        See :term:`Glossary <random_state>`.

    n_jobs : int, default=None
        Number of CPUs to use during the cross validation.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    verbose : bool, default=False
        Verbose mode when fitting the model.

    Attributes
    ----------
    coef_ : ndarray of shape (n_features,)
        Coefficients of the regression model (median of distribution).

    intercept_ : float
        Estimated intercept of regression model.

    breakdown_ : float
        Approximated breakdown point.

    n_iter_ : int
        Number of iterations needed for the spatial median.

    n_subpopulation_ : int
        Number of combinations taken into account from 'n choose k', where n is
        the number of samples and k is the number of subsamples.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    HuberRegressor : Linear regression model that is robust to outliers.
    RANSACRegressor : RANSAC (RANdom SAmple Consensus) algorithm.
    SGDRegressor : Fitted by minimizing a regularized empirical loss with SGD.

    References
    ----------
    - Theil-Sen Estimators in a Multiple Linear Regression Model, 2009
      Xin Dang, Hanxiang Peng, Xueqin Wang and Heping Zhang
      http://home.olemiss.edu/~xdang/papers/MTSE.pdf

    Examples
    --------
    >>> from sklearn.linear_model import TheilSenRegressor
    >>> from sklearn.datasets import make_regression
    >>> X, y = make_regression(
    ...     n_samples=200, n_features=2, noise=4.0, random_state=0)
    >>> reg = TheilSenRegressor(random_state=0).fit(X, y)
    >>> reg.score(X, y)
    0.9884...
    >>> reg.predict(X[:1,])
    array([-31.5871...])
    booleanr
   Nleft)closedr   r   random_stateverbose	rJ   copy_Xmax_subpopulationr@   r0   r8   rX   n_jobsrY   _parameter_constraintsTg     @r,   r-   Fc       	             || _         || _        || _        || _        || _        || _        || _        || _        |	| _        d S NrZ   )
selfrJ   r[   r\   r@   r0   r8   rX   r]   rY   s
             r)   __init__zTheilSenRegressor.__init__R  sK     +!2( (r+   c           	         | j         }| j        r|dz   }n|}|||k    r#t          d                    ||                    ||k    r6||k    r/| j        rdnd}t          d                    |||                    n:||k    r#t          d                    ||                    nt	          ||          }t          dt          j        t          ||                              }t          t	          | j
        |                    }||fS )Nr
   z=Invalid parameter since n_subsamples > n_samples ({0} > {1}).z+1 zAInvalid parameter since n_features{0} > n_subsamples ({1} > {2}).z\Invalid parameter since n_subsamples != n_samples ({0} != {1}) while n_samples < n_features.)r@   rJ   
ValueErrorr7   r    r   r   rintr   r   r\   )ra   r?   rK   r@   n_dimplus_1all_combinationsn_subpopulations           r)   _check_subparamsz"TheilSenRegressor._check_subparamsi  sE   ( 	NEEE#i'' --3VL)-L-L   J&&<''%)%7?TTRF$!6&%>>   (  9,,$((.|Y(G(G   - ui00Lq"'%	<*H*H"I"IJJc$"8:JKKLL_,,r+   c                     	
                                    t           j                                       d          \  j        \  
}                     
|          \   _        t          
           _         j	        rt          d                     j                             t          d                    
                     t           j        
z            }t          d                    |                     t          d                     j                             t          j        t          
                     j        k    r+t#          t%          t'          
                              }n"
fdt'           j                  D             }t)           j                  }t          j        ||          	 t/          | j	                  	 fd	t'          |          D                       }t          j        |          }t3          | j         j        
          \   _        } j        r|d          _        |dd          _        nd _        | _         S )aU  Fit linear model.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Training data.
        y : ndarray of shape (n_samples,)
            Target values.

        Returns
        -------
        self : returns an instance of self.
            Fitted `TheilSenRegressor` estimator.
        T)	y_numericzBreakdown point: {0}zNumber of samples: {0}zTolerable outliers: {0}zNumber of subpopulations: {0}c                 @    g | ]}                     d           S )F)sizereplace)choice).0_r?   r@   rX   s     r)   
<listcomp>z)TheilSenRegressor.fit.<locals>.<listcomp>  s>        ##IL%#PP  r+   )r]   rY   c              3   n   K   | ]/} t          t                    |         j                  V  0d S r`   )r   rR   rJ   )rr   jobr!   
index_listra   rH   s     r)   	<genexpr>z(TheilSenRegressor.fit.<locals>.<genexpr>  s\       @
 @
 GFOOAq*S/43EFF@
 @
 @
 @
 @
 @
r+   )r0   r8   r   r
   Nr   ) _validate_paramsr   rX   _validate_datar   rk   n_subpopulation_rA   
breakdown_rY   printr7   r   r   rf   r   r\   listr   r4   r	   r]   array_splitr   vstackr<   r0   r8   n_iter_rJ   
intercept_coef_)ra   r!   rH   rK   tol_outliersrI   r]   rL   coefsrw   r?   r@   rX   s   ```      @@@@r)   fitzTheilSenRegressor.fit  s    	)$*;<<""1a4"881 !	:.2.C.Cz/
 /
+d+ +9lCC< 	Q(//@@AAA*11)<<===t:;;L+22<@@AAA1889NOOPPP 75L1122d6LLL<i(8(8,GGHHGG     t455  G
 "$+..^GV44
?(&$,??? @
 @
 @
 @
 @
 @
 @
V}}@
 @
 @
 
 
 )G$$-dm
 
 
e  	#AhDOqrrDJJ!DODJr+   )__name__
__module____qualname____doc__r   r   r   r^   dict__annotations__rb   rk   r   r>   r+   r)   rT   rT      s        r rj $+&htQVDDDEx(Xh4???@sD8889'(";$ $D         .#- #- #-J: : : : :r+   rT   )r,   r-   )'r   r5   numbersr   r   	itertoolsr   numpyr   scipyr   scipy.specialr   scipy.linalg.lapackr   joblibr	   _baser   baser   utilsr   utils._param_validationr   utils.parallelr   r   
exceptionsr   finfodoubleepsr   r*   r<   rA   rR   rT   r>   r+   r)   <module>r      s     " " " " " " " " " " " " " "                 0 0 0 0 0 0 # # # # # #       ! ! ! ! ! ! & & & & & & . . . . . . . . . . . . . . + + + + + +28BI"0 0 0f5" 5" 5" 5"p  6) ) )Xx x x x x x x x x xr+   