o
    0GfV                     @   s   d dl mZ d dlmZmZmZ d dlmZ d dlm	Z	 d dl
Z	 dddZddd	Zdd
dZdd Zdd Z		dddZdddZi fddZG dd dZG dd deZdS )    )RegularizedResults)_calc_nodewise_row_calc_nodewise_weight_calc_approx_inv_cov)LikelihoodModelResults)OLSNc                 C   "   |du rt d| jdi |jS )a  estimates the regularized fitted parameters.

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    pnum : scalar
        Index of current partition
    partitions : scalar
        Total number of partitions
    fit_kwds : dict-like or None
        Keyword arguments to be given to fit_regularized

    Returns
    -------
    An array of the parameters for the regularized fit
    NzD_est_regularized_naive currently requires that fit_kwds not be None. )
ValueErrorfit_regularizedparamsmodpnum
partitionsfit_kwdsr	   r	   Glib/python3.10/site-packages/statsmodels/base/distributed_estimation.py_est_regularized_naiveK      r   c                 C   r   )a  estimates the unregularized fitted parameters.

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    pnum : scalar
        Index of current partition
    partitions : scalar
        Total number of partitions
    fit_kwds : dict-like or None
        Keyword arguments to be given to fit

    Returns
    -------
    An array of the parameters for the fit
    NzF_est_unregularized_naive currently requires that fit_kwds not be None.r	   )r
   fitr   r   r	   r	   r   _est_unregularized_naivee   r   r   c                 C   sN   t | d }t | }t|}| D ]}||7 }q|| }d|t||k < |S )a   joins the results from each run of _est_<type>_naive
    and returns the mean estimate of the coefficients

    Parameters
    ----------
    params_l : list
        A list of arrays of coefficients.
    threshold : scalar
        The threshold at which the coefficients will be cut.
    r   )lennpzerosabs)Zparams_l	thresholdpr   	params_mnr   r	   r	   r   _join_naive   s   

r   c                 C   s.   | j t|fi | }||d|  7 }|S )a  calculates the log-likelihood gradient for the debiasing

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    params : array_like
        The estimated coefficients for the current partition.
    alpha : scalar or array_like
        The penalty weight.  If a scalar, the same penalty weight
        applies to all variables in the model.  If a vector, it
        must have the same length as `params`, and contains a
        penalty weight for each coefficient.
    L1_wt : scalar
        The fraction of the penalty given to the L1 penalty term.
        Must be between 0 and 1 (inclusive).  If 0, the fit is
        a ridge fit, if 1 it is a lasso fit.
    score_kwds : dict-like or None
        Keyword arguments for the score function.

    Returns
    -------
    An array-like object of the same dimension as params

    Notes
    -----
    In general:

    gradient l_k(params)

    where k corresponds to the index of the partition

    For OLS:

    X^T(y - X^T params)
       )Zscorer   asarray)r   r   alphaL1_wt
score_kwdsgradr	   r	   r   
_calc_grad   s   &r%   c                 C   s4   t | jt |fi |}|dddf | j S )a  calculates the weighted design matrix necessary to generate
    the approximate inverse covariance matrix

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    params : array_like
        The estimated coefficients for the current partition.
    hess_kwds : dict-like or None
        Keyword arguments for the hessian function.

    Returns
    -------
    An array-like object, updated design matrix, same dimension
    as mod.exog
    N)r   ZsqrtZhessian_factorr    exog)r   r   	hess_kwdsZrhessr	   r	   r   _calc_wdesign_mat   s   r(   c                 C   s  |du ri n|}|du ri n|}|du rt d|d }d|v r%|d }nd}| jj\}}	ttd|	 | }
| jdi |j}t| ||||| }t	| ||}g }g }t
||
 t|d |
 |	D ]}t|||}|| t||||}|| qc||||fS )a  estimates the regularized fitted parameters, is the default
    estimation_method for class DistributedModel.

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    mnum : scalar
        Index of current partition.
    partitions : scalar
        Total number of partitions.
    fit_kwds : dict-like or None
        Keyword arguments to be given to fit_regularized
    score_kwds : dict-like or None
        Keyword arguments for the score function.
    hess_kwds : dict-like or None
        Keyword arguments for the Hessian function.

    Returns
    -------
    A tuple of parameters for regularized fit
        An array-like object of the fitted parameters, params
        An array-like object for the gradient
        A list of array like objects for nodewise_row
        A list of array like objects for nodewise_weight
    NzG_est_regularized_debiased currently requires that fit_kwds not be None.r!   r"   r   g      ?r	   )r
   r&   shapeintr   Zceilr   r   r%   r(   rangeminr   appendr   )r   Zmnumr   r   r#   r'   r!   r"   Znobsr   Zp_partr   r$   Zwexognodewise_row_lnodewise_weight_lidxZnodewise_rowZnodewise_weightr	   r	   r   _est_regularized_debiased   s.   
 
r1   c                 C   s   t | d d }t | }t|}t|}g }g }| D ]}||d 7 }||d 7 }||d  ||d  qt|}t|}|| }|d| 9 }t||}	||	| }
d|
t|
|k < |
S )a  joins the results from each run of _est_regularized_debiased
    and returns the debiased estimate of the coefficients

    Parameters
    ----------
    results_l : list
        A list of tuples each one containing the params, grad,
        nodewise_row and nodewise_weight values for each partition.
    threshold : scalar
        The threshold at which the coefficients will be cut.
    r   r         g      )r   r   r   extendZarrayr   dotr   )	results_lr   r   r   r   Zgrad_mnr.   r/   rZapprox_inv_covZdebiased_paramsr	   r	   r   _join_debiased  s&   




r8   c           	      C   sJ   | j  }|| | j||fi |}| j||| jfd|i| j}|S )a  handles the model fitting for each machine. NOTE: this
    is primarily handled outside of DistributedModel because
    joblib cannot handle class methods.

    Parameters
    ----------
    self : DistributedModel class instance
        An instance of DistributedModel.
    pnum : scalar
        index of current partition.
    endog : array_like
        endogenous data for current partition.
    exog : array_like
        exogenous data for current partition.
    fit_kwds : dict-like
        Keywords needed for the model fitting.
    init_kwds_e : dict-like
        Additional init_kwds to add for each partition.

    Returns
    -------
    estimation_method result.  For the default,
    _est_regularized_debiased, a tuple.
    r   )	init_kwdscopyupdatemodel_classestimation_methodr   estimation_kwds)	selfr   endogr&   r   init_kwds_eZtemp_init_kwdsmodelresultsr	   r	   r   _helper_fit_partitionH  s   

rD   c                   @   sH   e Zd ZdZ				dddZ		dddZ	ddd	Z	dd
dZdS )DistributedModela  
    Distributed model class

    Parameters
    ----------
    partitions : scalar
        The number of partitions that the data will be split into.
    model_class : statsmodels model class
        The model class which will be used for estimation. If None
        this defaults to OLS.
    init_kwds : dict-like or None
        Keywords needed for initializing the model, in addition to
        endog and exog.
    init_kwds_generator : generator or None
        Additional keyword generator that produces model init_kwds
        that may vary based on data partition.  The current usecase
        is for WLS and GLS
    estimation_method : function or None
        The method that performs the estimation for each partition.
        If None this defaults to _est_regularized_debiased.
    estimation_kwds : dict-like or None
        Keywords to be passed to estimation_method.
    join_method : function or None
        The method used to recombine the results from each partition.
        If None this defaults to _join_debiased.
    join_kwds : dict-like or None
        Keywords to be passed to join_method.
    results_class : results class or None
        The class of results that should be returned.  If None this
        defaults to RegularizedResults.
    results_kwds : dict-like or None
        Keywords to be passed to results class.

    Attributes
    ----------
    partitions : scalar
        See Parameters.
    model_class : statsmodels model class
        See Parameters.
    init_kwds : dict-like
        See Parameters.
    init_kwds_generator : generator or None
        See Parameters.
    estimation_method : function
        See Parameters.
    estimation_kwds : dict-like
        See Parameters.
    join_method : function
        See Parameters.
    join_kwds : dict-like
        See Parameters.
    results_class : results class
        See Parameters.
    results_kwds : dict-like
        See Parameters.

    Notes
    -----

    Examples
    --------
    Nc
           
      C   s   || _ |d u rt| _n|| _|d u ri | _n|| _|d u r!t| _n|| _|d u r,i | _n|| _|d u r7t| _n|| _|d u rBi | _	n|| _	|d u rMt
| _n|| _|	d u rYi | _d S |	| _d S N)r   r   r<   r9   r1   r=   r>   r8   join_method	join_kwdsr   results_classresults_kwds)
r?   r   r<   r9   r=   r>   rG   rH   rI   rJ   r	   r	   r   __init__  s2   

zDistributedModel.__init__
sequentialc           	      C   s   |du ri }|dkr|  |||}n|dkr| ||||}ntd| | j|fi | j}| jdgdgfi | j}| j||fi | jS )ae  Performs the distributed estimation using the corresponding
        DistributedModel

        Parameters
        ----------
        data_generator : generator
            A generator that produces a sequence of tuples where the first
            element in the tuple corresponds to an endog array and the
            element corresponds to an exog array.
        fit_kwds : dict-like or None
            Keywords needed for the model fitting.
        parallel_method : str
            type of distributed estimation to be used, currently
            "sequential", "joblib" and "dask" are supported.
        parallel_backend : None or joblib parallel_backend object
            used to allow support for more complicated backends,
            ex: dask.distributed
        init_kwds_generator : generator or None
            Additional keyword generator that produces model init_kwds
            that may vary based on data partition.  The current usecase
            is for WLS and GLS

        Returns
        -------
        join_method result.  For the default, _join_debiased, it returns a
        p length array.
        NrL   Zjoblibz.parallel_method: %s is currently not supportedr   )	fit_sequential
fit_joblibr
   rG   rH   r<   r9   rI   rJ   )	r?   data_generatorr   Zparallel_methodparallel_backendinit_kwds_generatorr6   r   Zres_modr	   r	   r   r     s"   zDistributedModel.fitc                 C   s   g }|du r t |D ]\}\}}t| ||||}|| q
|S t t||}	|	D ]\}\\}}}
t| |||||
}|| q)|S )a*  Sequentially performs the distributed estimation using
        the corresponding DistributedModel

        Parameters
        ----------
        data_generator : generator
            A generator that produces a sequence of tuples where the first
            element in the tuple corresponds to an endog array and the
            element corresponds to an exog array.
        fit_kwds : dict-like
            Keywords needed for the model fitting.
        init_kwds_generator : generator or None
            Additional keyword generator that produces model init_kwds
            that may vary based on data partition.  The current usecase
            is for WLS and GLS

        Returns
        -------
        join_method result.  For the default, _join_debiased, it returns a
        p length array.
        N)	enumeraterD   r-   zip)r?   rO   r   rQ   r6   r   r@   r&   rC   tup_genrA   r	   r	   r   rM     s"   

zDistributedModel.fit_sequentialc           
         sF  ddl m} |tj\} }|du r(|du r(| fddt|D }|S |durT|du rT| | fddt|D }W d   |S 1 sMw   Y  |S |du rr|durrtt||}	| fdd|	D }|S |dur|durtt||}	| | fdd|	D }W d   |S 1 sw   Y  |S )	a  Performs the distributed estimation in parallel using joblib

        Parameters
        ----------
        data_generator : generator
            A generator that produces a sequence of tuples where the first
            element in the tuple corresponds to an endog array and the
            element corresponds to an exog array.
        fit_kwds : dict-like
            Keywords needed for the model fitting.
        parallel_backend : None or joblib parallel_backend object
            used to allow support for more complicated backends,
            ex: dask.distributed
        init_kwds_generator : generator or None
            Additional keyword generator that produces model init_kwds
            that may vary based on data partition.  The current usecase
            is for WLS and GLS

        Returns
        -------
        join_method result.  For the default, _join_debiased, it returns a
        p length array.
        r   )parallel_funcNc                 3   (    | ]\}\}} |||V  qd S rF   r	   .0r   r@   r&   fr   r?   r	   r   	<genexpr>c      
z.DistributedModel.fit_joblib.<locals>.<genexpr>c                 3   rV   rF   r	   rW   rY   r	   r   r[   i  r\   c                 3   .    | ]\}\\}}} ||||V  qd S rF   r	   rX   r   r@   r&   r9   rY   r	   r   r[   o      c                 3   r]   rF   r	   r^   rY   r	   r   r[   v  r_   )Zstatsmodels.tools.parallelrU   rD   r   rR   rS   )
r?   rO   r   rP   rQ   rU   ZparZn_jobsr6   rT   r	   rY   r   rN   D  s@   



zDistributedModel.fit_joblib)NNNNNNNN)NrL   NNrF   )__name__
__module____qualname____doc__rK   r   rM   rN   r	   r	   r	   r   rE   m  s    ?
/
:
0rE   c                       s(   e Zd ZdZ fddZdd Z  ZS )DistributedResultsaT  
    Class to contain model results

    Parameters
    ----------
    model : class instance
        Class instance for model used for distributed data,
        this particular instance uses fake data and is really
        only to allow use of methods like predict.
    params : ndarray
        Parameter estimates from the fit model.
    c                    s   t  || d S rF   )superrK   )r?   rB   r   	__class__r	   r   rK     s   zDistributedResults.__init__c                 O   s   | j j| j|g|R i |S )a  Calls self.model.predict for the provided exog.  See
        Results.predict.

        Parameters
        ----------
        exog : array_like NOT optional
            The values for which we want to predict, unlike standard
            predict this is NOT optional since the data in self.model
            is fake.
        *args :
            Some models can take additional arguments. See the
            predict method of the model for the details.
        **kwargs :
            Some models can take additional keywords arguments. See the
            predict method of the model for the details.

        Returns
        -------
            prediction : ndarray, pandas.Series or pandas.DataFrame
            See self.model.predict
        )rB   predictr   )r?   r&   argskwargsr	   r	   r   rh     s   zDistributedResults.predict)r`   ra   rb   rc   rK   rh   __classcell__r	   r	   rf   r   rd   }  s    rd   rF   )r   )NNN)Zstatsmodels.base.elastic_netr   Z(statsmodels.stats.regularized_covariancer   r   r   Zstatsmodels.base.modelr   Z#statsmodels.regression.linear_modelr   Znumpyr   r   r   r   r%   r(   r1   r8   rD   rE   rd   r	   r	   r	   r   <module>   s*    
C

+

A.
%  