o
    0Gf_                     @  s   d Z ddlmZ ddlmZ ddlmZ ddlZddl	m
Z
mZmZmZ ddlm  mZ ddlmZmZ ddlmZ d	d
 Zdd Zdd ZG dd dZG dd deZG dd deZdd Zdd Zd ddZdd Z d!ddZ!dS )"zr
Base tools for handling various kinds of data structures, attaching metadata to
results, and doing data cleaning
    )annotations)lmap)reduceN)	DataFrameSeriesisnull
MultiIndex)cache_readonlycache_writable)MissingDataErrorc                 C  s0   t | jdkrt |  jdkrd S d S d S N   )npasarrayndimsqueezex r   5lib/python3.10/site-packages/statsmodels/base/data.py_asarray_2dcolumns   s   $r   c                 C  sB   t | } | jdkr| dddf } t jt| dddddf S )zy
    Makes sure input is an array and is 2d. Makes sure output is 2d. True
    indicates a null in the rows of 2d x.
    r   NZaxis)r   r   r   anyr   r   r   r   r   _asarray_2d_null_rows   s   

r   c                  G  s0   t | dkr| dggf7 } dd }t||  S )z
    Returns a boolean array which is True where any of the rows in any
    of the _2d_ arrays in arrs are NaNs. Inputs can be any mixture of Series,
    DataFrames or array_like.
    r   Fc                 S  s0   t | do| jtko| }tt| |t|B S )Ndtype)hasattrr   boolr   Z
logical_orr   )r   yZx_is_boolean_arrayr   r   r   _nan_row_maybe_two_inputs.   s   

z,_nan_rows.<locals>._nan_row_maybe_two_inputs)lenr   r   )Zarrsr   r   r   r   	_nan_rows%   s   r    c                   @  sF  e Zd ZdZdZdZdCddZdd Zdd	 Zd
d Z	e
dd Ze
dd Ze
dd Zdd Ze dd Ze dDddZedd Zejdd Zedd Zejdd Zed d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ ZdEd-d.Zd/d0 Zd1d2 Zd3d4 Z d5d6 Z!d7d8 Z"d9d: Z#d;d< Z$d=d> Z%d?d@ Z&dAdB Z'dS )F	ModelDatazi
    Class responsible for handling input data and extracting metadata into the
    appropriate form
    Nnonec           	      K  s  t |s
t |rddlm} t|d|v r|d| _d|v r(|d| _|dkrW| j|||fi |\}}|| _	| j
| | j| _| j| _| | j| j\| _| _n| j
| || _|| _| ||\| _| _d | _d| _| | |   i | _d S )Nr   )recarray_exceptiondesign_infoformular"   )	data_utilZ_is_recarraystatsmodels.tools.sm_exceptionsr#   NotImplementedErrorpopr$   r%   handle_missingZmissing_row_idx__dict__updateendog
orig_endogexog	orig_exog_convert_endog_exog	const_idx
k_constant_handle_constant_check_integrity_cache)	selfr-   r/   missinghasconstkwargsr#   ZarraysZnan_idxr   r   r   __init__>   s6   


zModelData.__init__c                 C  s0   ddl m } || j}d|v r|d= d|d< |S )Nr   )copyr$   Trestore_design_info)r<   r+   )r7   r<   dr   r   r   __getstate__\   s   
zModelData.__getstate__c           
      C  s   d|v r_ddl m}m} g }z|d }W n ty&   |d |d }Y nw dD ]*}z||d ||d	d
\}}W  n t|fyS }	 z||	 W Y d }	~	q)d }	~	ww |d |j| _|d= | j	| d S )Nr=   r   )	dmatrices
PatsyErrorframer.   r0   )      r   r      r%   Z	dataframe)Zeval_envZreturn_type)
Zpatsyr@   rA   KeyErrorjoin	NameErrorappendr$   r+   r,   )
r7   r>   r@   rA   excdatadepth_Zdesigner   r   r   __setstate__d   s.   

zModelData.__setstate__c                 C  s  |du s	| j d u rd| _d | _d S d}tj| j dd}t| s&tdtj| j dd}t	||kd 
 }|j| _| jdkrX| j d d |f  dkrUt|| _nQd}nN| jdkrg }|D ] }| j d d |f  }|dkr|d| _t|| _ n || qat|dk}	|	 rd| _t||	  | _nd}n	| jdkrd}n	 |r|stt| j jd | j f}
tj|
}tj| j }t||k| _d | _d S |rd| _d S d S )NFr   r   zexog contains inf or nansr   T)r/   r3   r2   r   maxZisfiniteallr   minwherer   sizeZmeanintrJ   arrayr   ZargmaxZcolumn_stackZonesshapeZlinalgZmatrix_rank)r7   r9   Zcheck_implicitZexog_maxZexog_minr2   valuesidxvalueposZaugmented_exogZ	rank_augmZ	rank_origr   r   r   r4   }   sV   






zModelData._handle_constantc                 C  s   || S Nr   clsr   nan_maskr   r   r   
_drop_nans   s   zModelData._drop_nansc                 C  s   || d d |f S r]   r   r^   r   r   r   _drop_nans_2d   s   zModelData._drop_nans_2dc                   s  g } dd}|durd}g }|du r|dg7 }n|dur'||f}ddg}n|f}dg}|dg7 }d}	g }
tr D ]S\}}|du sMt|dkrS||g7 }q>|jdkrf|t|f7 }||g7 }q>| jdkr{|t|f7 }||g7 }q>|jdkr|	t|f7 }	|
|g7 }
q>td	|dur|d}|rt| }|j	d j	d krtd
|  }|O |	rt|	}|j	d j	d krtd|dur||  O }n|  }|O nt| |	rtdddf f|	  t
s9tt||}|	r|tt|
|	 |r|fdd|D  |dur5|d|i |dur5|d|i |g fS |dkrBtd|dkr  fdd} fdd}tt|t||}|dur|dur| } ||}|dur ||}|d|i |dur|d|i |	r|tt|
t||	 |r|fdd|D  |t d  fS td| )zu
        This returns a dictionary with keys endog, exog and the keys of
        kwargs. It preserves Nones.
        missing_idxNr   r/   r-   r   r   rC   z6Arrays with more than 2 dimensions are not yet handledzBShape mismatch between endog/exog and extra arrays given to model.zEShape mismatch between endog/exog and extra 2d arrays given to model.c                      i | ]	}|  |d qS r]   get.0kr:   r   r   
<dictcomp>      z,ModelData.handle_missing.<locals>.<dictcomp>raisez!NaNs were encountered in the dataZdropc                        | S r]   )ra   r   r_   r`   r   r   <lambda>"      z*ModelData.handle_missing.<locals>.<lambda>c                   rn   r]   )rb   r   ro   r   r   rp   #  rq   c                   rd   r]   re   rg   rj   r   r   rk   6  rl   z missing option %s not understood)r)   r   itemsr   r   r   r   
ValueErrorr    rX   r   dictzipr,   r   r   ra   rT   tolist)r_   r-   r/   r8   r:   Znone_array_namesrc   ZcombinedZcombined_namesZcombined_2dZcombined_2d_nameskeyZvalue_arrayZupdated_row_maskZcombined_nansZcombined_2d_nansZ	drop_nansZdrop_nans_2dr   )r_   r:   r`   r   r*      s   
















zModelData.handle_missingc                 C  sT   |  |}d }|d ur&| |}|jdkr|d d d f }|jdkr&td||fS )Nr   rC   zexog is not 1d or 2d)	_get_yarr	_get_xarrr   rs   )r7   r-   r/   ZyarrZxarrr   r   r   r1   =  s   



zModelData._convert_endog_exogc                 C  s:   | j }| |}|st| j}t|dkr|d S t|S )Nr   r   )r.   
_get_names_make_endog_namesr-   r   list)r7   r-   ynamesr   r   r   r}   K  s   

zModelData.ynamesreturnlist[str] | Nonec                 C  s2   | j }|d ur| |}|st| j}t|S d S r]   )r0   rz   _make_exog_namesr/   r|   )r7   r/   xnamesr   r   r   r   W  s   

zModelData.xnamesc                 C  s   | j p| jS r]   )_param_namesr   r7   r   r   r   param_namesa  s   zModelData.param_namesc                 C  
   || _ d S r]   )r   )r7   rY   r   r   r   r   f  s   
c                 C  s   | j dur| j S | jS )z
        Labels for covariance matrices

        In multidimensional models, each dimension of a covariance matrix
        differs from the number of param_names.

        If not set, returns param_names
        N)
_cov_namesr   r   r   r   r   	cov_namesj  s   
zModelData.cov_namesc                 C  r   r]   )r   )r7   r[   r   r   r   r   y  s   
c                 C  s0   | j }|d ur| |}|S | j}| |}|S r]   )r0   _get_row_labelsr.   )r7   r/   
row_labelsr-   r   r   r   r   ~  s   

zModelData.row_labelsc                 C  s   d S r]   r   r7   Zarrr   r   r   r        zModelData._get_row_labelsc                 C  sl   t |trt |jtrdd |jD S t|jS t |tr&|jr$|jgS d S z|jjW S  t	y5   Y d S w )Nc                 S  s    g | ]}d  dd |D qS )rN   c                 s  s    | ]}|r|V  qd S r]   r   )rh   levelr   r   r   	<genexpr>  s    z2ModelData._get_names.<locals>.<listcomp>.<genexpr>)rH   )rh   cr   r   r   
<listcomp>  s    z(ModelData._get_names.<locals>.<listcomp>)

isinstancer   columnsr   r|   r   namer   namesAttributeErrorr   r   r   r   rz     s    



zModelData._get_namesc                 C  sZ   t |r
t |}t|}t|dkr)|jdkr|S |jdkr)t| gS | S r   )r&   _is_structured_ndarraystruct_to_ndarrayr   r   r   r   r   )r7   r-   r   r   r   rx     s   




zModelData._get_yarrc                 C  s   t |r
t |}t|S r]   )r&   r   r   r   r   )r7   r/   r   r   r   ry     s   


zModelData._get_xarrc                 C  s.   | j d urt| j t| jkrtdd S d S )Nz+endog and exog matrices are different sizes)r/   r   r-   rs   r   r   r   r   r5     s
   
zModelData._check_integrityr   c                 C  s   |dkr	|  |S |dkr| |S |dkr| |S |dkr$| |S |dkr-| |S |dkr6| |S |dkr@| ||S |dkrJ| ||S |d	krS| |S |d
kr\| 	|S |S )Nr   ZrowsZcovZdatesZ
columns_eqZcov_eqZgeneric_columnsZgeneric_columns_2dr}   Zmultivariate_confint)
attach_columnsattach_rows
attach_covattach_datesattach_columns_eqattach_cov_eqattach_generic_columnsattach_generic_columns_2dattach_ynamesattach_mv_confint)r7   objZhowr   r   r   r   wrap_output  s*   







zModelData.wrap_outputc                 C     |S r]   r   r7   resultr   r   r   r     r   zModelData.attach_columnsc                 C  r   r]   r   r   r   r   r   r     r   zModelData.attach_columns_eqc                 C  r   r]   r   r   r   r   r   r     r   zModelData.attach_covc                 C  r   r]   r   r   r   r   r   r     r   zModelData.attach_cov_eqc                 C  r   r]   r   r   r   r   r   r     r   zModelData.attach_rowsc                 C  r   r]   r   r   r   r   r   r     r   zModelData.attach_datesc                 C  r   r]   r   r   r   r   r   r     r   zModelData.attach_mv_confintc                 O  r   r]   r   r7   r   argsr:   r   r   r   r     r   z ModelData.attach_generic_columnsc                 O  r   r]   r   r   r   r   r   r     r   z#ModelData.attach_generic_columns_2dc                 C  r   r]   r   r   r   r   r   r     r   zModelData.attach_ynames)Nr"   N)r~   r   )r   N)(__name__
__module____qualname____doc__r   r   r;   r?   rP   r4   classmethodra   rb   r*   r1   r
   r}   r   propertyr   setterr   r	   r   r   rz   rx   ry   r5   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r!   6   sZ    
=


z
	




	
r!   c                   @  s   e Zd Zdd ZdS )	PatsyDatac                 C  s   |j jS r]   )r$   column_namesr   r   r   r   rz     s   zPatsyData._get_namesN)r   r   r   rz   r   r   r   r   r     s    r   c                      s   e Zd ZdZd! fdd	Ze fddZe fddZ fd	d
Zdd Z	dd Z
d!ddZdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Z  ZS )"
PandasDataz^
    Data handling class which knows how to reattach pandas metadata to model
    results
    Nc                   sR   t |}|d u r|nt |}|jtks|d ur"|jtkr"tdt ||S )NzRPandas data cast to numpy dtype of object. Check input data with np.asarray(data).)r   r   r   objectrs   superr1   r7   r-   r/   	__class__r   r   r1     s
   
zPandasData._convert_endog_exogc                   s&   t |ttfr|j| S t ||S r]   )r   r   r   locr   ra   r^   r   r   r   ra     s   
zPandasData._drop_nansc                   s4   t |ttfr|j| jd d |f S t ||S r]   )r   r   r   r   r   rb   r^   r   r   r   rb     s   zPandasData._drop_nans_2dc                   sR   | j | j}}|d ur"t|dr"t|dr"| j j| jjs"tdt   d S )Nindexz.The indices for endog and exog are not aligned)r.   r0   r   r   Zequalsrs   r   r5   r   r   r   r   r5     s   zPandasData._check_integrityc                 C  s$   z|j W S  ty   | jj  Y S w r]   )r   r   r.   r   r   r   r   r     s
   zPandasData._get_row_labelsc                 C  s   t | |d }t||dS )Nr   )getattrr   )r7   r   r   r   r   r   r   r      s   z!PandasData.attach_generic_columnsc                 C  s.   |p|}t | |d }t | |d }t|||dS Nr   r   )r   r   )r7   r   ZrownamesZcolnamesr   r   r   r   %  s   z$PandasData.attach_generic_columns_2dc                 C  s&   |j dkrt|| jdS t|| jdS )Nr   r   )r   r   r   r   r   r   r   r   r   +  s   
zPandasData.attach_columnsc                 C  s   t || j| jdS r   )r   r   r}   r   r   r   r   r   4     zPandasData.attach_columns_eqc                 C     t || j| jdS r   )r   r   r   r   r   r   r   7  r   zPandasData.attach_covc                 C  r   r   )r   r}   r   r   r   r   r   :  r   zPandasData.attach_cov_eqc                 C  s   |  }tj| jddjd }|dkr!|j|fkr!|d d d f }|jdk r+t|}nt|}| j|_| j	t
| d  |_|S )Nr   Zndminr   rC   )r   r   rW   r}   rX   r   r   r   r   r   r   r   )r7   r   squeezedk_endogoutr   r   r   r   =  s   

zPandasData.attach_rowsc                 C  sx   |  }tj| jddjd }|dkr$|j|fkr$t|d d d f }|jdk r0t|| jdS t	t|| j| jdS )Nr   r   r   rC   r   r   )
r   r   rW   r}   rX   r   r   r   Zpredict_datesr   )r7   r   r   r   r   r   r   r   M  s   

zPandasData.attach_datesc                 C  s   t |d| jddgdS )N)rF   rC   lowerupperr   )r   Zreshaper   r   r   r   r   r   Z  s   
zPandasData.attach_mv_confintc                 C  s.   |  }|jdk rt|| jdS t|| jdS )NrC   )r   )r   )r   r   r   r}   r   )r7   r   r   r   r   r   r   _  s   
zPandasData.attach_ynamesr]   )r   r   r   r   r1   r   ra   rb   r5   r   r   r   r   r   r   r   r   r   r   r   __classcell__r   r   r   r   r     s&    		
	r   c                 C  s>   | j dks| jd dkrdg}|S dd t| jd D }|S )Nr   r   c                 S  s   g | ]}d |d  qS )zy%dr   r   rh   ir   r   r   r   l  s    z%_make_endog_names.<locals>.<listcomp>)r   rX   range)r-   r}   r   r   r   r{   h  s
   r{   c                 C  sj   |  d}|dk r$| }dd td| jd D }||d |S dd td| jd d D }|S )Nr   c                 S     g | ]}d | qS zx%dr   r   r   r   r   r   w      z$_make_exog_names.<locals>.<listcomp>r   constc                 S  r   r   r   r   r   r   r   r   z  r   )varr   Zargminr   rX   insert)r/   Zexog_varr2   Z
exog_namesr   r   r   r   q  s   
r   r"   c                 K  sH   t | |}|dkrt| |d}|| |d fS |j| |fd|i|S )Nr"   )r-   r/   r8   )handle_data_class_factoryrt   r,   r*   )r-   r/   r8   r:   klassZret_dictr   r   r   r*     s   

r*   c                 C  sh   t | |r
t}|S t | |rt}|S t | |rt}|S t | |r(t}|S tdt	| t	|f )z
    Given inputs
    z%unrecognized data structures: %s / %s)
r&   Z_is_using_ndarray_typer!   Z_is_using_pandasr   Z_is_using_patsyr   Z_is_using_ndarrayrs   type)r-   r/   r   r   r   r   r     s   	r   c                 K  sR   t | ttfrt| } t |ttfrt|}t| |}|| f|||d|S )N)r/   r8   r9   )r   r|   tupler   r   r   )r-   r/   r8   r9   r:   r   r   r   r   handle_data  s   


r   )Nr"   )r"   N)"r   Z
__future__r   Zstatsmodels.compat.pythonr   	functoolsr   Znumpyr   Zpandasr   r   r   r   Zstatsmodels.tools.dataZtoolsrL   r&   Zstatsmodels.tools.decoratorsr	   r
   r'   r   r   r   r    r!   r   r   r{   r   r*   r   r   r   r   r   r   <module>   s.       :v	
	