o
    0Gf                     @   s~  d dl mZmZmZmZ d dlmZmZ d dlZ	d dl
mZmZ d dlmZmZ d dlZd dlZd dlmZ d dlmZ d dlmZmZmZmZ d d	lmZ ee	jejej f Z!ee"ej#f Z$d
Z%G dd deZ&G dd de&eZ'G dd de'Z(G dd de&Z)G dd de&eZ*G dd de*Z+G dd de&eZ,G dd de,e*Z-G dd de,Z.G dd de,e'Z/G dd  d Z0dS )!    )PD_LT_2_2_0Appenderis_int_indexto_numpy)ABCabstractmethodN)OptionalUnion)HashableSequence)qr)d_or_f)	bool_like
float_likerequired_int_likestring_like)freq_to_periodzstart is less than the first observation in the index. Values can only be created for observations after the start of the index.
c                
   @   s  e Zd ZdZdZedefddZede	e
 dejfddZe		dd
ede	e
 dee	e
  dejfddZedefddZdefddZeedee
df fddZede	e
 dejfddZe		ddejd
edee	e
  dejfddZdefddZdedefddZd	S )DeterministicTermz/Abstract Base Class for all Deterministic TermsFreturnc                 C      | j S )z?Flag indicating whether the values produced are dummy variables)	_is_dummyself r   =lib/python3.10/site-packages/statsmodels/tsa/deterministic.pyis_dummy*      zDeterministicTerm.is_dummyindexc                 C      dS )aR  
        Produce deterministic trends for in-sample fitting.

        Parameters
        ----------
        index : index_like
            An index-like object. If not an index, it is converted to an
            index.

        Returns
        -------
        DataFrame
            A DataFrame containing the deterministic terms.
        Nr   r   r   r   r   r   	in_sample/       zDeterministicTerm.in_sampleNstepsforecast_indexc                 C   r   )a1  
        Produce deterministic trends for out-of-sample forecasts

        Parameters
        ----------
        steps : int
            The number of steps to forecast
        index : index_like
            An index-like object. If not an index, it is converted to an
            index.
        forecast_index : index_like
            An Index or index-like object to use for the forecasts. If
            provided must have steps elements.

        Returns
        -------
        DataFrame
            A DataFrame containing the deterministic terms.
        Nr   )r   r"   r   r#   r   r   r   out_of_sample@   r!   zDeterministicTerm.out_of_samplec                 C   r   )z.A meaningful string representation of the termNr   r   r   r   r   __str__[   r!   zDeterministicTerm.__str__c                 C   s   t | jf}t|| j S N)type__name__hash_eq_attr)r   namer   r   r   __hash___   s   zDeterministicTerm.__hash__.c                 C   r   )z9tuple of attributes that are used for equality comparisonNr   r   r   r   r   r*   c   r!   zDeterministicTerm._eq_attrc                 C   s4   t | tjr| S zt| W S  ty   tdw )Nz*index must be a pandas Index or index-like)
isinstancepdIndex	Exception	TypeErrorr   r   r   r   _index_likeh   s   zDeterministicTerm._index_likec           
      C   s  |dur(t |}t|tjsJ |jd |kr&td|jd  d| d|S t| tjr;tj| d d || j	dS t| tj
r\| j	dur\tj| d | j	d	d
d }tj|| j	|d
S t| tjrt| tjsjJ z| j}| j}W n ty   t| dkr| d | d  nd}| d | }Y nw |||  }tj|||dS t| rtt| dkrt| d d | d | d }t|S ddl}|jdtd	d | jd }	t|	d |	| d S )zExtend the forecast indexNr   z(The number of values in forecast_index (z) must match steps (z).   periodsfreq   r8   r7   stepzOnly PeriodIndexes, DatetimeIndexes with a frequency set, RangesIndexes, and Index with a unit increment support extending. The index is set will contain the position relative to the data length.)
stacklevel)r   r3   r-   r.   r/   shape
ValueErrorPeriodIndexperiod_ranger8   DatetimeIndex
date_range
RangeIndexr=   stopAttributeErrorlenr   npalldiffarangewarningswarnUserWarning)
r   r"   r#   Znext_obsr=   startrF   Zidx_arrrM   nobsr   r   r   _extend_indexq   sR   

  

zDeterministicTerm._extend_indexc                 C   s   |   dt| d S )Nz at 0x0x)r%   idr   r   r   r   __repr__      zDeterministicTerm.__repr__otherc                 C   sJ   t |t| r#| j}|j}t|t|krdS tdd t||D S dS )NFc                 S   s   g | ]\}}||kqS r   r   ).0abr   r   r   
<listcomp>       z,DeterministicTerm.__eq__.<locals>.<listcomp>)r-   r'   r*   rH   rJ   zip)r   rW   Zown_attrZoth_attrr   r   r   __eq__   s   zDeterministicTerm.__eq__r&   )r(   
__module____qualname____doc__r   propertyboolr   r   r   r
   r.   	DataFramer    intr   r$   strr%   r,   tupler*   staticmethodr/   r3   rR   rU   objectr^   r   r   r   r   r   $   sN    

2r   c                   @   s   e Zd ZdZddededdfdd	Zedefd
dZedefddZ	ede
e fddZdejdejfddZdefddZdS )TimeTrendDeterministicTermz:Abstract Base Class for all Time Trend Deterministic TermsTr   constantorderr   Nc                 C   s   t |d| _t|d| _d S )Nrk   rl   )r   	_constantr   _orderr   rk   rl   r   r   r   __init__   s   z#TimeTrendDeterministicTerm.__init__c                 C   r   )z+Flag indicating that a constant is included)rm   r   r   r   r   rk      r   z#TimeTrendDeterministicTerm.constantc                 C   r   )zOrder of the time trendrn   r   r   r   r   rl      r   z TimeTrendDeterministicTerm.orderc                 C   sb   g }dddd}| j r|d td| jd D ]}||v r&|||  q|d|  q|S )NtrendZtrend_squaredZtrend_cubed)r5   r9      constr5   ztrend**)rm   appendrangern   )r   columnsZtrend_namespowerr   r   r   _columns   s   
z#TimeTrendDeterministicTerm._columnslocsc                 C   sb   t | j| j }t|d|f}tjd|ft d}td| jd |dt | jd f< ||C }|S )Nr5   Zdtyper   )re   rm   rn   rI   ZtilezerosrL   )r   rz   Zntermstermsrx   r   r   r   
_get_terms   s   $z%TimeTrendDeterministicTerm._get_termsc                 C   sP   g }| j r
|d | jr|d| jd   |sdg}d|}d| dS )NZConstantzPowers 1 to r5   ZEmpty,z
TimeTrend())rm   ru   rn   join)r   r}   Z	terms_strr   r   r   r%      s   

z"TimeTrendDeterministicTerm.__str__Tr   )r(   r_   r`   ra   rc   re   rp   rb   rk   rl   listrf   ry   rI   ndarrayr~   r%   r   r   r   r   rj      s    rj   c                
       s   e Zd ZdZddededdf fdd	Zed
edd fddZ	e
ejjdeee ejf dejfddZe
ejj	ddedeee ejf deee  dejfddZedeedf fddZ  ZS )	TimeTrendao  
    Constant and time trend determinstic terms

    Parameters
    ----------
    constant : bool
        Flag indicating whether a constant should be included.
    order : int
        A non-negative int containing the powers to include (1, 2, ..., order).

    See Also
    --------
    DeterministicProcess
    Seasonality
    Fourier
    CalendarTimeTrend

    Examples
    --------
    >>> from statsmodels.datasets import sunspots
    >>> from statsmodels.tsa.deterministic import TimeTrend
    >>> data = sunspots.load_pandas().data
    >>> trend_gen = TimeTrend(True, 3)
    >>> trend_gen.in_sample(data.index)
    Tr   rk   rl   r   Nc                    s   t  || d S r&   )superrp   ro   	__class__r   r   rp      s   zTimeTrend.__init__rr   c                 C   s4   | d}d}d|v rd}nd|v rd}| ||dS )aY  
        Create a TimeTrend from a string description.

        Provided for compatibility with common string names.

        Parameters
        ----------
        trend : {"n", "c", "t", "ct", "ctt"}
            The string representation of the time trend. The terms are:

            * "n": No trend terms
            * "c": A constant only
            * "t": Linear time trend only
            * "ct": A constant and a time trend
            * "ctt": A constant, a time trend and a quadratic time trend

        Returns
        -------
        TimeTrend
            The TimeTrend instance.
        cr   ttr9   tr5   rk   rl   
startswith)clsrr   rk   rl   r   r   r   from_string  s   
zTimeTrend.from_stringr   c                 C   sR   |  |}|jd }tjd|d tjdd d d f }| |}tj|| j|dS Nr   r5   r{   rw   r   )	r3   r?   rI   rL   doubler~   r.   rd   ry   )r   r   rQ   rz   r}   r   r   r   r    !  s
   

"
zTimeTrend.in_sampler"   r#   c                 C   sh   |  |}|jd }| |||}tj|d || d tjdd d d f }| |}tj|| j	|dS r   )
r3   r?   rR   rI   rL   r   r~   r.   rd   ry   )r   r"   r   r#   rQ   fcast_indexrz   r}   r   r   r   r$   +  s   

*
zTimeTrend.out_of_sample.c                 C      | j | jfS r&   )rm   rn   r   r   r   r   r*   9     zTimeTrend._eq_attrr   r&   )r(   r_   r`   ra   rc   re   rp   classmethodrf   r   r   r   r    r	   r   r
   r.   r/   rd   r$   r   rb   rg   r*   __classcell__r   r   r   r   r      s0    

	
 r   c                
   @   s&  e Zd ZdZdZddededdfdd	Zedefd
dZedefddZ	e
deee ejejf dd fddZedeedf fddZdefddZedee fddZeejjdeee ejf dejfddZeejj	ddedeee ejf deee  dejfddZdS ) Seasonalitya   
    Seasonal dummy deterministic terms

    Parameters
    ----------
    period : int
        The length of a full cycle. Must be >= 2.
    initial_period : int
        The seasonal index of the first observation. 1-indexed so must
        be in {1, 2, ..., period}.

    See Also
    --------
    DeterministicProcess
    TimeTrend
    Fourier
    CalendarSeasonality

    Examples
    --------
    Solar data has an 11-year cycle

    >>> from statsmodels.datasets import sunspots
    >>> from statsmodels.tsa.deterministic import Seasonality
    >>> data = sunspots.load_pandas().data
    >>> seas_gen = Seasonality(11)
    >>> seas_gen.in_sample(data.index)

    To start at a season other than 1

    >>> seas_gen = Seasonality(11, initial_period=4)
    >>> seas_gen.in_sample(data.index)
    Tr5   periodinitial_periodr   Nc                 C   sR   t |d| _t |d| _|dk rtdd| j  kr"|ks'td tdd S )Nr   r   r9   zperiod must be >= 2r5   z-initial_period must be in {1, 2, ..., period})r   _period_initial_periodr@   )r   r   r   r   r   r   rp   c  s   zSeasonality.__init__c                 C   r   )zThe period of the seasonalityr   r   r   r   r   r   m  r   zSeasonality.periodc                 C   r   )z+The seasonal index of the first observation)r   r   r   r   r   r   r  r   zSeasonality.initial_periodr   c                 C   sh   |  |}t|tjr|j}nt|tjr|jr|jn|j}ntd|du r+tdt	|}| |dS )aF  
        Construct a seasonality directly from an index using its frequency.

        Parameters
        ----------
        index : {DatetimeIndex, PeriodIndex}
            An index with its frequency (`freq`) set.

        Returns
        -------
        Seasonality
            The initialized Seasonality instance.
        z,index must be a DatetimeIndex or PeriodIndexNz+index must have a freq or inferred_freq set)r   )
r3   r-   r.   rA   r8   rC   inferred_freqr1   r@   r   )r   r   r8   r   r   r   r   
from_indexw  s   

zSeasonality.from_index.c                 C   r   r&   )r   r   r   r   r   r   r*     r   zSeasonality._eq_attrc                 C      d| j  dS )NzSeasonality(period=r   r   r   r   r   r   r%        zSeasonality.__str__c                 C   s:   | j }g }td|d D ]}|d| d| d q|S )Nr5   s(r   r   )r   rv   ru   )r   r   rw   ir   r   r   ry     s
   zSeasonality._columnsc                 C   sp   |  |}|jd }| j}t||f}| jd }t|D ]}|| | }d||d ||f< qtj|| j	|dS Nr   r5   r   )
r3   r?   r   rI   r|   r   rv   r.   rd   ry   )r   r   rQ   r   termoffsetr   colr   r   r   r      s   


zSeasonality.in_sampler"   r#   c                 C   s   |  |}| |||}|jd }| j}t||f}| jd }t|D ]}	|| |	 | }
d||	d ||
f< q$tj	|| j
|dS r   )r3   rR   r?   r   rI   r|   r   rv   r.   rd   ry   )r   r"   r   r#   r   rQ   r   r   r   r   Zcol_locr   r   r   r$     s   


zSeasonality.out_of_sample)r5   r&   )r(   r_   r`   ra   r   re   rp   rb   r   r   r   r	   r   r
   r.   rC   rA   r   rg   r*   rf   r%   r   ry   r   r   r    r/   rd   r$   r   r   r   r   r   r   >  sH    "



r   c                   @   sJ   e Zd ZdZdeddfddZedefddZd	ej	dej	fd
dZ
dS )FourierDeterministicTermz7Abstract Base Class for all Fourier Deterministic Termsrl   r   Nc                 C   s   t |d| _d S Nr}   )r   rn   )r   rl   r   r   r   rp     s   z!FourierDeterministicTerm.__init__c                 C   r   )z'The order of the Fourier terms includedrq   r   r   r   r   rl     r   zFourierDeterministicTerm.orderrz   c                 C   s   dt j |t j }t |jd d| j f}t| jD ]!}tt j	t j
fD ]\}}||d | |d d d| | f< q'q|S )Nr9   r   r5   )rI   piastyper   emptyr?   rn   rv   	enumeratesincos)r   rz   r}   r   jfuncr   r   r   r~     s   &z#FourierDeterministicTerm._get_terms)r(   r_   r`   ra   re   rp   rb   rl   rI   r   r~   r   r   r   r   r     s    r   c                
       s   e Zd ZdZdZdedef fddZedefdd	Z	ede
e fd
dZeejjdeee ejf dejfddZeejj	ddedeee ejf deee  dejfddZedeedf fddZdefddZ  ZS )Fouriera  
    Fourier series deterministic terms

    Parameters
    ----------
    period : int
        The length of a full cycle. Must be >= 2.
    order : int
        The number of Fourier components to include. Must be <= 2*period.

    See Also
    --------
    DeterministicProcess
    TimeTrend
    Seasonality
    CalendarFourier

    Notes
    -----
    Both a sine and a cosine term are included for each i=1, ..., order

    .. math::

       f_{i,s,t} & = \sin\left(2 \pi i \times \frac{t}{m} \right)  \\
       f_{i,c,t} & = \cos\left(2 \pi i \times \frac{t}{m} \right)

    where m is the length of the period.

    Examples
    --------
    Solar data has an 11-year cycle

    >>> from statsmodels.datasets import sunspots
    >>> from statsmodels.tsa.deterministic import Fourier
    >>> data = sunspots.load_pandas().data
    >>> fourier_gen = Fourier(11, order=2)
    >>> fourier_gen.in_sample(data.index)
    Fr   rl   c                    s4   t  | t|d| _d| j | jkrtdd S )Nr   r9   z2 * order must be <= period)r   rp   r   r   rn   r@   )r   r   rl   r   r   r   rp     s
   zFourier.__init__r   c                 C   r   )zThe period of the Fourier termsr   r   r   r   r   r     r   zFourier.periodc              
   C   sV   | j }t| }g }td| jd D ]}dD ]}|| d| d| d qq|S )Nr5   r   r   (r   r   )r   r   striprv   rn   ru   )r   r   Z
fmt_periodrw   r   typr   r   r   ry     s   zFourier._columnsr   c                 C   s<   |  |}|jd }| t|| j }tj||| jdS Nr   r   rw   )	r3   r?   r~   rI   rL   r   r.   rd   ry   )r   r   rQ   r}   r   r   r   r      s   

zFourier.in_sampleNr"   r#   c                 C   sP   |  |}| |||}|jd }| t||| | j }tj||| j	dS r   )
r3   rR   r?   r~   rI   rL   r   r.   rd   ry   )r   r"   r   r#   r   rQ   r}   r   r   r   r$     s
   

zFourier.out_of_sample.c                 C   r   r&   r   rn   r   r   r   r   r*   ,  r   zFourier._eq_attrc                 C   s   d| j  d| j dS )NzFourier(period=, order=r   r   r   r   r   r   r%   0  s   zFourier.__str__r&   )r(   r_   r`   ra   r   floatre   rp   rb   r   r   rf   ry   r   r   r    r	   r   r
   r.   r/   rd   r$   r   rg   r*   r%   r   r   r   r   r   r     s8    &
	

r   c                	   @   s   e Zd ZdZdeddfddZedefddZd	ee	j
e	jf dejfd
dZe	j
e	jffd	e	jdeeeedf f dee	j
e	jf fddZdS )CalendarDeterministicTermz4Abstract Base Class for calendar deterministic termsr8   r   Nc                 C   s6   zt jd|dd}|j| _W d S  ty   tdw )Nz
2020-01-01r5   r:   z freq is not understood by pandas)r.   rD   r8   _freqr@   )r   r8   r   r   r   r   rp   7  s   z"CalendarDeterministicTerm.__init__c                 C      | j jS z(The frequency of the deterministic termsr   freqstrr   r   r   r   r8   >     zCalendarDeterministicTerm.freqr   c                 C   sX   t |tjr
| }||| j  }|| j}|d  |  }t|t| S )Nr5   )r-   r.   rA   to_timestamp	to_periodr   r   )r   r   Zdeltar   Zgapr   r   r   _compute_ratioC  s   z(CalendarDeterministicTerm._compute_ratioallowed.c                 C   s   t |tr|f}t ||sJt|dkrd|d j }n!ddd |d d D }t|dkr3|d	7 }|d
|d j 7 }t| j d| }t|t |tjtjfsUJ |S )Nr5   za r   z, c                 s   s    | ]}|j V  qd S r&   )r(   )rX   rY   r   r   r   	<genexpr>[  s    z>CalendarDeterministicTerm._check_index_type.<locals>.<genexpr>r4   r9   r   z and z! terms can only be computed from )	r-   r'   rH   r(   r   r1   r.   rC   rA   )r   r   r   Zallowed_typesmsgr   r   r   _check_index_typeM  s    

z+CalendarDeterministicTerm._check_index_type)r(   r_   r`   ra   rf   rp   rb   r8   r	   r.   rC   rA   rI   r   r   r/   r'   rg   r   r   r   r   r   r   4  s(    
r   c                
       s   e Zd ZdZdededdf fddZedee fdd	Z	e
ejjd
eee ejf dejfddZe
ejj	dded
eee ejf deee  dejfddZedeedf fddZdefddZ  ZS )CalendarFouriera  
    Fourier series deterministic terms based on calendar time

    Parameters
    ----------
    freq : str
        A string convertible to a pandas frequency.
    order : int
        The number of Fourier components to include. Must be <= 2*period.

    See Also
    --------
    DeterministicProcess
    CalendarTimeTrend
    CalendarSeasonality
    Fourier

    Notes
    -----
    Both a sine and a cosine term are included for each i=1, ..., order

    .. math::

       f_{i,s,t} & = \sin\left(2 \pi i \tau_t \right)  \\
       f_{i,c,t} & = \cos\left(2 \pi i \tau_t \right)

    where m is the length of the period and :math:`\tau_t` is the frequency
    normalized time.  For example, when freq is "D" then an observation with
    a timestamp of 12:00:00 would have :math:`\tau_t=0.5`.

    Examples
    --------
    Here we simulate irregularly spaced hourly data and construct the calendar
    Fourier terms for the data.

    >>> import numpy as np
    >>> import pandas as pd
    >>> base = pd.Timestamp("2020-1-1")
    >>> gen = np.random.default_rng()
    >>> gaps = np.cumsum(gen.integers(0, 1800, size=1000))
    >>> times = [base + pd.Timedelta(gap, unit="s") for gap in gaps]
    >>> index = pd.DatetimeIndex(pd.to_datetime(times))

    >>> from statsmodels.tsa.deterministic import CalendarFourier
    >>> cal_fourier_gen = CalendarFourier("D", 2)
    >>> cal_fourier_gen.in_sample(index)
    r8   rl   r   Nc                    s(   t  | t| | t|d| _d S r   )r   rp   r   r   rn   )r   r8   rl   r   r   r   rp     s   zCalendarFourier.__init__c              
   C   sH   g }t d| jd D ]}dD ]}|| d| d| jj d qq
|S )Nr5   r   r   z,freq=r   )rv   rn   ru   r   r   )r   rw   r   r   r   r   r   ry     s   "zCalendarFourier._columnsr   c                 C   s:   |  |}| |}| |}| |}tj||| jdS Nr   )r3   r   r   r~   r.   rd   ry   )r   r   ratior}   r   r   r   r      s
   



zCalendarFourier.in_sampler"   r#   c                 C   s^   |  |}| |||}| | t|tjtjfsJ | |}| |}tj	||| j
dS r   )r3   rR   r   r-   r.   rC   rA   r   r~   rd   ry   )r   r"   r   r#   r   r   r}   r   r   r   r$     s   



zCalendarFourier.out_of_sample.c                 C   s   | j j| jfS r&   r   r   rn   r   r   r   r   r*     s   zCalendarFourier._eq_attrc                 C   s   d| j j d| j dS )NzFourier(freq=r   r   r   r   r   r   r   r%     rV   zCalendarFourier.__str__r&   )r(   r_   r`   ra   rf   re   rp   rb   r   ry   r   r   r    r	   r   r
   r.   r/   rd   r$   r   rg   r*   r%   r   r   r   r   r   r   h  s2    0



r   c                
       s  e Zd ZdZdZer%ddddddddd	d	d
ddd
dddddZn ddddddid	d	ddddddddddd	iddddZdededdf fddZe	defddZ
e	defddZd eejejf dejfd!d"Zd eejejf dejfd#d$Zd eejejf dejfd%d&Zd eejejf dejfd'd(Zd eejejf dejfd)d*Ze	dee fd+d,Zeejjd eee ejf dejfd-d.Zeej j	d8d/e!d eee ejf d0e"ee  dejfd1d2Z e	de#ed3f fd4d5Z$defd6d7Z%  Z&S )9CalendarSeasonalitya  
    Seasonal dummy deterministic terms based on calendar time

    Parameters
    ----------
    freq : str
        The frequency of the seasonal effect.
    period : str
        The pandas frequency string describing the full period.

    See Also
    --------
    DeterministicProcess
    CalendarTimeTrend
    CalendarFourier
    Seasonality

    Examples
    --------
    Here we simulate irregularly spaced data (in time) and hourly seasonal
    dummies for the data.

    >>> import numpy as np
    >>> import pandas as pd
    >>> base = pd.Timestamp("2020-1-1")
    >>> gen = np.random.default_rng()
    >>> gaps = np.cumsum(gen.integers(0, 1800, size=1000))
    >>> times = [base + pd.Timedelta(gap, unit="s") for gap in gaps]
    >>> index = pd.DatetimeIndex(pd.to_datetime(times))

    >>> from statsmodels.tsa.deterministic import CalendarSeasonality
    >>> cal_seas_gen = CalendarSeasonality("H", "D")
    >>> cal_seas_gen.in_sample(index)
    T         )BDhH   r   r   rs   )MSM      )r   Qr   )Wr   r   AY)r   r   r   r   )r   ME)r   r   QEr   )r   r   )r   r   r   r   r   r   ZYEr8   r   r   Nc                    s   t  }|jdd | j D   t| j }t|dt|dd}t|d|dd}|| j| vr;td| d| d	t 	| || _
| jjd
d | _d S )Nc                 S   s   g | ]}t | qS r   )r   keys)rX   valr   r   r   r[     r\   z0CalendarSeasonality.__init__.<locals>.<listcomp>r8   F)optionslowerr   zThe combination of freq=z and period=z is not supported.-r   )setupdate
_supportedvaluesrg   r   r   r@   r   rp   r   r   r   split	_freq_str)r   r8   r   Zfreq_optionsZperiod_optionsr   r   r   rp     s(   zCalendarSeasonality.__init__c                 C   r   r   r   r   r   r   r   r8     r   zCalendarSeasonality.freqc                 C   r   )zThe full periodr   r   r   r   r   r     r   zCalendarSeasonality.periodr   c                 C   sb   | j jdv r|jd|j  S | j jdkr|jS tjdddj }|j}|| s/t	d|S )Nr   r   r   z2000-1-1
   )r7   z=freq is B but index contains days that are not business days.)
r   r   hourZ	dayofweekr.   Zbdate_rangeuniqueZisinrJ   r@   )r   r   Zbdayslocr   r   r   _weekly_to_loc"  s   z"CalendarSeasonality._weekly_to_locc                 C   s   |j S r&   )r   r   r   r   r   _daily_to_loc3  r   z!CalendarSeasonality._daily_to_locc                 C   s   |j d d S )Nr5   rs   )monthr   r   r   r   _quarterly_to_loc8  s   z%CalendarSeasonality._quarterly_to_locc                 C   s    | j jdv r|jd S |jd S )N)r   r   r   r5   )r   r   r   Zquarterr   r   r   r   _annual_to_loc=  s   

z"CalendarSeasonality._annual_to_locc                 C   s   | j dkr| |}n| j dkr| |}n| j dv r!| |}n| |}| j| j  | j }t|j	d |f}d|t
|j	d |f< |S )Nr   r   )r   r   r   r5   )r   r   r   r   r   r   r   rI   r|   r?   rL   )r   r   rz   Z
full_cycler}   r   r   r   r~   E  s   



zCalendarSeasonality._get_termsc              
   C   sN   g }| j | j | j }t|D ]}|d| j d|d  d| j d q|S )Nr   =r5   z	, period=r   )r   r   r   rv   ru   )r   rw   countr   r   r   r   ry   U  s   zCalendarSeasonality._columnsc                 C   s0   |  |}| |}| |}tj||| jdS r   )r3   r   r~   r.   rd   ry   )r   r   r}   r   r   r   r    _  s   


zCalendarSeasonality.in_sampler"   r#   c                 C   sT   |  |}| |||}| | t|tjtjfsJ | |}tj||| j	dS r   )
r3   rR   r   r-   r.   rC   rA   r~   rd   ry   )r   r"   r   r#   r   r}   r   r   r   r$   i  s   


z!CalendarSeasonality.out_of_sample.c                 C   r   r&   )r   r   r   r   r   r   r*   w  r   zCalendarSeasonality._eq_attrc                 C   r   )NzSeasonal(freq=r   )r   r   r   r   r   r%   {  r   zCalendarSeasonality.__str__r&   )'r(   r_   r`   ra   r   r   r   rf   rp   rb   r8   r   r	   r.   rC   rA   rI   r   r   r   r   r   r~   r   ry   r   r   r    r   r
   r/   rd   r$   re   r   rg   r*   r%   r   r   r   r   r   r     s    #

	








	
	
r   c                       sP  e Zd ZdZ		d!dddededed	eeee	f  d
df
 fddZ
ed
ee fddZe	d"deded	eeee	f  d
d fddZdeejejf dejd
ejfddZeejjdeee ejf d
ejfddZeejj	d"dedeee ejf deee  d
ejfddZed
eedf fddZd
efdd Z  Z S )#CalendarTimeTrenda
  
    Constant and time trend determinstic terms based on calendar time

    Parameters
    ----------
    freq : str
        A string convertible to a pandas frequency.
    constant : bool
        Flag indicating whether a constant should be included.
    order : int
        A non-negative int containing the powers to include (1, 2, ..., order).
    base_period : {str, pd.Timestamp}, default None
        The base period to use when computing the time stamps. This value is
        treated as 1 and so all other time indices are defined as the number
        of periods since or before this time stamp. If not provided, defaults
        to pandas base period for a PeriodIndex.

    See Also
    --------
    DeterministicProcess
    CalendarFourier
    CalendarSeasonality
    TimeTrend

    Notes
    -----
    The time stamp, :math:`\tau_t`, is the number of periods that have elapsed
    since the base_period. :math:`\tau_t` may be fractional.

    Examples
    --------
    Here we simulate irregularly spaced hourly data and construct the calendar
    time trend terms for the data.

    >>> import numpy as np
    >>> import pandas as pd
    >>> base = pd.Timestamp("2020-1-1")
    >>> gen = np.random.default_rng()
    >>> gaps = np.cumsum(gen.integers(0, 1800, size=1000))
    >>> times = [base + pd.Timedelta(gap, unit="s") for gap in gaps]
    >>> index = pd.DatetimeIndex(pd.to_datetime(times))

    >>> from statsmodels.tsa.deterministic import CalendarTimeTrend
    >>> cal_trend_gen = CalendarTimeTrend("D", True, order=1)
    >>> cal_trend_gen.in_sample(index)

    Next, we normalize using the first time stamp

    >>> cal_trend_gen = CalendarTimeTrend("D", True, order=1,
    ...                                   base_period=index[0])
    >>> cal_trend_gen.in_sample(index)
    Tr   Nbase_periodr8   rk   rl   r   r   c                   sh   t  | tj| ||d d| _|d ur$tj|d| jd}|jd | _|d u r-d | _	d S t|| _	d S )Nr   r   r5   r6   )
r   rp   rj   _ref_i8r.   rB   r   asi8rf   _base_period)r   r8   rk   rl   r   prr   r   r   rp     s    zCalendarTimeTrend.__init__c                 C   r   )zThe base period)r   r   r   r   r   r     r   zCalendarTimeTrend.base_periodrr   c                 C   s8   | d}d}d|v rd}nd|v rd}| ||||dS )a  
        Create a TimeTrend from a string description.

        Provided for compatibility with common string names.

        Parameters
        ----------
        freq : str
            A string convertible to a pandas frequency.
        trend : {"n", "c", "t", "ct", "ctt"}
            The string representation of the time trend. The terms are:

            * "n": No trend terms
            * "c": A constant only
            * "t": Linear time trend only
            * "ct": A constant and a time trend
            * "ctt": A constant, a time trend and a quadratic time trend
        base_period : {str, pd.Timestamp}, default None
            The base period to use when computing the time stamps. This value
            is treated as 1 and so all other time indices are defined as the
            number of periods since or before this time stamp. If not
            provided, defaults to pandas base period for a PeriodIndex.

        Returns
        -------
        TimeTrend
            The TimeTrend instance.
        r   r   r   r9   r   r5   r   r   )r   r8   rr   r   rk   rl   r   r   r   r     s   
#zCalendarTimeTrend.from_stringr   r   c                 C   sh   t |tjr|| j}|j}|| j d }|tj	| }|d d d f }| 
|}tj|| j|dS )Nr5   r   )r-   r.   rC   r   r   r   r   r   rI   r   r~   rd   ry   )r   r   r   Zindex_i8timer}   r   r   r   _terms  s   
zCalendarTimeTrend._termsc                 C   s*   |  |}| |}| |}| ||S r&   )r3   r   r   r  )r   r   r   r   r   r   r      s   


zCalendarTimeTrend.in_sampler"   r#   c                 C   sN   |  |}| |||}| | t|tjtjfsJ | |}| ||S r&   )	r3   rR   r   r-   r.   rA   rC   r   r  )r   r"   r   r#   r   r   r   r   r   r$     s   


zCalendarTimeTrend.out_of_sample.c                 C   s,   | j | j| jjf}| jd ur|| jf7 }|S r&   )rm   rn   r   r   r   )r   attrr   r   r   r*     s   
zCalendarTimeTrend._eq_attrc                 C   sR   t | }d|d d  d| jj d }| jd ur'|d d d| j d }|S )NZCalendarr4   z, freq=r   zbase_period=)rj   r%   r   r   r   )r   valuer   r   r   r%   &  s
   
 
zCalendarTimeTrend.__str__r   r&   )!r(   r_   r`   ra   rf   rc   re   r   r	   DateLikerp   rb   r   r   r   r.   rC   rA   rI   r   rd   r  r   r   r    r   r
   r/   r$   rg   r*   r%   r   r   r   r   r   r     sr    8*




r   c                   @   s  e Zd ZdZdddddddddeee ejf de	ee
ef  d	ed
edededee defddZedejfddZedee fddZdeej deej fddZdejdejfddZeejjdejfddZeejj	d3dede	eee ejf  dejfdd Zd!ejdeejejf fd"d#Zd$ed!edejfd%d&Zd$ejd!ejdejfd'd(Zd)ed*edejfd+d,Z d$ee!e"ef d!ee!e"ef dejfd-d.Z#d4d/d0Z$d1d2 Z%dS )5DeterministicProcessa  
    Container class for deterministic terms.

    Directly supports constants, time trends, and either seasonal dummies or
    fourier terms for a single cycle. Additional deterministic terms beyond
    the set that can be directly initialized through the constructor can be
    added.

    Parameters
    ----------
    index : {Sequence[Hashable], pd.Index}
        The index of the process. Should usually be the "in-sample" index when
        used in forecasting applications.
    period : {float, int}, default None
        The period of the seasonal or fourier components. Must be an int for
        seasonal dummies. If not provided, freq is read from index if
        available.
    constant : bool, default False
        Whether to include a constant.
    order : int, default 0
        The order of the tim trend to include. For example, 2 will include
        both linear and quadratic terms. 0 exclude time trend terms.
    seasonal : bool = False
        Whether to include seasonal dummies
    fourier : int = 0
        The order of the fourier terms to included.
    additional_terms : Sequence[DeterministicTerm]
        A sequence of additional deterministic terms to include in the process.
    drop : bool, default False
        A flag indicating to check for perfect collinearity and to drop any
        linearly dependent terms.

    See Also
    --------
    TimeTrend
    Seasonality
    Fourier
    CalendarTimeTrend
    CalendarSeasonality
    CalendarFourier

    Notes
    -----
    See the notebook `Deterministic Terms in Time Series Models
    <../examples/notebooks/generated/deterministics.html>`__ for an overview.

    Examples
    --------
    >>> from statsmodels.tsa.deterministic import DeterministicProcess
    >>> from pandas import date_range
    >>> index = date_range("2000-1-1", freq="M", periods=240)

    First a determinstic process with a constant and quadratic time trend.

    >>> dp = DeterministicProcess(index, constant=True, order=2)
    >>> dp.in_sample().head(3)
                const  trend  trend_squared
    2000-01-31    1.0    1.0            1.0
    2000-02-29    1.0    2.0            4.0
    2000-03-31    1.0    3.0            9.0

    Seasonal dummies are included by setting seasonal to True.

    >>> dp = DeterministicProcess(index, constant=True, seasonal=True)
    >>> dp.in_sample().iloc[:3,:5]
                const  s(2,12)  s(3,12)  s(4,12)  s(5,12)
    2000-01-31    1.0      0.0      0.0      0.0      0.0
    2000-02-29    1.0      1.0      0.0      0.0      0.0
    2000-03-31    1.0      0.0      1.0      0.0      0.0

    Fourier components can be used to alternatively capture seasonal patterns,

    >>> dp = DeterministicProcess(index, constant=True, fourier=2)
    >>> dp.in_sample().head(3)
                const  sin(1,12)  cos(1,12)  sin(2,12)  cos(2,12)
    2000-01-31    1.0   0.000000   1.000000   0.000000        1.0
    2000-02-29    1.0   0.500000   0.866025   0.866025        0.5
    2000-03-31    1.0   0.866025   0.500000   0.866025       -0.5

    Multiple Seasonalities can be captured using additional terms.

    >>> from statsmodels.tsa.deterministic import Fourier
    >>> index = date_range("2000-1-1", freq="D", periods=5000)
    >>> fourier = Fourier(period=365.25, order=1)
    >>> dp = DeterministicProcess(index, period=3, constant=True,
    ...                           seasonal=True, additional_terms=[fourier])
    >>> dp.in_sample().head(3)
                const  s(2,3)  s(3,3)  sin(1,365.25)  cos(1,365.25)
    2000-01-01    1.0     0.0     0.0       0.000000       1.000000
    2000-01-02    1.0     1.0     0.0       0.017202       0.999852
    2000-01-03    1.0     0.0     1.0       0.034398       0.999408
    NFr   r   r   rk   rl   seasonalfourieradditional_termsdropr   r   rk   rl   r  r  r	  r
  c          
      C   s  t |tjst|}|| _g | _d| _d | _|   t|ddd}t	|d | _
}t|d| _t	|d | _}t|d| _t|}d | _t	|d	| _|| _|sR|r[| jt|| |rc|rctd
|sg|rw|d u rw|d u rwt| j | _}|rt|d}| jt| n|rt|d}|d usJ | jt||d |D ]}	t |	tstd|	| jvr| j|	 qtd|| _d | _d S )NFr   T)Zoptionalrk   rl   r  r  r
  zseasonal and fourier can be initialized through the constructor since these will be necessarily perfectly collinear. Instead, you can pass additional components using the additional_terms input.)rl   zJAll additional terms must be instances of subsclasses of DeterministicTermzuOne or more terms in additional_terms has been added through the parameters of the constructor. Terms must be unique.)r-   r.   r/   _index_deterministic_terms_extendable_index_freq_validate_indexr   r   rm   r   rn   	_seasonal_fourierrg   _cached_in_sample_drop_additional_termsru   r   r@   r   r   r   r   r   r1   _retain_cols)
r   r   r   rk   rl   r  r  r	  r
  r   r   r   r   rp     sX   





zDeterministicProcess.__init__r   c                 C   r   )zThe index of the process)r  r   r   r   r   r     r   zDeterministicProcess.indexc                 C   r   )z/The deterministic terms included in the process)r  r   r   r   r   r}     r   zDeterministicProcess.termsr}   c           	      C   s   d }| j D ]}t|ttfr|p|j}q|d u r5d}|D ]}||jd k |jd dk@ }|p3| }q|}t| j D ]\}}|j	}|rV|rV|| jd d dd f ||< |pY|}q<|S )NFr   r5   )
r  r-   r   r   rk   ilocrJ   anyr   r   )	r   r}   Z	has_constZdtermr   Z	const_colZ
drop_firstr   r   r   r   r   _adjust_dummies  s"   

 
z$DeterministicProcess._adjust_dummiesc                 C   sz   t j|dkdd}t |r|jd d | f }|jdd|jddk}t |dkr;|| @ }|jd d | f }|S )Nr   Zaxisr5   )rI   rJ   r  r   maxminsumZ
duplicated)r   r}   Zall_zeroZis_constantZsurplus_constsr   r   r   _remove_zeros_ones  s   
z'DeterministicProcess._remove_zeros_onesc                 C   s  | j d ur| j S | j}| jstjt|jd df|dS g }| jD ]
}||	| q"| 
|}tj|dd}| |}| jrt|}t|ddd}|d }|d }tt|}	|	d |jd  ttj }
tt|	|
k}|j| }dg}d}td|jd D ]%}tj|d |d d |d f }||kr|| |}||kr nqt||kr|jd d |f }n|jd d t|d | f }|j| _|| _ |S )	Nr   r2   r5   r  rT)modeZpivotingr4   ) r  r  r  r.   rd   rI   r   r?   ru   r    r  concatr  r  r   r   absZdiagZfinfor   Zepsre   r  Trv   ZlinalgZmatrix_rankrH   r  sortrw   r  )r   r   	raw_termsr   r}   Z	terms_arrresr  pZabs_diagZtolZrankZrpxZkeepZ	last_rankr   Z	curr_rankr   r   r   r      sH   




$
 zDeterministicProcess.in_sampler"   r#   c                 C   s   t |d}| jr| jd u r|   | j}| js&tjt	|j
d df|dS g }| jD ]}||||| q+tj|dd}| jd usFJ |j
d t| jkrU|| j }|S )Nr"   r   r2   r5   r  )r   r  r  r    r  r  r.   rd   rI   r   r?   ru   r$   r   rH   )r   r"   r#   r   r$  r   r}   r   r   r   r$     s   


z"DeterministicProcess.out_of_samplerF   c                 C   s>   | j }t|tjrtj|d ||jdS tj|d || jdS )Nr   )endr8   )rP   r'  r8   )r  r-   r.   rA   rB   r8   rD   r  )r   rF   r   r   r   r   _extend_time_index1  s   z'DeterministicProcess._extend_time_indexrP   c                 C   s  | j }t|}t|tjs|sJ ||d k rttt|tjr%|j}nt|dkr2t	
| nd}|dkrJ||d  | dkrJtd| d|rVtt	||}ntj|||d}|d | j d krr|  }|j| }|S |d | j d kr|d | }|d |krtj|||d}	| j|	jd |	d}
|
j| S | j|jd |dS || j d k}|| }||  }|  j| }| j|jd |d}tj||gdd	S )
Nr   r5   z,The step of the index is not 1 (actual step=zM). start must be in the sequence that would have been generated by the index.r<   r4   )r#   )r"   r#   r  )r  r   r-   r.   rE   r@   START_BEFORE_INDEX_ERRr=   rH   rI   rK   r  r/   rL   r    r   r$   r?   r   )r   rP   rF   r   Zis_int64_indexZidx_stepnew_idxr    Z
next_valuetmpoosZin_sample_locZin_sample_idxZout_of_sample_idxZin_sample_exogZoos_exogr   r   r   _range_from_range_index:  sD   




z,DeterministicProcess._range_from_range_indexc                 C   s   | j }t| j tjr$t|tjr|j| jd}t|tjr$|j| jd}||d k r.tt|| j d kr>| 	 j
|| S | |}|||d k }| |jd |}||d kra|j
|| S tj| 	 |gdd}|j
|| S )N)r8   r   r4   r  )r  r-   r.   rA   	Timestampr   r  r@   r)  r    r   r(  r$   r?   r   )r   rP   rF   r   r*  Zoos_idxr,  Zbothr   r   r   _range_from_time_indexe  s"   
z+DeterministicProcess._range_from_time_indexr  r+   c                 C   s   |dk rt | d|| jjd k r| j| S || jjd d  d }| j}t| jtjr?tj|d | j|d}|d  S tj	|d | j|d}|d S )Nr   z must be non-negative.r5   r4   r:   )
r@   r  r?   r-   r.   rA   rB   r  r   rD   )r   r  r+   Zadd_periodsr   r   Zdrr   r   r   _int_to_timestampz  s   
z&DeterministicProcess._int_to_timestampc                 C   s   | j stdt| jtjfv st| jr)t|d}t|d}|d7 }| ||S t	|t
tjfr8| |d}nt|}t	|t
tjfrL| |d}nt|}| ||S )a  
        Deterministic terms spanning a range of observations

        Parameters
        ----------
        start : {int, str, dt.datetime, pd.Timestamp, np.datetime64}
            The first observation.
        stop : {int, str, dt.datetime, pd.Timestamp, np.datetime64}
            The final observation. Inclusive to match most prediction
            function in statsmodels.

        Returns
        -------
        DataFrame
            A data frame of deterministic terms
        zThe index in the deterministic process does not support extension. Only PeriodIndex, DatetimeIndex with a frequency, RangeIndex, and integral Indexes that start at 0 and have only unit differences can be extended when producing out-of-sample forecasts.
rP   rF   r5   )r  r1   r'   r  r.   rE   r   r   r-  r-   re   rI   integerr0  r.  r/  )r   rP   rF   r   r   r   rv     s    



zDeterministicProcess.rangec                 C   s   t | jtjr| jj| _d| _d S t | jtjr)| jjp| jj| _| jd u| _d S t | jtj	r5d| _d S t
| jrO| jd dkoKtt| jdk| _d S d S )NTr   r5   )r-   r  r.   rA   r8   r  r  rC   r   rE   r   rI   rJ   rK   r   r   r   r   r    s   




z$DeterministicProcess._validate_indexc              
   C   s&   t || j| j| j| j| j| j| jdS )ap  
        Create an identical determinstic process with a different index

        Parameters
        ----------
        index : index_like
            An index-like object. If not an index, it is converted to an
            index.

        Returns
        -------
        DeterministicProcess
            The deterministic process applied to a different index
        r  )r  r   rm   rn   r  r  r  r  r   r   r   r   apply  s   zDeterministicProcess.applyr&   )r   N)&r(   r_   r`   ra   r	   r   r
   r.   r/   r   r   re   rc   r   rp   rb   r   r   r}   rd   r  r  r   r    r$   r.  rC   rA   r(  r-  r/  rf   r0  IntLiker  rv   r  r2  r   r   r   r   r  .  s    a	

?

(
	+


-r  )1Zstatsmodels.compat.pandasr   r   r   r   abcr   r   ZdatetimeZdttypingr   r	   Zcollections.abcr
   r   ZnumpyrI   Zpandasr.   Zscipy.linalgr   Zstatsmodels.iolib.summaryr   Zstatsmodels.tools.validationr   r   r   r   Zstatsmodels.tsa.tsatoolsr   r.  Z
datetime64r  re   r1  r3  r)  r   rj   r   r   r   r   r   r   r   r   r  r   r   r   r   <module>   s:     2Z \4` 8 0