o
    0GfC'                     @   sz   d Z ddlmZmZ ddlmZ ddlZddlZddl	Z
ddlmZ ddlmZ ddlmZ ddlmZ G d	d
 d
ZdS )a  
Author: Kishan Manani
License: BSD-3 Clause

An implementation of MSTL [1], an algorithm for time series decomposition when
there are multiple seasonal components.

This implementation has the following differences with the original algorithm:
- Missing data must be handled outside of this class.
- The algorithm proposed in the paper handles a case when there is no
seasonality. This implementation assumes that there is at least one seasonal
component.

[1] K. Bandura, R.J. Hyndman, and C. Bergmeir (2021)
MSTL: A Seasonal-Trend Decomposition Algorithm for Time Series with Multiple
Seasonal Patterns
https://arxiv.org/pdf/2107.13462.pdf
    )OptionalUnion)SequenceN)boxcox)ArrayLike1D)STL)freq_to_periodc                   @   s  e Zd ZdZdddddddedeeeee f  deeeee f  deee	e
f  d	ed
eee
eeedf f  fddZdd Zdd Zdeeee df deeee df deee ee f fddZdeeee df dee fddZdeeee df dedee fddZdefddZedeee ee f fddZed
edefddZededee fd d!Zed"d# ZdS )$MSTLa
  
    MSTL(endog, periods=None, windows=None, lmbda=None, iterate=2,
         stl_kwargs=None)

    Season-Trend decomposition using LOESS for multiple seasonalities.

    .. versionadded:: 0.14.0

    Parameters
    ----------
    endog : array_like
        Data to be decomposed. Must be squeezable to 1-d.
    periods : {int, array_like, None}, optional
        Periodicity of the seasonal components. If None and endog is a pandas
        Series or DataFrame, attempts to determine from endog. If endog is a
        ndarray, periods must be provided.
    windows : {int, array_like, None}, optional
        Length of the seasonal smoothers for each corresponding period.
        Must be an odd integer, and should normally be >= 7 (default). If None
        then default values determined using 7 + 4 * np.arange(1, n + 1, 1)
        where n is number of seasonal components.
    lmbda : {float, str, None}, optional
        The lambda parameter for the Box-Cox transform to be applied to `endog`
        prior to decomposition. If None, no transform is applied. If "auto", a
        value will be estimated that maximizes the log-likelihood function.
    iterate : int, optional
        Number of iterations to use to refine the seasonal component.
    stl_kwargs: dict, optional
        Arguments to pass to STL.

    See Also
    --------
    statsmodels.tsa.seasonal.STL

    References
    ----------
    .. [1] K. Bandura, R.J. Hyndman, and C. Bergmeir (2021)
        MSTL: A Seasonal-Trend Decomposition Algorithm for Time Series with
        Multiple Seasonal Patterns. arXiv preprint arXiv:2107.13462.

    Examples
    --------
    Start by creating a toy dataset with hourly frequency and multiple seasonal
    components.

    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>> import pandas as pd
    >>> pd.plotting.register_matplotlib_converters()
    >>> np.random.seed(0)
    >>> t = np.arange(1, 1000)
    >>> trend = 0.0001 * t ** 2 + 100
    >>> daily_seasonality = 5 * np.sin(2 * np.pi * t / 24)
    >>> weekly_seasonality = 10 * np.sin(2 * np.pi * t / (24 * 7))
    >>> noise = np.random.randn(len(t))
    >>> y = trend + daily_seasonality + weekly_seasonality + noise
    >>> index = pd.date_range(start='2000-01-01', periods=len(t), freq='h')
    >>> data = pd.DataFrame(data=y, index=index)

    Use MSTL to decompose the time series into two seasonal components
    with periods 24 (daily seasonality) and 24*7 (weekly seasonality).

    >>> from statsmodels.tsa.seasonal import MSTL
    >>> res = MSTL(data, periods=(24, 24*7)).fit()
    >>> res.plot()
    >>> plt.tight_layout()
    >>> plt.show()

    .. plot:: plots/mstl_plot.py
    N   )periodswindowslmbdaiterate
stl_kwargsendogr   r   r   r   r   c                C   sX   || _ | || _| jjd | _|| _| ||\| _| _|| _	| 
|r&|ni | _d S )Nr   )r   _to_1d_array_yshapenobsr   _process_periods_and_windowsr   r   r   _remove_overloaded_stl_kwargs_stl_kwargs)selfr   r   r   r   r   r    r   8lib/python3.10/site-packages/statsmodels/tsa/stl/mstl.py__init__h   s   


zMSTL.__init__c                 C   s  t | j}|dkrdn| j}| jdkr t| jdd\}}|| _n| jr,t| j| jd}n| j}| jdd}| jdd}t	j
|| jfd}|}t|D ]1}	t|D ]*}
|||
  }td|| j|
 | j|
 d| jj||d	}|j||
< |||
  }qRqLt	|j}|j}|j}|| }t| jtjtjfr| jj}tj||d
d}tj||dd}tj||dd}tj||dd}dd | jD }|jdkrtj||dd}ntj|||d}ddlm} ||||||S )z
        Estimate a trend component, multiple seasonal components, and a
        residual component.

        Returns
        -------
        DecomposeResult
            Estimation results.
           autoN)r   
inner_iter
outer_iter)r   r   periodseasonal)r   r   Zobserved)indexnametrendresidZrobust_weightc                 S   s   g | ]}d | qS )Z	seasonal_r   .0r!   r   r   r   
<listcomp>   s    zMSTL.fit.<locals>.<listcomp>r"   )r#   columnsr   )DecomposeResultr   )lenr   r   r   r   r   Z	est_lmbdar   popnpZzerosr   ranger   r   fitr"   squeezeTr%   Zweights
isinstancer   pdSeries	DataFramer#   ndimZstatsmodels.tsa.seasonalr+   )r   num_seasonsr   yr   Zstl_inner_iterZstl_outer_iterr"   Zdeseas_iresr%   rwr&   r#   Zcolsr+   r   r   r   r0   ~   sV   




zMSTL.fitc              	   C   s&   d| j  d| j d| j d| j d	S )NzMSTL(endog, periods=z
, windows=z, lmbda=z
, iterate=))r   r   r   r   r   r   r   r   __str__   s   zMSTL.__str__returnc                    s     |}|r j|t|d} ||\}}n j|t|d}t|}t|t|kr2tdt fdd|D rVtdt	 t
 fdd|D }|d t| }||fS )N)r8   )Periods and windows must have same lengthc                 3   s    | ]
}| j d  kV  qdS r
   Nr   r'   r?   r   r   	<genexpr>   s    z4MSTL._process_periods_and_windows.<locals>.<genexpr>zTA period(s) is larger than half the length of time series. Removing these period(s).c                 3   s"    | ]}| j d  k r|V  qdS rC   rD   r'   r?   r   r   rE      s    )_process_periods_process_windowsr,   _sort_periods_and_windowssorted
ValueErroranywarningswarnUserWarningtuple)r   r   r   r   r?   r   r      s$   
z!MSTL._process_periods_and_windowsc                 C   s0   |d u r|   f}|S t|tr|f}|S 	 |S N)_infer_periodr3   int)r   r   r   r   r   rF      s   

zMSTL._process_periodsr8   c                 C   s0   |d u r|  |}|S t|tr|f}|S 	 |S rP   )_default_seasonal_windowsr3   rR   )r   r   r8   r   r   r   rG      s   

zMSTL._process_windowsc                 C   sD   d }t | jtjtjfrt| jjdd }|d u rtdt|}|S )NZinferred_freqz%Unable to determine period from endog)	r3   r   r4   r5   r6   getattrr#   rJ   r   )r   Zfreqr!   r   r   r   rQ      s   zMSTL._infer_periodc                 C   s6   t | t |krtdttt| | \} }| |fS )NrB   )r,   rJ   ziprI   )r   r   r   r   r   rH     s   zMSTL._sort_periods_and_windowsc                 C   s"   g d}|D ]}|  |d  q| S )Nr    )r-   )r   argsargr   r   r   r     s   z"MSTL._remove_overloaded_stl_kwargsnc                 C   s   t dd td| d D S )Nc                 s   s    | ]	}d d|  V  qdS )      Nr   )r(   r;   r   r   r   rE     s    z1MSTL._default_seasonal_windows.<locals>.<genexpr>r   )rO   r/   )rX   r   r   r   rS     s   zMSTL._default_seasonal_windowsc                 C   s2   t jt t | t jd}|jdkrtd|S )N)Zdtyper   zy must be a 1d array)r.   Zascontiguousarrayr1   ZasarrayZdoubler7   rJ   )xr9   r   r   r   r     s   
zMSTL._to_1d_array)__name__
__module____qualname____doc__r   r   r   rR   r   floatstrdictboolr   r0   r@   rO   r   rF   rG   rQ   staticmethodrH   r   rS   r   r   r   r   r   r	       sh    K
@	


	r	   )r_   typingr   r   Zcollections.abcr   rL   Znumpyr.   Zpandasr4   Zscipy.statsr   Zstatsmodels.tools.typingr   Zstatsmodels.tsa.stl._stlr   Zstatsmodels.tsa.tsatoolsr   r	   r   r   r   r   <module>   s    