o
    NrfV                     @  s  d dl mZ d dlZd dlZd dlmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZmZmZmZmZmZmZ d dl m!Z! d dl"m#Z# d dl$m%Z%m&Z&m'Z'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0m1Z1m2Z2m3Z3m4Z4 e5de6fi Z7dd Z8dd Z9e%e.ddddddZ:dd Z;dd Z<d d! Z=d"d# Z>G d$d% d%ZG d&d' d'eZ?dS )(    )annotationsN)Integral)is_datetime64_any_dtype)Rolling)normalize_arg)tokenize)BlockwiseDepDict)methods)check_axis_keyword_deprecation)Scalar_Frame_get_divisions_map_partitions_get_meta_map_partitions_maybe_from_pandasapply_and_enforcenew_dd_objectpartitionwise_graph)from_pandas)_maybe_align_partitions)insert_meta_param_descriptionis_dask_collectionis_dataframe_likeis_series_like)unpack_collections)HighLevelGraph)
no_default)Mapplyderived_fromfuncnamehas_keywordCombinedOutputc                 C  s   d}| d urt |tr| jd |krt||d ur*t |tr*|jd |kr*t|dd | ||fD }t|}t|| d urCt| nd |d urNt|fS d fS )NzqPartition size is less than overlapping window size. Try using ``df.repartition`` to increase the partition size.r   c                 S  s   g | ]}|d ur|qS N ).0pr#   r#   _/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/dask/dataframe/rolling.py
<listcomp>9   s    z#_combined_parts.<locals>.<listcomp>)
isinstancer   shapeNotImplementedErrorr	   concatr!   len)Z	prev_partZcurrent_partZ	next_partbeforeaftermsgpartscombinedr#   r#   r&   _combined_parts*   s&   
r2   c                 O  s   dd |D }|d \}}}dd |D }| |i |}	|d u r"d }t |tjr*|}d }
|jd dkr=|	jd |jd  }
|rE|
rE||
9 }|d u rP|	j|d  S t |tjrX|}|r`|
r`||
9 }|	j||  S )Nc                 S     g | ]	}t |tr|qS r#   r(   r!   r$   dfr#   r#   r&   r'   F       z!overlap_chunk.<locals>.<listcomp>r   c                 S  s"   g | ]}t |tr|d  n|qS r   r4   r$   argr#   r#   r&   r'   I   s   " )r(   datetime	timedeltar)   iloc)funcr-   r.   argskwargsdfsr1   Zprev_part_lengthZnext_part_lengthoutZ	expansionr#   r#   r&   overlap_chunkE   s(   rC   T)metaenforce_metadatatransform_divisionsalign_dataframesc             
     s  t |st|rt|st|dn|}|f| }ttr"tt tr,t  ttj	s8t tj	rDt
|jjjsCtdnttrVdkrVt trV dksZtd|	dd}
|	dd}t| slJ |
dur~t g|R i |	}ndt|  }
t|  g|R i |	}|
 d	| }
|rt|}zt|}W n ty } zt| d
|d}~ww dd |D }t||| |	|tdd |D r|
dft| tdd |D f|	fi}tj|
||d}t||
S g }g }t|||| ||	 fdd}|D ]7}t|tr"||}| | | | q
t!|}t"|\}}|r;| | |#| q
| | q
i }d}|	$ D ]\}}t!|}t"|\}}|#| |||< |rfd}qJt%| drdd t&dd D }|'dt(| | fdd} |rt)t*|
|  g|R |t+d|}n|r|	n|}t)t+|
|  g|R i |d|i}tj|
||d}t,||
S )a]	  Apply a function to each partition, sharing rows with adjacent partitions.

    Parameters
    ----------
    func : function
        The function applied to each partition. If this function accepts
        the special ``partition_info`` keyword argument, it will receive
        information on the partition's relative location within the
        dataframe.
    df: dd.DataFrame, dd.Series
    args, kwargs :
        Positional and keyword arguments to pass to the function.
        Positional arguments are computed on a per-partition basis, while
        keyword arguments are shared across all partitions. The partition
        itself will be the first positional argument, with all other
        arguments passed *after*. Arguments can be ``Scalar``, ``Delayed``,
        or regular Python objects. DataFrame-like args (both dask and
        pandas) will be repartitioned to align (if necessary) before
        applying the function; see ``align_dataframes`` to control this
        behavior.
    enforce_metadata : bool, default True
        Whether to enforce at runtime that the structure of the DataFrame
        produced by ``func`` actually matches the structure of ``meta``.
        This will rename and reorder columns for each partition,
        and will raise an error if this doesn't work,
        but it won't raise if dtypes don't match.
    before : int, timedelta or string timedelta
        The rows to prepend to partition ``i`` from the end of
        partition ``i - 1``.
    after : int, timedelta or string timedelta
        The rows to append to partition ``i`` from the beginning
        of partition ``i + 1``.
    transform_divisions : bool, default True
        Whether to apply the function onto the divisions and apply those
        transformed divisions to the output.
    align_dataframes : bool, default True
        Whether to repartition DataFrame- or Series-like args
        (both dask and pandas) so their divisions align before applying
        the function. This requires all inputs to have known divisions.
        Single-partition inputs will be split into multiple partitions.

        If False, all inputs must have either the same number of partitions
        or a single partition. Single-partition inputs will be broadcast to
        every partition of multi-partition inputs.
    $META

    See Also
    --------
    dd.DataFrame.map_overlap
       zMMust have a `DatetimeIndex` when using string offset for `before` and `after`r   z*before and after must be positive integerstokenNparent_metazoverlap--zx. If you don't want the partitions to be aligned, and are calling `map_overlap` directly, pass `align_dataframes=False`.c                 S  r3   r#   )r(   r   r5   r#   r#   r&   r'      r7   zmap_overlap.<locals>.<listcomp>c                 s  s    | ]}t |tV  qd S r"   )r(   r   r9   r#   r#   r&   	<genexpr>   s    zmap_overlap.<locals>.<genexpr>c                 S  s   g | ]}|j d fqS r8   )_namer9   r#   r#   r&   r'      s    dependenciesc                   s   i }t | \}}|| t|  \}}|| dt|  }tt||  |D ]\}\}}	}
||f}t||	|
 f||< q*tj	||| gd}t
||S )Nzoverlap-concat-rN   )_get_previous_partitionsupdate_get_nexts_partitionsr   	enumeratezipZ__dask_keys__r2   r   from_collectionsr   )r:   dskZprevs_parts_dskprevsZnexts_parts_dsknextsname_aiprevcurrentnextkeygraph)r.   r-   	divisionsrD   r#   r&   _handle_frame_argument   s   

z+map_overlap.<locals>._handle_frame_argumentTFpartition_infoc                 S  s   i | ]\}}|f||d qS ))numberdivisionr#   )r$   rZ   rd   r#   r#   r&   
<dictcomp>  s    zmap_overlap.<locals>.<dictcomp>c                   s    |i |d| iS )Nrb   r#   )rb   r?   r@   )	orig_funcr#   r&   r>     s   zmap_overlap.<locals>.func)rO   Z_func_metarO   )-r   r   r   r   r(   strpdZto_timedeltar;   r<   r   index_meta_nonemptyZinferred_type	TypeErrorr   
ValueErrorpopcallabler   r   r   r   r   allr   tupler   rU   r   r   r   appendr   r   extenditemsr    rS   insertr   r   r   rC   r   )r>   r6   r-   r.   rD   rE   rF   rG   r?   r@   namerJ   rI   erA   layerr_   Zargs2rO   ra   r:   Zarg2collectionsZkwargs3simplekvrb   rV   Zkwargs4r#   )r.   r-   r`   rD   rg   r&   map_overlap`   s   B









r~   c           
      C  s  i }| j }d}dt| | }|r<t|tr<g }td| jD ]}||f}tj||f|f||< || q|d ||fS t|t	j
rt| j jdd }	||	k rYt|g }td| jD ]}||f}t||d f||f|f||< || qa|d ||fS dg| j }||fS )zE
    Helper to get the nexts partitions required for the overlap
    ziPartition size is less than specified window. Try using ``df.repartition`` to increase the partition sizezoverlap-append-rH   Nrf   r   )rM   r   r(   r   rangenpartitionsr   headrs   r;   r<   rj   Seriesr`   diffr=   anyrn   _head_timedelta)
r6   r.   rV   df_nameZtimedelta_partition_messageZname_brX   rZ   r^   deltasr#   r#   r&   rR   =  s4   

rR   c                   s  i }| j  dt| | }|r7t|tr7dg}t| jd D ]}||f}tj |f|f||< || q||fS t|t	j
rt| j}| jdd }||k r|d }	dg}t| jd D ]Q}||d  }
t|
| |	}|| |}}||kr|dkr|||  }|d }||kr|dks~||f}t fddt||d D  |d f|f||< || q`||fS dg}t| jd D ]}||f}t |fg |d f|f||< || q||fS dg| j }||fS )zH
    Helper to get the previous partitions required for the overlap
    zoverlap-prepend-NrH   rf   r   c                   s   g | ]} |fqS r#   r#   )r$   r|   r   r#   r&   r'     s    z,_get_previous_partitions.<locals>.<listcomp>)rM   r   r(   r   r   r   r   tailrs   r;   r<   rj   r   r`   r   r=   r   max_tail_timedelta)r6   r-   rV   rY   rW   rZ   r^   Zdivsr   Zpt_zZpt_iZlbfirstjr#   r   r&   rP   b  sX   1

rP   c                 C  s   ||j | j  | k  S )zReturn rows of ``next_`` whose index is before the last
    observation in ``current`` + ``after``.

    Parameters
    ----------
    current : DataFrame
    next_ : DataFrame
    after : timedelta

    Returns
    -------
    overlapped : DataFrame
    )rk   r   )r\   Znext_r.   r#   r#   r&   r     s   r   c                   s   t  fdd| D }|S )a4  Return the concatenated rows of each dataframe in ``prevs`` whose
    index is after the first observation in ``current`` - ``before``.

    Parameters
    ----------
    current : DataFrame
    prevs : list of DataFrame objects
    before : timedelta

    Returns
    -------
    overlapped : DataFrame
    c                   s$   g | ]}||j j    k qS r#   )rk   min)r$   r[   r-   r\   r#   r&   r'     s   $ z#_tail_timedelta.<locals>.<listcomp>)r	   r+   )rW   r\   r-   selectedr#   r   r&   r     s   r   c                   @  sH  e Zd ZdZddddefddZdd Zedd	 Ze	d
d Z
dd Zeedd Zeedd Zeedd Zeedd Zeedd Zeedd Zeedd Zeed.ddZeed.dd Zeed!d" Zeed#d$ Zeed%d& Zee		'			d/d(d)Zeed*d+ ZeZd,d- ZdS )0r   z%Provides rolling window calculations.NFc                 C  sn   || _ || _|| _|| _|| _|| _|jjdi |   t	| jt
r%d nd| _| jdv r5tdt d S d S )Nfreq)rk   rH   rowszTUsing axis=1 in Rolling has been deprecated and will be removed in a future version.r#   )objwindowmin_periodscenteraxiswin_typerh   rolling_rolling_kwargsr(   int	_win_typewarningswarnFutureWarning)selfr   r   r   r   r   r   r#   r#   r&   __init__  s   	
zRolling.__init__c                 C  s.   | j | j| j| jd}| jtur| j|d< |S )N)r   r   r   r   r   )r   r   r   r   r   r   r   r@   r#   r#   r&   r     s   

zRolling._rolling_kwargsc                 C  s,   | j dv pt| jto| jdkp| jjdkS )zm
        Indicator for whether the object has a single partition (True)
        or multiple (False).
        )rH   columnsrH   )r   r(   r   r   r   r   r   r#   r#   r&   _has_single_partition  s
   

zRolling._has_single_partitionc                 O  sJ   t   | jdi |}W d    n1 sw   Y  t|||i |S )Nr#   )r
   r   getattr)r6   rolling_kwargsrw   r?   r@   r   r#   r#   r&   pandas_rolling_method  s   zRolling.pandas_rolling_methodc                 O  s   |   }| j| jj||g|R i |}| jr*| jj| j||g|R ||d|S | jr:| jd }| j| d }n| jdkrHt	
| j}d}n| jd }d}t| j| j||||g|R ||d|S )N)rI   rD      rH   r   r   )r   r   r   rl   r   Zmap_partitionsr   r   r   rj   Z	Timedeltar~   )r   method_namer?   r@   r   rD   r-   r.   r#   r#   r&   _call_method  sX   





zRolling._call_methodc                 C  
   |  dS )Ncountr   r   r#   r#   r&   r   /     
zRolling.countc                 C  r   )Ncovr   r   r#   r#   r&   r   3  r   zRolling.covc                 C  r   )Nsumr   r   r#   r#   r&   r   7  r   zRolling.sumc                 C  r   )Nmeanr   r   r#   r#   r&   r   ;  r   zRolling.meanc                 C  r   )Nmedianr   r   r#   r#   r&   r   ?  r   zRolling.medianc                 C  r   )Nr   r   r   r#   r#   r&   r   C  r   zRolling.minc                 C  r   )Nr   r   r   r#   r#   r&   r   G  r   zRolling.maxrH   c                 C     | j dddS )NstdrH   ddofr   r   r   r#   r#   r&   r   K     zRolling.stdc                 C  r   )NvarrH   r   r   r   r#   r#   r&   r   O  r   zRolling.varc                 C  r   )Nskewr   r   r#   r#   r&   r   S  r   zRolling.skewc                 C  r   )Nkurtr   r   r#   r#   r&   r   W  r   zRolling.kurtc                 C  s   |  d|S )Nquantiler   )r   r   r#   r#   r&   r   [  s   zRolling.quantilecythonc              	   C  s(   |pi }|pd}| j d||||||dS )Nr#   r   )rawengineengine_kwargsr?   r@   r   )r   r>   r   r   r   r?   r@   r#   r#   r&   r   _  s   
zRolling.applyc                 O  s   | j d|g|R i |S )Naggr   )r   r>   r?   r@   r#   r#   r&   	aggregateu  s   zRolling.aggregatec              	   C  sJ   dd }|   }| j|d< | j|d< dddd t| |d	D S )
Nc                 S  s    | \}}dddddd}|| S )Nr   rH   r         r   r   r   r   r   r#   )itemr|   r}   _orderr#   r#   r&   order|  s   zRolling.__repr__.<locals>.orderr   r   zRolling [{}],c                 s  s*    | ]\}}|d ur| d| V  qd S )N=r#   )r$   r|   r}   r#   r#   r&   rL     s    z#Rolling.__repr__.<locals>.<genexpr>)r^   )r   r   r   formatjoinsortedru   )r   r   r   r#   r#   r&   __repr__{  s   


zRolling.__repr__)rH   )Fr   NNN)__name__
__module____qualname____doc__r   r   r   propertyr   staticmethodr   r   r   
pd_Rollingr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r#   r#   r#   r&   r     sb    


(










r   c                      sT   e Zd Z					d fdd	Z fddZedddd	d
Z fddZ  ZS )RollingGroupbyNFr   c           	        s   |j | _ |j| _|j}| jd ur7t| jtr| jg}nt| j}t|jtr-||j n|	|j || }t
 j||||||d d S )Nr   )_groupby_kwargsZ_slice_groupby_slicer   r(   ri   listZbyrs   rt   superr   )	r   groupbyr   r   r   r   r   r   Zsliced_plus	__class__r#   r&   r     s&   	



zRollingGroupby.__init__c                   s(   t   }|dd dv r|d |S )Nr   )r   rk   )r   r   getro   r   r   r#   r&   r     s   

zRollingGroupby._rolling_kwargsgroupby_kwargsgroupby_slicec          	      O  sH   | j di |}|r|| }|jdi |}t|||i |jddS )Nrf   )levelr#   )r   r   r   Z
sort_index)	r6   r   rw   r   r   r?   r@   r   r   r#   r#   r&   r     s
   
z$RollingGroupby.pandas_rolling_methodc                   s$   t  j|g|R | j| jd|S )Nr   )r   r   r   r   )r   r   r?   r@   r   r#   r&   r     s   zRollingGroupby._call_method)NNFNr   )	r   r   r   r   r   r   r   r   __classcell__r#   r#   r   r&   r     s    !r   )@
__future__r   r;   r   numbersr   Zpandasrj   Zpandas.api.typesr   Zpandas.core.windowr   r   Zdask.array.corer   Z	dask.baser   Zdask.blockwiser   Zdask.dataframer	   Zdask.dataframe._compatr
   Zdask.dataframe.corer   r   r   r   r   r   r   r   Zdask.dataframe.ior   Zdask.dataframe.multir   Zdask.dataframe.utilsr   r   r   r   Zdask.delayedr   Zdask.highlevelgraphr   Zdask.typingr   Z
dask.utilsr   r   r   r   r    typerr   r!   r2   rC   r~   rR   rP   r   r   r   r#   r#   r#   r&   <module>   sJ    (
 ]%A L