o
    Nrfy9                     @  s  d dl mZ d dlZd dlmZ d dlZd dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZmZmZmZmZmZ d dlmZmZmZmZmZmZmZmZm Z  d d	l!m"Z"m#Z#m$Z$ d d
l%m&Z& eZ'eZ(dcddZ)dcddZ*e&dddd Z+dd Z,dcddZ-ddddZ.dd Z/dd Z0dd Z1dd  Z2d!d" Z3d#d$ Z4d%d& Z5	'ded(d)Z6d*d+ Z7d,d- Z8d.d/ Z9d0d1 Z:d2d3 Z;d4d5 Z<d6d7 Z=dcd8d9Z>dfd:d;Z?	'dgd<d=Z@d>d? ZAd@dA ZBdBdC ZCdDdE ZDdFdG ZEdhdHdIZFdJdK ZGdLdM ZHdNdO ZIdPdQ ZJdRdS ZKdTdU ZLdVdW ZMdXdY ZNdZd[ ZOd\d] ZPd^d_ ZQeeOd`daZReeOdbdaZSeePd`daZTeePdbdaZUeeQd`daZVeeQdbdaZWdS )i    )annotationsN)partial)is_extension_array_dtype)PerformanceWarning)	partition)PANDAS_GE_131PANDAS_GE_140PANDAS_GE_200!check_apply_dataframe_deprecation$check_applymap_dataframe_deprecationcheck_convert_dtype_deprecationcheck_observed_deprecation)	concatconcat_dispatchgroup_split_dispatchhash_object_dispatchis_categorical_dtypeis_categorical_dtype_dispatchtolisttolist_dispatchunion_categoricals)is_dataframe_likeis_index_likeis_series_like)_deprecated_kwargc                 C  s    |du r	| j | S | j ||f S )z"
    .loc for known divisions
    N)locdfZiindexercindexer r   _/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/dask/dataframe/methods.pyr   .   s   
r   c                 C  s   | j d d |f S N)iloc)r   r   r   r   r    r"   8      r"   Zconvert_dtypec              	   O  sp   t  + t  | j|i |W  d    W  d    S 1 s!w   Y  W d    d S 1 s1w   Y  d S r!   )r   r
   applyr   argskwargsr   r   r    r$   <   s   "r$   c                 O  s:   t   | j|i |W  d    S 1 sw   Y  d S r!   )r   applymapr%   r   r   r    r(   C   s   $r(   c                 C  s:   zt | ||W S  ty   | dj dd|f  Y S w )z$
    .loc for unknown divisions
    r   N)r   KeyErrorheadr   r   r   r    try_locH   s
   r+   Tc           
      C  s&  t | jdkr	| S tr|durtjdtd i }d}n|pd}d|i}|dkrX| jjsX|dur@|r9| | j|k } n| | j|k } |durV|rO| | j|k } | S | | j|k  } | S t| ||| }|sy|dury|jj|dfi |}|j	d| }|s|dur|jj|dfi |}	|j	|	d }|S )	aY  Index slice start/stop. Can switch include/exclude boundaries.

    Examples
    --------
    >>> df = pd.DataFrame({'x': [10, 20, 30, 40, 50]}, index=[1, 2, 2, 3, 4])
    >>> boundary_slice(df, 2, None)
        x
    2  20
    2  30
    3  40
    4  50
    >>> boundary_slice(df, 1, 3)
        x
    1  10
    2  20
    2  30
    3  40
    >>> boundary_slice(df, 1, 3, right_boundary=False)
        x
    1  10
    2  20
    2  30

    Empty input DataFrames are returned

    >>> df_empty = pd.DataFrame()
    >>> boundary_slice(df_empty, 1, 3)
    Empty DataFrame
    Columns: []
    Index: []
    r   NzXThe `kind` argument is no longer used/supported. It will be dropped in a future release.)categoryr   kindleftright)
lenindexr   warningswarnFutureWarningis_monotonic_increasinggetattrZget_slice_boundr"   )
r   startstopZright_boundaryZleft_boundaryr-   Z	kind_optsresultZright_indexZ
left_indexr   r   r    boundary_sliceR   s>    r:   c                 C  s   t |  S r!   )pdZnotnullsumxr   r   r    index_count   s   r?   c                 C  sf   z#t jdd t d | | W  d    W S 1 sw   Y  W d S  ty2   ttj Y S w )NT)recordalways)r2   catch_warningssimplefilterZeroDivisionErrornpfloat64nan)snr   r   r    mean_aggregate   s   
(rJ   c                 C  (   t | tjst | trtj| |dS | S Nr1   
isinstancerE   Zndarraylistr;   Series)Z	array_varr1   r   r   r    wrap_var_reduction      rR   c                 C  rK   rL   rN   )Z
array_skewr1   r   r   r    wrap_skew_reduction   rS   rT   c                 C  rK   rL   rN   )Zarray_kurtosisr1   r   r   r    wrap_kurtosis_reduction   rS   rU   c                 C  s   t | |g}|j|dS rL   )r;   r   reindex)Znumeric_varZtimedelta_varcolumnsvarsr   r   r    var_mixed_concat   s   rY   c                 C  sf   t | dksJ g }tdd | D t d}|D ]}|D ]}||vr&|| qqtj| ddd|S )Nr   c                 s  s    | ]}|j V  qd S r!   rM   ).0r>   r   r   r    	<genexpr>   s    z%describe_aggregate.<locals>.<genexpr>)key   F)axissort)r0   sortedappendr;   r   rV   )valuesnamesZvalues_indexesZidxnamesnamer   r   r    describe_aggregate   s   
re   Fc                 C  s6  t | dksJ | \}}}}}}	t|rt| }
nt|}
|r<t|}t|}t|}t|	}	|dd }|rOt|}t|	}	|dd }|r\|
||gddgd}n|
||||gg dd}d	d
 t|j	D |_	t|r|
t|kr| }|
|	gdgd}t
|||gdd}t|r||_|S )N   c                 S  
   t | S r!   )r;   to_timedeltar=   r   r   r    <lambda>      
 z,describe_numeric_aggregate.<locals>.<lambda>c                 S  rg   r!   )r;   to_datetimer=   r   r   r    ri      rj   countminrM   )rl   meanstdrm   c                 S  s   g | ]
}|d  ddqS )d   g%r   )rZ   lr   r   r    
<listcomp>   s    z.describe_numeric_aggregate.<locals>.<listcomp>maxF)r_   )r0   r   typeZto_framer;   rh   r$   rk   r   r1   r   rd   )statsrd   Zis_timedelta_colZis_datetime_colrl   rn   ro   rm   qru   typZpart1Zpart3r9   r   r   r    describe_numeric_aggregate   s4   





rz   c                 C  sX  t | }|dk}|dk}|s|sJ |r| \}}}n| \}}}}}	t |dkrNddg}
ddg}d }|
tjtjg |ddg t}tj|
|||d}|S |jd }|jd }g d	}||g}|r|j	}t
|}|jd ury|d ury||}n||}tj
||d
}tj
|	|d
}|ddg |||||g n|||g tj|||dS )N      r   rl   uniquetopfreq)r1   dtyperd   )r}   rl   r~   r   )tzfirstlast)r1   rd   )r0   extendrE   rG   objectr;   rQ   r1   r"   r   	TimestamptzinfoZ
tz_convertZtz_localize)rw   rd   Zargs_lenZis_datetime_columnZis_categorical_columnZnuniquerl   Ztop_freqZmin_tsZmax_tsdatar1   r   r9   r~   r   rb   r   r   r   r   r   r    describe_nonnumeric_aggregate   s@   



r   c                 C  s   |du r|S | ||S )zApply aggregation function within a cumulative aggregation

    Parameters
    ----------
    aggregate: function (a, a) -> a
        The aggregation function, like add, which is used to and subsequent
        results
    x:
    y:
    Nr   )Z	aggregater>   yr   r   r    _cum_aggregate_apply/  s   
r   c                 C  s    | d u r|S |d u r| S | | S r!   r   r>   r   r   r   r    cumsum_aggregate@  
   r   c                 C  s    | d u r|S |d u r| S | | S r!   r   r   r   r   r    cumprod_aggregateI  r   r   c                 C  sB   t | st| r| j| |k |  B || jd dS | |k r| S |S Nr]   r^   r   r   whereisnullndimr   r   r   r    cummin_aggregateR     "r   c                 C  sB   t | st| r| j| |k|  B || jd dS | |kr| S |S r   r   r   r   r   r    cummax_aggregateY  r   r   c                 G  s   t td|}tt|t| j@ ot }| jt|d} t  tj	ddt
d | D ]\}}|| |< q.W d    | S 1 sBw   Y  | S )N   )deepignorez DataFrame is highly fragmented *)messager,   )dictr   boolsetrW   r   copyr2   rB   filterwarningsr   items)r   pairsr   rd   valr   r   r    assign`  s    


r   c                 C  s*   |   }t|st|stj||d}|S )N)rd   )r}   r   r   r;   rQ   )r>   Zseries_nameoutr   r   r    r}   r  s   r}   c                 K  sB   t   | jdddi| W  d    S 1 sw   Y  d S )Nlevelr   r   )r   groupbyr<   )r>   r_   	ascendinggroupby_kwargsr   r   r    value_counts_combine{  s   $r   c                 K  sN   t | fi |}|r||d ur|n|  }|r|j|d}tr%|r%d|_|S )N)r   Z
proportion)r   r<   Zsort_valuesr	   rd   )r>   total_lengthr_   r   	normalizer   r   r   r   r    value_counts_aggregate  s   r   c                 C     | j S r!   )nbytesr=   r   r   r    r        r   c                 C  r   r!   )sizer=   r   r   r    r     r   r   c                 C  s   | j }t|r|t}|S r!   )rb   r   astyper   )r   rb   r   r   r    rb     s   
rb   c                 C  s,   t j|}t| dkr| j|||dS | S )Nr   )Zrandom_statefracreplace)rE   randomZRandomStater0   sample)r   stater   r   rsr   r   r    r     s    r   c                 C  s    | j |dd} | j|| _| S r   )ZdroprW   r   )r   rW   r   r   r   r    drop_columns  s   r   c                 C  s@   |r	t | | }n|  }|r| jjdd rtd|S )Nr   r   zAll NaN partition encountered in `fillna`. Try using ``df.repartition`` to increase the partition size, or specify `limit` in `fillna`.)r6   Zfillnar   rb   allany
ValueError)r   methodcheckr   r   r   r    fillna_check  s   r   c                 C     | j ddd S Nr   F)r   observed)r   r<   r   r   r   r    	pivot_agg  r#   r   c                 C  r   r   )r   r   r   r   r   r    pivot_agg_first  r#   r   c                 C  r   r   )r   r   r   r   r   r    pivot_agg_last  r#   r   c              	   C     t j| |||ddddS )Nr<   Fr1   rW   rb   ZaggfuncZdropnar   r;   pivot_tabler   r1   rW   rb   r   r   r    	pivot_sum     r   c              	   C  s    t j| |||ddddtjS )Nrl   Fr   )r;   r   r   rE   rF   r   r   r   r    pivot_count  s   r   c              	   C  r   )Nr   Fr   r   r   r   r   r    pivot_first  r   r   c              	   C  r   )Nr   Fr   r   r   r   r   r    
pivot_last  r   r   c                 C  s   |   } || _| S r!   )r   r1   )r   indr   r   r    assign_index  s   r   c                 C  sJ   | j rd }nt| r| n| j}t| ||d |d gg}tj|g ddS )Nr   	monotonicr   r   )r   rW   )emptyr   r"   r6   r;   	DataFrame)r>   propr   r   r   r    _monotonic_chunk  s
   r   c                 C  sf   | j rd }n$t| ddg   }| d  ot||}||jd |jd gg}tj|g ddS )Nr   r   r   r   r   r   )rW   )	r   r;   rQ   to_numpyravelr   r6   r"   r   )concatenatedr   r   rH   Zis_monotonicr   r   r    _monotonic_combine  s   r   c                 C  s0   t | ddg   }| d  ot||S )Nr   r   r   )r;   rQ   r   r   r   r6   )r   r   rH   r   r   r    _monotonic_aggregate  s   r   r5   )r   Zis_monotonic_decreasingr!   )TTN)NFF)TF)NTFF)T)X
__future__r   r2   	functoolsr   numpyrE   Zpandasr;   Zpandas.api.typesr   Zpandas.errorsr   Ztlzr   Zdask.dataframe._compatr   r   r	   r
   r   r   r   Zdask.dataframe.dispatchr   r   r   r   r   r   r   r   r   Zdask.dataframe.utilsr   r   r   Z
dask.utilsr   Zhash_dfZgroup_splitr   r"   r$   r(   r+   r:   r?   rJ   rR   rT   rU   rY   re   rz   r   r   r   r   r   r   r   r}   r   r   r   r   rb   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Zmonotonic_increasing_chunkZmonotonic_decreasing_chunkZmonotonic_increasing_combineZmonotonic_decreasing_combineZmonotonic_increasing_aggregateZmonotonic_decreasing_aggregater   r   r   r    <module>   s    $,






K	
*1		

	
	



