
    >ie7                    n   d dl mZ d dlZd dlmZ d dlZd dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZmZmZmZmZmZ d dlmZmZmZmZmZmZmZmZm Z  d d	l!m"Z"m#Z#m$Z$ eZ%eZ&d8d
Z'd8dZ(d Z)d Z*d8dZ+d9dZ,d Z-d Z.d Z/d Z0d Z1d Z2d Z3	 d:dZ4d Z5d Z6d Z7d Z8d Z9d Z:d  Z;d8d!Z<d;d"Z=	 d<d#Z>d$ Z?d% Z@d& ZAd' ZBd( ZCd=d)ZDd* ZEd+ ZFd, ZGd- ZHd. ZId/ ZJd0 ZKd1 ZLd2 ZMd3 ZNd4 ZO eeMd56          ZP eeMd76          ZQ eeNd56          ZR eeNd76          ZS eeOd56          ZT eeOd76          ZUdS )>    )annotationsN)partial)is_extension_array_dtype)PerformanceWarning)	partition)PANDAS_GE_131PANDAS_GE_140PANDAS_GE_200!check_apply_dataframe_deprecation$check_applymap_dataframe_deprecationcheck_convert_dtype_deprecationcheck_observed_deprecation)	concatconcat_dispatchgroup_split_dispatchhash_object_dispatchis_categorical_dtypeis_categorical_dtype_dispatchtolisttolist_dispatchunion_categoricals)is_dataframe_likeis_index_likeis_series_likec                >    || j         |         S | j         ||f         S )z"
    .loc for known divisions
    )locdfiindexercindexers      6lib/python3.11/site-packages/dask/dataframe/methods.pyr   r   -   s)     vhvh())    c                $    | j         d d |f         S N)iloc)r   r    s     r!   r%   r%   7   s    7111h;r"   c                    t                      5  t                      5   | j        |i |cd d d            cd d d            S # 1 swxY w Y   	 d d d            d S # 1 swxY w Y   d S r$   )r   r   applyr   argskwargss      r!   r'   r'   ;   s   	(	*	* - -.00 	- 	-28T,V,,	- 	- 	- 	- 	- 	- 	-- - - - - - - -	- 	- 	- 	- 	- 	- 	- 	- 	-- - - - - - - - - - - - - - - - - -s3   AAAA	AA	AA!$A!c                l    t                      5   | j        |i |cd d d            S # 1 swxY w Y   d S r$   )r   applymapr(   s      r!   r,   r,   A   s    	-	/	/ , ,r{D+F++, , , , , , , , , , , , , , , , , ,s   )--c                    	 t          | ||          S # t          $ r' |                     d          j         dd|f         cY S w xY w)z$
    .loc for unknown divisions
    r   N)r   KeyErrorheadr   s      r!   try_locr0   F   sY    +2x*** + + +wwqzz~aaak****+s    .AATc                ,   t          | j                  dk    r| S t          r"|t          j        dt
                     i }d}n|pd}d|i}|dk    r\| j        j        sP|%|r| | j        |k             } n| | j        |k             } |%|r| | j        |k             } n| | j        |k              } | S t          | |          ||         }|s%|# |j        j        |dfi |}|j	        d|         }|s%|# |j        j        |dfi |}	|j	        |	d         }|S )	aY  Index slice start/stop. Can switch include/exclude boundaries.

    Examples
    --------
    >>> df = pd.DataFrame({'x': [10, 20, 30, 40, 50]}, index=[1, 2, 2, 3, 4])
    >>> boundary_slice(df, 2, None)
        x
    2  20
    2  30
    3  40
    4  50
    >>> boundary_slice(df, 1, 3)
        x
    1  10
    2  20
    2  30
    3  40
    >>> boundary_slice(df, 1, 3, right_boundary=False)
        x
    1  10
    2  20
    2  30

    Empty input DataFrames are returned

    >>> df_empty = pd.DataFrame()
    >>> boundary_slice(df_empty, 1, 3)
    Empty DataFrame
    Columns: []
    Index: []
    r   NzXThe `kind` argument is no longer used/supported. It will be dropped in a future release.)categoryr   kindleftright)
lenindexr   warningswarnFutureWarningis_monotonic_increasinggetattrget_slice_boundr%   )
r   startstopright_boundaryleft_boundaryr3   	kind_optsresultright_index
left_indexs
             r!   boundary_slicerF   P   s{   @ 28}}	 #M:&   
 	}uTN	u}}RX=}
  *E)*5() )D()4(	RuTz*F +d.2fl24MM9MM\k\* *U.1V\1%NNINN
Z[[)Mr"   c                N    t          j        |                                           S r$   )pdnotnullsumxs    r!   index_countrM      s    :a==r"   c                    	 t          j        d          5  t          j        d           | |z  cd d d            S # 1 swxY w Y   d S # t          $ r! t	          j        t          j                  cY S w xY w)NT)recordalways)r8   catch_warningssimplefilterZeroDivisionErrornpfloat64nan)sns     r!   mean_aggregaterY      s    "$D111 	 	!(+++q5	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	  " " "z"&!!!!!"s1   A
 =A
 AA
 AA
 
(A54A5c                    t          | t          j                  st          | t                    rt	          j        | |          S | S Nr7   
isinstancerT   ndarraylistrH   Series)	array_varr7   s     r!   wrap_var_reductionrc      sB    )RZ(( 1Jy$,G,G 1y%0000r"   c                    t          | t          j                  st          | t                    rt	          j        | |          S | S r[   r]   )
array_skewr7   s     r!   wrap_skew_reductionrf      sB    *bj)) 2Z
D-I-I 2y51111r"   c                    t          | t          j                  st          | t                    rt	          j        | |          S | S r[   r]   )array_kurtosisr7   s     r!   wrap_kurtosis_reductionri      sB    ."*-- 6ND1Q1Q 6yu5555r"   c                Z    t          j        | |g          }|                    |          S r[   )rH   r   reindex)numeric_vartimedelta_varcolumnsvarss       r!   var_mixed_concatrp      s*    9k=122D<<g<&&&r"   c                
   t          |           dk    sJ g }t          d | D             t                     }|D ] }|D ]}||vr|                    |           !t          j        | dd                              |          S )Nr   c              3  $   K   | ]}|j         V  d S r$   r\   ).0rL   s     r!   	<genexpr>z%describe_aggregate.<locals>.<genexpr>   s$      55QW555555r"   )key   F)axissort)r6   sortedappendrH   r   rk   )valuesnamesvalues_indexesidxnamesnames        r!   describe_aggregater      s    v;;???? E55f5553???N" # # 	# 	#D5  T"""	# 9V!%00088???r"   Fc                   t          |           dk    sJ | \  }}}}}}	t          |          r"t          |                                          }
nt          |          }
|rft	          j        |          }t	          j        |          }t	          j        |          }t	          j        |	          }	|                    d           }|r>t	          j        |          }t	          j        |	          }	|                    d           }|r |
||gddg          }n |
||||gg d          }d t          |j	                  D             |_	        t          |          r'|
t          |          k    r|                                } |
|	gd	g          }t          |||gd
          }t          |          r||_        |S )N   c                *    t          j        |           S r$   )rH   to_timedeltarK   s    r!   <lambda>z,describe_numeric_aggregate.<locals>.<lambda>   s    boa00 r"   c                *    t          j        |           S r$   )rH   to_datetimerK   s    r!   r   z,describe_numeric_aggregate.<locals>.<lambda>   s    bnQ// r"   countminr\   )r   meanstdr   c                     g | ]}|d z  ddS )d   g% )rs   ls     r!   
<listcomp>z.describe_numeric_aggregate.<locals>.<listcomp>   s$    8881!c'888r"   maxF)rx   )r6   r   typeto_framerH   r   r'   r   r   r7   r   r   )statsr   is_timedelta_colis_datetime_colr   r   r   r   qr   typpart1part3rC   s                 r!   describe_numeric_aggregater      s    u::????$)!E4c1ce 5>>##$$1gg 2t$$oc""oc""oc""GG0011 1nS!!nS!!GG//00 TUCL%(8999UD#s+3R3R3RSSS88qw888AGa SDGG^^JJLLCeW%%%EUAu%E222Ff Mr"   c                b   t          |           }|dk    }|dk    }|s|sJ |r| \  }}}n| \  }}}}}	t          |          dk    rmddg}
ddg}d }|
                    t          j        t          j        g           |                    ddg           t          }t          j        |
|||          }|S |j        d         }|j        d         }g d	}||g}|r|j	        }t          j
        |          }|j        ||                    |          }n|                    |          }t          j
        ||
          }t          j
        |	|
          }|                    ddg           |                    ||||g           n|                    ||g           t          j        |||          S )N      r   r   uniquetopfreq)r7   dtyper   )r   r   r   r   )tzfirstlast)r7   r   )r6   extendrT   rV   objectrH   ra   r7   r%   r   	Timestamptzinfo
tz_converttz_localize)r   r   args_lenis_datetime_columnis_categorical_columnnuniquer   top_freqmin_tsmax_tsdatar7   r   rC   r   r   r{   r   r   r   s                       r!   describe_nonnumeric_aggregater      s   5zzH!Q$M6!6666 9#( 380&& 8}}1v(#RVRV$%%%eV_%%%4uEEEE
.
C=D...EuF #Vl3:!bn..$$CC//"%%CV+++|Fr***gv&'''sD%.////sDk"""9V5t4444r"   c                "    ||S  | ||          S )zApply aggregation function within a cumulative aggregation

    Parameters
    ----------
    aggregate: function (a, a) -> a
        The aggregation function, like add, which is used to and subsequent
        results
    x:
    y:
    r   )	aggregaterL   ys      r!   _cum_aggregate_applyr   -  s     	yyAr"   c                    | |S || S | |z   S r$   r   rL   r   s     r!   cumsum_aggregater   >      y	
1ur"   c                    | |S || S | |z  S r$   r   r   s     r!   cumprod_aggregater   G  r   r"   c                    t          |           st          |           r9|                     | |k     |                                 z  || j        dz
            S | |k     r| n|S Nrv   rw   r   r   whereisnullndimr   s     r!   cummin_aggregater   P  c    a !-a00 !wwA+QQVaZw@@@EEqqq r"   c                    t          |           st          |           r9|                     | |k    |                                 z  || j        dz
            S | |k    r| n|S r   r   r   s     r!   cummax_aggregater   W  r   r"   c                   t          t          d|                    }t          t          |          t          | j                  z            ot
           }|                     t          |                    } t          j                    5  t          j	        ddt                     |                                D ]
\  }}|| |<   	 d d d            n# 1 swxY w Y   | S )N   )deepignorez DataFrame is highly fragmented *)messager2   )dictr   boolsetrn   r	   copyr8   rQ   filterwarningsr   items)r   pairsr   r   vals        r!   assignr   ^  s    1e$$%%EE

S__,--Cm2CD	d4jj	!	!B		 	"	"  6'	
 	
 	
 	

  	 	ID#BtHH	               Is   <CCCc                    |                                  }t          |          s%t          |          st          j        ||          }|S )N)r   )r   r   r   rH   ra   )rL   series_nameouts      r!   r   r   p  sI    
((**C 3 /=#5#5 /i+...Jr"   c                    t                      5   | j        dddi|                                cd d d            S # 1 swxY w Y   d S )Nlevelr   r   )r   groupbyrJ   )rL   rx   	ascendinggroupby_kwargss       r!   value_counts_combiner   y  s    	#	%	% : :qy33q3N337799: : : : : : : : : : : : : : : : : :s   !=AAc                    t          | fi |}|r|||n|                                z  }|r|                    |          }t          r	|rd|_        |S )N)r   
proportion)r   rJ   sort_valuesr
   r   )rL   rx   r   	normalizetotal_lengthr   r   s          r!   value_counts_aggregater     ss     q
3
3N
3
3C G|7||SWWYYF 3oo	o22    Jr"   c                    | j         S r$   )nbytesrK   s    r!   r   r     s	    8Or"   c                    | j         S r$   )sizerK   s    r!   r   r     s	    6Mr"   c                f    | j         }t          |          r|                    t                    }|S r$   )r{   r   astyper   )r   r{   s     r!   r{   r{     s1    YF  '' 'v&&Mr"   c                    t           j                            |          }t          |           dk    r|                     |||          n| S )Nr   )random_statefracreplace)rT   randomRandomStater6   sample)r   stater   r   rss        r!   r   r     sC    			u	%	%BEHWWq[[299"49AAAVXXr"   c                r    |                      |d          } | j                            |          | _        | S r   )droprn   r   )r   rn   r   s      r!   drop_columnsr     s3    	q	!	!B""5))BJIr"   c                   |r t          | |                      }n|                                 }|rN|                                j                            d                                          rt          d          |S )Nr   r   zAll NaN partition encountered in `fillna`. Try using ``df.repartition`` to increase the partition size, or specify `limit` in `fillna`.)r<   fillnar   r{   allany
ValueError)r   methodcheckr   s       r!   fillna_checkr    s     !gb&!!##iikk 
$((a(004466 
4
 
 	

 Jr"   c                R    |                      d                                          S Nr   )r   )r   rJ   r   s    r!   	pivot_aggr    s"    ::A:""$$$r"   c                R    |                      d                                          S r  )r   r   r  s    r!   pivot_agg_firstr    s"    ::A:$$&&&r"   c                R    |                      d                                          S r  )r   r   r  s    r!   pivot_agg_lastr
    s"    ::A:##%%%r"   c                6    t          j        | |||dd          S )NrJ   Fr7   rn   r{   aggfuncdropnarH   pivot_tabler   r7   rn   r{   s       r!   	pivot_sumr    s(    >
%u   r"   c                p    t          j        | |||dd                              t          j                  S )Nr   Fr  )rH   r  r   rT   rU   r  s       r!   pivot_countr    s:     >
%QV  fRZr"   c                6    t          j        | |||dd          S )Nr   Fr  r  r  s       r!   pivot_firstr    s)    >
%QV   r"   c                6    t          j        | |||dd          S )Nr   Fr  r  r  s       r!   
pivot_lastr    s)    >
%PU   r"   c                <    |                                  } || _        | S r$   )r   r7   )r   inds     r!   assign_indexr    s    	BBHIr"   c                    | j         rd }n8t          |           r| n| j        }t          | |          |d         |d         gg}t	          j        |g d          S )Nr   	monotonicr   r   )r   rn   )emptyr   r%   r<   rH   	DataFrame)rL   propr   s      r!   _monotonic_chunkr#    sh    w 7!!$$0qq!&D!!47DH56<T+I+I+IJJJJr"   c                R   | j         rd }nt          j        | ddg                                                                                   }| d                                         ot          ||          }||j        d         |j        d         gg}t          j        |g d          S )Nr   r   r  r   r  r  )rn   )	r   rH   ra   to_numpyravelr   r<   r%   r!  )concatenatedr"  r   rW   is_monotonics        r!   _monotonic_combiner)    s     7IlGV#45>>@@FFHHII#K04466K71d;K;Kqvay!&*56<&D&D&DEEEEr"   c                    t          j        | ddg                                                                                   }| d                                         ot          ||          S )Nr   r   r  )rH   ra   r%  r&  r   r<   )r'  r"  rW   s      r!   _monotonic_aggregater+    sY    
	,01::<<BBDDEEA$((**?wq$/?/??r"   r;   )r"  is_monotonic_decreasingr$   )TTN)NFF)TF)TFFN)T)V
__future__r   r8   	functoolsr   numpyrT   pandasrH   pandas.api.typesr   pandas.errorsr   tlzr   dask.dataframe._compatr   r	   r
   r   r   r   r   dask.dataframe.dispatchr   r   r   r   r   r   r   r   r   dask.dataframe.utilsr   r   r   hash_dfgroup_splitr   r%   r'   r,   r0   rF   rM   rY   rc   rf   ri   rp   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r{   r   r   r  r  r  r
  r  r  r  r  r  r#  r)  r+  monotonic_increasing_chunkmonotonic_decreasing_chunkmonotonic_increasing_combinemonotonic_decreasing_combinemonotonic_increasing_aggregatemonotonic_decreasing_aggregater   r"   r!   <module>r?     s   " " " " " "                5 5 5 5 5 5 , , , , , ,                       
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 R Q Q Q Q Q Q Q Q Q "* * * *       - - -, , ,
+ + + +H H H HV  
" " "      ' ' '@ @ @ ?D' ' ' 'T.5 .5 .5b  "    ! ! !! ! !  $   : : : : BF
 
 
 
      Y Y Y
     &% % %' ' '& & &          K K KF F F@ @ @
 %W%5<UVVV $W%5<UVVV &w6        'w6       ")8" " "  ")8" " "   r"   