
    tfy9                    l   d dl mZ d dlZd dlmZ d dlZd dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZmZmZmZmZmZ d dlmZmZmZmZmZmZmZmZm Z  d d	l!m"Z"m#Z#m$Z$ d d
l%m&Z& eZ'eZ(d8dZ)d8dZ* e&dd      d        Z+d Z,d8dZ-d9dZ.d Z/d Z0d Z1d Z2d Z3d Z4d Z5	 d:dZ6d Z7d Z8d Z9d Z:d Z;d Z<d  Z=d8d!Z>d;d"Z?	 d<d#Z@d$ ZAd% ZBd& ZCd' ZDd( ZEd=d)ZFd* ZGd+ ZHd, ZId- ZJd. ZKd/ ZLd0 ZMd1 ZNd2 ZOd3 ZPd4 ZQ eeOd56      ZR eeOd76      ZS eePd56      ZT eePd76      ZU eeQd56      ZV eeQd76      ZWy)>    )annotationsN)partial)is_extension_array_dtype)PerformanceWarning)	partition)PANDAS_GE_131PANDAS_GE_140PANDAS_GE_200!check_apply_dataframe_deprecation$check_applymap_dataframe_deprecationcheck_convert_dtype_deprecationcheck_observed_deprecation)	concatconcat_dispatchgroup_split_dispatchhash_object_dispatchis_categorical_dtypeis_categorical_dtype_dispatchtolisttolist_dispatchunion_categoricals)is_dataframe_likeis_index_likeis_series_like)_deprecated_kwargc                F    || j                   |   S | j                   ||f   S )z"
    .loc for known divisions
    )locdfiindexercindexers      `/var/www/html/software/conda/envs/higlass/lib/python3.12/site-packages/dask/dataframe/methods.pyr   r   .   s-     vvhvvh())    c                (    | j                   d d |f   S N)iloc)r   r!   s     r"   r&   r&   8   s    771h;r#   convert_dtypec                    t               5  t               5   | j                  |i |cd d d        cd d d        S # 1 sw Y   nxY w	 d d d        y # 1 sw Y   y xY wr%   )r   r   applyr   argskwargss      r"   r)   r)   <   s_    	(	* -.0 	-288T,V,	- 	-- -	- 	- 	-- - -s   A;	AA	 AAc                f    t               5   | j                  |i |cd d d        S # 1 sw Y   y xY wr%   )r   applymapr*   s      r"   r.   r.   C   s0    	-	/ ,r{{D+F+, , ,s   '0c                    	 t        | ||      S # t        $ r% | j                  d      j                   dd|f   cY S w xY w)z$
    .loc for unknown divisions
    r   N)r   KeyErrorheadr   s      r"   try_locr2   H   s@    +2x** +wwqz~~ak**+s    +==c                `   t        | j                        dk(  r| S t        r"|t        j                  dt
               i }d}n
|xs d}d|i}|dk(  rk| j                  j                  sU|'|r| | j                  |k\     } n| | j                  |kD     } |(|r| | j                  |k     } | S | | j                  |k     } | S t        | |      || }|s/|- |j                  j                  |dfi |}|j                  d| }|s/|- |j                  j                  |dfi |}	|j                  |	d }|S )	aY  Index slice start/stop. Can switch include/exclude boundaries.

    Examples
    --------
    >>> df = pd.DataFrame({'x': [10, 20, 30, 40, 50]}, index=[1, 2, 2, 3, 4])
    >>> boundary_slice(df, 2, None)
        x
    2  20
    2  30
    3  40
    4  50
    >>> boundary_slice(df, 1, 3)
        x
    1  10
    2  20
    2  30
    3  40
    >>> boundary_slice(df, 1, 3, right_boundary=False)
        x
    1  10
    2  20
    2  30

    Empty input DataFrames are returned

    >>> df_empty = pd.DataFrame()
    >>> boundary_slice(df_empty, 1, 3)
    Empty DataFrame
    Columns: []
    Index: []
    r   NzXThe `kind` argument is no longer used/supported. It will be dropped in a future release.)categoryr   kindleftright)
lenindexr   warningswarnFutureWarningis_monotonic_increasinggetattrget_slice_boundr&   )
r   startstopright_boundaryleft_boundaryr5   	kind_optsresultright_index
left_indexs
             r"   boundary_slicerH   R   sW   @ 288}	MM:&
 	}uTN	u}RXX==
 E)*5()D() 	 4(	RuT*Fd.2fll224M9M\k*U.1V\\11%NIN
Z[)Mr#   c                H    t        j                  |       j                         S r%   )pdnotnullsumxs    r"   index_countrO      s    ::a=r#   c                    	 t        j                  d      5  t        j                  d       | |z  cd d d        S # 1 sw Y   y xY w# t        $ r& t	        j
                  t        j                        cY S w xY w)NT)recordalways)r:   catch_warningssimplefilterZeroDivisionErrornpfloat64nan)sns     r"   mean_aggregater[      sb    "$$D1 	!!(+q5	 	 	  "zz"&&!!"s+   A <	A AA A ,A76A7c                    t        | t        j                        st        | t              rt	        j
                  | |      S | S Nr9   
isinstancerV   ndarraylistrJ   Series)	array_varr9   s     r"   wrap_var_reductionre      s0    )RZZ(Jy$,Gyy%00r#   c                    t        | t        j                        st        | t              rt	        j
                  | |      S | S r]   r_   )
array_skewr9   s     r"   wrap_skew_reductionrh      s0    *bjj)Z
D-Iyy511r#   c                    t        | t        j                        st        | t              rt	        j
                  | |      S | S r]   r_   )array_kurtosisr9   s     r"   wrap_kurtosis_reductionrk      s0    ."**-ND1Qyyu55r#   c                T    t        j                  | |g      }|j                  |      S r]   )rJ   r   reindex)numeric_vartimedelta_varcolumnsvarss       r"   var_mixed_concatrr      s&    99k=12D<<g<&&r#   c                    t        |       dkD  sJ g }t        d | D        t               }|D ]  }|D ]  }||vs|j                  |        ! t        j                  | dd      j                  |      S )Nr   c              3  4   K   | ]  }|j                     y wr%   r^   ).0rN   s     r"   	<genexpr>z%describe_aggregate.<locals>.<genexpr>   s     5QWW5s   )key   F)axissort)r8   sortedappendrJ   r   rm   )valuesnamesvalues_indexesidxnamesnames        r"   describe_aggregater      s}    v;?? E5f53?N" # 	#D5 T"	##
 99V!%088??r#   c                <   t        |       dk(  sJ | \  }}}}}}	t        |      rt        |j                               }
nt        |      }
|rft	        j
                  |      }t	        j
                  |      }t	        j
                  |      }t	        j
                  |	      }	|j                  d       }|r<t	        j                  |      }t	        j                  |	      }	|j                  d       }|r |
||gddg      }n |
||||gg d      }t        |j                        D cg c]  }|dz  d	d
 c}|_	        t        |      r|
t        |      k7  r|j                         } |
|	gdg      }t        |||gd      }t        |      r||_        |S c c}w )N   c                ,    t        j                  |       S r%   )rJ   to_timedeltarM   s    r"   <lambda>z,describe_numeric_aggregate.<locals>.<lambda>   s    booa0 r#   c                ,    t        j                  |       S r%   )rJ   to_datetimerM   s    r"   r   z,describe_numeric_aggregate.<locals>.<lambda>   s    bnnQ/ r#   countminr^   )r   meanstdr   d   g%maxF)rz   )r8   r   typeto_framerJ   r   r)   r   r   r9   r   r   )statsr   is_timedelta_colis_datetime_colr   r   r   r   qr   typpart1lpart3rE   s                  r"   describe_numeric_aggregater      sm    u:??$)!E4c1ce5>>#$1gt$ooc"ooc"ooc"GG01nnS!nnS!GG/0UCL%(89UD#s+3RS(.qww81!c'!A8AGaSDG^JJLeW%EUAu%E2FfM 9s   (Fc                H   t        |       }|dk(  }|dk(  }|s|sJ |r| \  }}}n| \  }}}}}	t        |      dk(  rmddg}
ddg}d }|
j                  t        j                  t        j                  g       |j                  ddg       t        }t        j                  |
|||      }|S |j                  d   }|j                  d   }g d	}||g}|r|j                  }t        j                  |      }|j                  ||j                  |      }n|j                  |      }t        j                  |
      }t        j                  	|
      }|j                  ddg       |j                  ||||g       n|j                  ||g       t        j                  |||      S )N      r   r   uniquetopfreq)r9   dtyper   )r   r   r   r   )tzfirstlast)r9   r   )r8   extendrV   rX   objectrJ   rc   r9   r&   r   	Timestamptzinfo
tz_converttz_localize)r   r   args_lenis_datetime_columnis_categorical_columnnuniquer   top_freqmin_tsmax_tsdatar9   r   rE   r   r   r}   r   r   r   s                       r"   describe_nonnumeric_aggregater      s   5zH!Q$M!666#( 380&& 8}1v(#RVVRVV$%eV_%4uEE
..
C==D.EuFVVll3::!bn..$C//"%CV+||Fr*gv&'sD%./sDk"99V5t44r#   c                    ||S  | ||      S )zApply aggregation function within a cumulative aggregation

    Parameters
    ----------
    aggregate: function (a, a) -> a
        The aggregation function, like add, which is used to and subsequent
        results
    x:
    y:
     )	aggregaterN   ys      r"   _cum_aggregate_applyr   /  s     	yAr#   c                    | |S || S | |z   S r%   r   rN   r   s     r"   cumsum_aggregater   @      y	
1ur#   c                    | |S || S | |z  S r%   r   r   s     r"   cumprod_aggregater   I  r   r#   c                    t        |       st        |       r5| j                  | |k  | j                         z  || j                  dz
        S | |k  r| S |S Nrx   ry   r   r   whereisnullndimr   s     r"   cummin_aggregater   R  O    a-a0wwA+QQVVaZw@@Eq q r#   c                    t        |       st        |       r5| j                  | |kD  | j                         z  || j                  dz
        S | |kD  r| S |S r   r   r   s     r"   cummax_aggregater   Y  r   r#   c                   t        t        d|            }t        t        |      t        | j                        z        xr t
         }| j                  t        |            } t        j                         5  t        j                  ddt               |j                         D ]
  \  }}|| |<    	 d d d        | S # 1 sw Y   | S xY w)N   )deepignorez DataFrame is highly fragmented *)messager4   )dictr   boolsetrp   r	   copyr:   rS   filterwarningsr   items)r   pairsr   r   vals        r"   assignr   `  s     1e$%EE
S_,-Cm2CD	d4j	!B		 	 	" 6'	

  	ID#BtH	 I Is   8:B==Cc                    | j                         }t        |      s"t        |      st        j                  ||      }|S )N)r   )r   r   r   rJ   rc   )rN   series_nameouts      r"   r   r   r  s3    
((*C 3=#5ii+.Jr#   c                    t               5   | j                  dddi|j                         cd d d        S # 1 sw Y   y xY w)Nlevelr   r   )r   groupbyrL   )rN   rz   	ascendinggroupby_kwargss       r"   value_counts_combiner   {  s<    	#	% :qyy3q3N3779: : :s	   "7A c                    t        | fi |}|r|||n|j                         z  }|r|j                  |      }t        r	|rd|_        |S )N)r   
proportion)r   rL   sort_valuesr
   r   )rN   total_lengthrz   r   	normalizer   r   s          r"   value_counts_aggregater     sQ     q
3N
3C|7|SWWYFoo	o2Jr#   c                    | j                   S r%   )nbytesrM   s    r"   r   r     s    88Or#   c                    | j                   S r%   )sizerM   s    r"   r   r     s    66Mr#   c                ^    | j                   }t        |      r|j                  t              }|S r%   )r}   r   astyper   )r   r}   s     r"   r}   r}     s(    YYF  'v&Mr#   c                    t         j                  j                  |      }t        |       dkD  r| j	                  |||      S | S )Nr   )random_statefracreplace)rV   randomRandomStater8   sample)r   stater   r   rss        r"   r   r     s;    			u	%BEHWq[299"49AXVXXr#   c                l    | j                  |d      } | j                  j                  |      | _        | S r   )droprp   r   )r   rp   r   s      r"   drop_columnsr     s/    	q	!B""5)BJIr#   c                    |r t        | |             }n| j                         }|rC|j                         j                  j	                  d      j                         rt        d      |S )Nr   r   zAll NaN partition encountered in `fillna`. Try using ``df.repartition`` to increase the partition size, or specify `limit` in `fillna`.)r>   fillnar   r}   allany
ValueError)r   methodcheckr   s       r"   fillna_checkr    sb    !gb&!#iik$$((a(04464
 	

 Jr#   c                D    | j                  dd      j                         S Nr   F)r   observed)r   rL   r   s    r"   	pivot_aggr    s    ::A:.2244r#   c                D    | j                  dd      j                         S r  )r   r   r  s    r"   pivot_agg_firstr	    s    ::A:.4466r#   c                D    | j                  dd      j                         S r  )r   r   r  s    r"   pivot_agg_lastr    s    ::A:.3355r#   c           	     :    t        j                  | |||ddd      S )NrL   Fr9   rp   r}   aggfuncdropnar  rJ   pivot_tabler   r9   rp   r}   s       r"   	pivot_sumr    s(    >>
 r#   c           	     t    t        j                  | |||ddd      j                  t        j                        S )Nr   Fr  )rJ   r  r   rV   rW   r  s       r"   pivot_countr    s:     >>
 fRZZr#   c           	     :    t        j                  | |||ddd      S )Nr   Fr  r  r  s       r"   pivot_firstr    s(    >>
 r#   c           	     :    t        j                  | |||ddd      S )Nr   Fr  r  r  s       r"   
pivot_lastr    s(    >>
 r#   c                4    | j                         } || _        | S r%   )r   r9   )r   inds     r"   assign_indexr    s    	BBHIr#   c                    | j                   rd }n/t        |       r| n| j                  }t        | |      |d   |d   gg}t	        j
                  |g d      S )Nr   	monotonicr   r   )r   rp   )emptyr   r&   r>   rJ   	DataFrame)rN   propr   s      r"   _monotonic_chunkr$    sP    ww!!$q!&&D!47DH56<<T+IJJr#   c                @   | j                   rd }nwt        j                  | ddg   j                         j	                               }| d   j                         xr t        ||      }||j                  d   |j                  d   gg}t        j                  |g d      S )Nr   r   r   r   r  r  )rp   )	r!  rJ   rc   to_numpyravelr   r>   r&   r"  )concatenatedr#  r   rY   is_monotonics        r"   _monotonic_combiner*    s    IIlGV#45>>@FFHI#K0446K71d;Kqvvay!&&*56<<&DEEr#   c                    t        j                  | ddg   j                         j                               }| d   j	                         xr t        ||      S )Nr   r   r   )rJ   rc   r&  r'  r   r>   )r(  r#  rY   s      r"   _monotonic_aggregater,    sL    
		,01::<BBDEA$((*?wq$/??r#   r=   )r#  is_monotonic_decreasingr%   )TTN)NFF)TF)NTFF)T)X
__future__r   r:   	functoolsr   numpyrV   pandasrJ   pandas.api.typesr   pandas.errorsr   tlzr   dask.dataframe._compatr   r	   r
   r   r   r   r   dask.dataframe.dispatchr   r   r   r   r   r   r   r   r   dask.dataframe.utilsr   r   r   
dask.utilsr   hash_dfgroup_splitr   r&   r)   r.   r2   rH   rO   r[   re   rh   rk   rr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r}   r   r   r  r  r	  r  r  r  r  r  r  r$  r*  r,  monotonic_increasing_chunkmonotonic_decreasing_chunkmonotonic_increasing_combinemonotonic_decreasing_combinemonotonic_increasing_aggregatemonotonic_decreasing_aggregater   r#   r"   <module>rA     s   "     5 ,   
 
 
 R Q ( "*  ?D)- *-,
+HV
"'@ ?D'T.5b"!!$: AF
Y
&576			KF@
 %%5<UV $%5<UV &6    '6   ")8"  ")8" r#   