o
    NrfWe                     @  s  U d dl mZ d dlZd dlmZ d dlZd dlZd dl	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZmZ d d
lmZmZmZmZmZ d dlmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1 d dl2m3Z3m4Z4 d dl5m6Z6m7Z7m8Z8m9Z9m:Z: d dl;m<Z<m=Z= d dl>m?Z?m@Z@mAZA G dd deZBeddeBddZCe4DejEdd ZFe4DejGe4DejHe4DejIe4DejJdd ZFe(DejejfdbddZFe(DejdbddZFejejejejKfZLdeMd< zd dlNmOZP eLePjQf7 ZLW n
 eRy   Y nw e-DejfdbddZSe/Dejfdd  ZTe"Dejfd!d" ZUe,Dejd#d$ ZVe+DejWe(DejWdbd%d&ZXe)DeLdbd'd(ZYe+DeZd)d* Z[e+Dejd+d, Z\e+Dejd-d. Z]e+Dejdbd/d0Z^e*Ded1d2 Z_e*Dej`d3d4 Zae1Dejejejejbfdcd6d7Zce#Dejd8d9 Zde#Dejd:d; Zee#Dejd<d= Zfe#Ded>d? Zge#DeZd@dA Zhe&Dejejejf	BdddDdEZiG dFdG dGe<ejZke$DejejejfdedHdIZle!Dejejejf	 	J	5	B	5dfdKdLZmeDejejejfdgdMdNZne0Dej`ejejejbfdOdP Zoe'DejejejpjqjrejEfdQdR Zse%DejejfdSdT ZteDejejfdhdVdWZue.DejejejfdXdY ZvG dZd[ d[eBZweCxdew  e!yd\e"yd\e$yd\e#yd\e&yd\e+yd\e(yd\e)yd\eyd\e/yd\e0yd\d]d^ Zze*yd_e0yd_d`da Z{dS )i    )annotationsN)Iterable)	is_scalarunion_categoricals)Array)percentile_lookup_percentile)CreationDispatchDaskBackendEntrypoint)PANDAS_GE_220is_any_real_numeric_dtype)	DataFrameIndexScalarSeries_Frame)categorical_dtype_dispatchconcatconcat_dispatchfrom_pyarrow_table_dispatchget_parallel_typegroup_split_dispatchgrouper_dispatchhash_object_dispatchis_categorical_dtype_dispatchmake_meta_dispatchmake_meta_objmeta_lib_from_arraymeta_nonemptypartd_encode_dispatchpyarrow_schema_dispatchto_pandas_dispatchto_pyarrow_table_dispatchtolist_dispatchunion_categoricals_dispatch)make_array_nonemptymake_scalar)_empty_series_nonempty_scalar_scalar_from_dtypeis_float_na_dtypeis_integer_na_dtype)SimpleSizeofsizeof)is_arraylikeis_series_liketypenamec                   @  sd   e Zd ZdZedddZedd
dZedddZedddZedddZ	edddZ
dS )DataFrameBackendEntrypointzoDask-DataFrame version of ``DaskBackendEntrypoint``

    See Also
    --------
    PandasBackendEntrypoint
    datadictnpartitionsintc                K     t )a  Create a DataFrame collection from a dictionary

        Parameters
        ----------
        data : dict
            Of the form {field : array-like} or {field : dict}.
        npartitions : int
            The desired number of output partitions.
        **kwargs :
            Optional backend kwargs.

        See Also
        --------
        dask.dataframe.io.io.from_dict
        NotImplementedError)r3   r5   kwargs r;   `/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/dask/dataframe/backends.py	from_dict9      z$DataFrameBackendEntrypoint.from_dictpath
str | listc                 K  r7   )a$  Read Parquet files into a DataFrame collection

        Parameters
        ----------
        path : str or list
            Source path(s).
        **kwargs :
            Optional backend kwargs.

        See Also
        --------
        dask.dataframe.io.parquet.core.read_parquet
        r8   r?   r:   r;   r;   r<   read_parquetL      z'DataFrameBackendEntrypoint.read_parqueturl_pathc                 K  r7   )a  Read json files into a DataFrame collection

        Parameters
        ----------
        url_path : str or list
            Source path(s).
        **kwargs :
            Optional backend kwargs.

        See Also
        --------
        dask.dataframe.io.json.read_json
        r8   )rD   r:   r;   r;   r<   	read_json]   rC   z$DataFrameBackendEntrypoint.read_jsonc                 K  r7   )a  Read ORC files into a DataFrame collection

        Parameters
        ----------
        path : str or list
            Source path(s).
        **kwargs :
            Optional backend kwargs.

        See Also
        --------
        dask.dataframe.io.orc.core.read_orc
        r8   rA   r;   r;   r<   read_orcn   rC   z#DataFrameBackendEntrypoint.read_orcurlpathc                 K  r7   )a  Read CSV files into a DataFrame collection

        Parameters
        ----------
        urlpath : str or list
            Source path(s).
        **kwargs :
            Optional backend kwargs.

        See Also
        --------
        dask.dataframe.io.csv.read_csv
        r8   )rG   r:   r;   r;   r<   read_csv   rC   z#DataFrameBackendEntrypoint.read_csvpatternkeystrc                 K  r7   )aT  Read HDF5 files into a DataFrame collection

        Parameters
        ----------
        pattern : str or list
            Source path(s).
        key : str
            Group identifier in the store.
        **kwargs :
            Optional backend kwargs.

        See Also
        --------
        dask.dataframe.io.hdf.read_hdf
        r8   )rI   rJ   r:   r;   r;   r<   read_hdf   r>   z#DataFrameBackendEntrypoint.read_hdfN)r3   r4   r5   r6   )r?   r@   )rD   r@   )rG   r@   )rI   r@   rJ   rK   )__name__
__module____qualname____doc__staticmethodr=   rB   rE   rF   rH   rL   r;   r;   r;   r<   r2   1   s    r2   Z	dataframepandasdataframe_creation_dispatch)module_namedefaultZentrypoint_classnamec                 C     t | S N)r*   dtyper;   r;   r<   _      r[   c                 C     | S rX   r;   xr;   r;   r<   r[      s   c                 C  s*   | j d d jdd}|jjdd|_|S Nr   T)deep)iloccopyindex)r_   rd   outr;   r;   r<   r[      s   c                 C  s   | dd j ddS r`   rc   r_   rd   r;   r;   r<   r[      s   ztuple[type, ...]meta_object_typesc                 C  s   dd l }|jj| |dS )Nr   )preserve_index)pyarrowZSchemafrom_pandas)objri   par;   r;   r<   get_pyarrow_schema_pandas   s   rn   c                 K  s   dd l }|jj| fi |S Nr   )rj   Tablerk   )rl   r:   rm   r;   r;   r<   get_pyarrow_table_from_pandas   s   rq   c                   s8   dd l d	 fdd}|d|}|jd
d|i|S )Nr   pyarrow_dtypepa.DataTypereturnobjectc                   s4   |    hv rtd jjv rtdS d S )Nrj   )Zlarge_stringstringpdZStringDtypedtypesvalues)rr   metarm   r;   r<   default_types_mapper   s   
z?get_pandas_dataframe_from_pyarrow.<locals>.default_types_mappertypes_mapper)rr   rs   rt   ru   r;   )rj   popZ	to_pandas)r{   tabler:   r|   r}   r;   rz   r<   !get_pandas_dataframe_from_pyarrow   s   
r   c                 C  s   ddl m} |S )Nr   )PandasBlocks)Zpartdr   )r[   r   r;   r;   r<   partd_pandas_blocks   s   r   c                 C  rW   rX   )r)   rg   r;   r;   r<   make_meta_pandas_datetime_tz   s   r   c                   s4  t | r| jr| dd S  durt  t| tr*tj fdd|  D  dS t| tr@t	| dkr@t
| d | d  dS t| trnt| tsntdd	 | D sZtd
|  tj fdd| D dd | D  dS t| ds| durz
t| }t|W S  ty   Y nw t| rt| S td|  )a  Create an empty pandas object containing the desired metadata.

    Parameters
    ----------
    x : dict, tuple, list, pd.Series, pd.DataFrame, pd.Index, dtype, scalar
        To create a DataFrame, provide a `dict` mapping of `{name: dtype}`, or
        an iterable of `(name, dtype)` tuples. To create a `Series`, provide a
        tuple of `(name, dtype)`. If a pandas object, names, dtypes, and index
        should match the desired output. If a dtype or scalar, a scalar of the
        same dtype is returned.
    index :  pd.Index, optional
        Any pandas index to use in the metadata. If none provided, a
        `RangeIndex` will be used.

    Examples
    --------

    >>> make_meta_object([('a', 'i8'), ('b', 'O')])
    Empty DataFrame
    Columns: [a, b]
    Index: []
    >>> make_meta_object(('a', 'f8'))
    Series([], Name: a, dtype: float64)
    >>> make_meta_object('i8')
    1
    Nr   c                       i | ]\}}|t || d qS rd   r(   .0cdr   r;   r<   
<dictcomp>(       z$make_meta_object.<locals>.<dictcomp>r         c                 s  s&    | ]}t |tot|d kV  qdS )r   N)
isinstancetuplelenr   ir;   r;   r<   	<genexpr>-  s   $ z#make_meta_object.<locals>.<genexpr>z2Expected iterable of tuples of (name, dtype), got c                   r   r   r   r   r   r;   r<   r   0  r   c                 S  s   g | ]\}}|qS r;   r;   r   r;   r;   r<   
<listcomp>1      z$make_meta_object.<locals>.<listcomp>)columnsrd   rZ   z'Don't know how to create metadata from )r/   shaper   r   r4   rw   r   itemsr   r   r(   r   rK   all
ValueErrorhasattrnprZ   r*   	Exceptionr   r)   	TypeError)r_   rd   rZ   r;   r   r<   make_meta_object  s8   


r   c                 C  s&   t | rt| S tdtt|  )zCreate a nonempty pandas object from the given metadata.

    Returns a pandas DataFrame, Series, or Index that contains two rows
    of fake data.
    z>Expected Pandas-like Index, Series, DataFrame, or scalar, got )r   r)   r   r1   typer^   r;   r;   r<   meta_nonempty_objectE  s   
r   c                 C  s   t | j}t }t }tt| jD ]'}| jd d |f }|j}||vr3t| jd d |f |d||< || ||< qt	j
||tt| jd}| j|_| j|_|S )N)idx)rd   r   )r   rd   r4   ranger   r   rb   rZ   _nonempty_seriesrw   r   r   Zarangeattrs)r_   r   Z	dt_s_dictr3   r   Zseriesdtresr;   r;   r<   meta_nonempty_dataframeU  s   
r   c                 C  s  t | }|tju rtjd| j| jdS t| r"|ddg| j| jdS |tju rZd}ztj|d| j| j	| jdW S  t
yY   | jd u rG|dgnd }tj||d| j| j	| jd Y S w |tju rjtjdd| j| jdS |tju rtdd}ztj|d| j| jdW S  t
y   tdd}| jd u r||d gnd }tj||d| j| jd Y S w |tju rt| jd	krtjt| j| jd
}ntjjdd	g| j| jd}tj|| jdS |tju rdd | jD }dd | jD }z
tj||| jdW S  ty
   tj||| jd Y S w |tju rCt | jtjv r'tjt| j| j| jdS | jtkr7tjddg| jdS tjddg| j| jdS tdt t |  )Nr   )rV   rZ   r   
1970-01-01)startperiodsfreqtzrV   z
1970-01-02)r   r   r   rV   Dr   )ordered
categoriesr   rV   c                 S  s   g | ]}t |qS r;   )_nonempty_index)r   lr;   r;   r<   r     r   z#_nonempty_index.<locals>.<listcomp>c                 S  s   g | ]}d d gqS )r   r;   r   r;   r;   r<   r     r   )levelscodesnames)r   labelsr   )rZ   rV   TFabz'Don't know how to handle index of type )!r   rw   Z
RangeIndexrV   rZ   r   ZDatetimeIndexZ
date_ranger   r   r   ZPeriodIndexZperiod_rangeZTimedeltaIndexr   Ztimedelta64Ztimedelta_rangeCategoricalIndexr   r   Categoricalr   r   
from_codes
MultiIndexr   r   r   r   r&   _lookupboolr1   )r   typr   r3   r   r   r;   r;   r<   r   f  st   






r   c                 C  s  |d u r	t | j}| j}t| dkr| jd gd }nt|tjr.tjd|j	d}||g}nt|tj
rat| jjrH| jjd gd }| jj}nt | jj}| jjd d }tj||| jjd}nut|rotjdd g|d}ngt|r}tjdd g|d}nYt|tjr|j}td	|td
|g}nCt|tjrt|j}tj||g|d}n.t|tjrt|j}tj||g|d}nt|tjv rt|}nt|}tj||g|d}tj|| j|d}| j|_|S )Nr   r   r   )r   r   r   rY   g      ?2000Z2001)rV   rd   ) r   rd   rZ   r   rb   r   rw   DatetimeTZDtype	Timestampr   CategoricalDtypecatr   r   r   r,   arrayr+   ZPeriodDtyper   PeriodZSparseDtyper*   subtypeZIntervalDtyper   r&   r   r   r   rV   r   )sr   rZ   r3   entryZcatsr   re   r;   r;   r<   r     sF   





r   c                 C  
   t | jS rX   )r   _metar^   r;   r;   r<   _meta_lib_from_array_da  s   
r   c                 C     t S rX   )rw   r^   r;   r;   r<   _meta_lib_from_array_numpy     r   Fc                 C  s   t jjj| ||dS )N)sort_categoriesignore_order)rw   apitypesr   )Zto_unionr   r   r;   r;   r<   union_categoricals_pandas  s   r   c                 C  r   rX   )r   r[   r;   r;   r<   get_parallel_type_series     r   c                 C  r   rX   )r   r   r;   r;   r<   get_parallel_type_dataframe  r   r   c                 C  r   rX   )r   r   r;   r;   r<   get_parallel_type_index  r   r   c                 C  r   rX   )r   r   )or;   r;   r<   get_parallel_type_frame     
r   c                 C  r   rX   )r   r   r;   r;   r<   get_parallel_type_object  r   r   Tutf8c                 C  s   t jj| ||||dS )N)rd   encodinghash_key
categorize)rw   utilZhash_pandas_object)rl   rd   r   r   r   r;   r;   r<   hash_object_pandas  s   
r   c                      s   e Zd Zd fddZ  ZS )ShuffleGroupResultrt   r6   c                   s8   t   }|  D ]\}}|t|7 }|t|7 }q	|S )ag  
        The result of the shuffle split are typically small dictionaries
        (#keys << 100; typically <= 32) The splits are often non-uniformly
        distributed. Some of the splits may even be empty. Sampling the
        dictionary for size estimation can cause severe errors.

        See also https://github.com/dask/distributed/issues/4962
        )super
__sizeof__r   r.   )self
total_sizekdf	__class__r;   r<   r     s
   
	zShuffleGroupResult.__sizeof__)rt   r6   )rM   rN   rO   r   __classcell__r;   r;   r   r<   r     s    r   c                   s|   t |r|j}tjj|jtjdd|\}}| 	| |
 } fddt|d d |dd  D }ttt||S )NFrf   c                   s8   g | ]\}}r j || jd dn j || qS T)Zdrop)rb   reset_index)r   r   r   Zdf2ignore_indexr;   r<   r   *  s    &z&group_split_pandas.<locals>.<listcomp>r   r   )r0   ry   rw   Z_libsZalgosZgroupsort_indexerastyper   ZintpZtakeZcumsumzipr   r   )r   r   r   r   Zindexer	locationspartsr;   r   r<   group_split_pandas!  s   
r   outerc                   s  | dd}|dkrtj f||d|S t d tjrt d tjrOtdt D ]}t | tjs@ | d |< q-tjt	 |d d j
dS t d tjr d  dd  }	tfd	d
|	D r fddtjD }
tjj|
jdS jftdd
 |	D  }t|}z
tjj|jdW S  ty   t| Y S w  d  dd  S  d j}t|tjpt|tjotdd
 |jD }|rdd  D }tdd  D }n }d }|rt|d tjrn
tdd
 |D r|str|}|d jdk}n=dd |D }t ) tdt  |r+tdt! tjdd |D fd|i| }W d    n	1 sHw   Y  t|tj"r| r||  jtjfdd|D fd|i|}|j}|j#D ]W}|D ]}|$|}|d ur nq~g }|D ]-}||j%v r|||  qtj&t|ddd}tj'(||j)j*|j)j+}|| qt	||d||< t|s||_qz|j,|jd}n}t  tdt  |rtdt! tj||dd}W d    n	1 sw   Y  nPt|d j-tj.r.|d u rtdd |D }tj"t	||d||d j
dS t  |r<tdt! tj|fd|i|}W d    n	1 sRw   Y  |d ur_||_|S )Nr   Fr   )axisjoinr   category)r   r   c                 3  s(    | ]}t |tjo|j jkV  qd S rX   )r   rw   r   nlevels)r   r   )firstr;   r<   r   K  s
    
z concat_pandas.<locals>.<genexpr>c                   s"   g | ] t  fd dD qS )c                   s   g | ]}|  qS r;   )Z_get_level_valuesr   nr;   r<   r   P  s    z,concat_pandas.<locals>.<listcomp>.<listcomp>)r   )r   )dfsr  r<   r   O  s    z!concat_pandas.<locals>.<listcomp>)r   c                 s  s    | ]}|j V  qd S rX   )Z_values)r   r   r;   r;   r<   r   U  s    c                 s      | ]	}t |tjV  qd S rX   )r   rw   r   r   r;   r;   r<   r   b      c                 S  s   g | ]}|j d dqS r   )r   r   r   r;   r;   r<   r   f  s    c                 S     g | ]}|j qS r;   r   r  r;   r;   r<   r   g      c                 s  r  rX   )r   rw   r   r  r;   r;   r<   r   p  r  c                 S  s2   g | ]}t |tjr|n
| j|jd idqS )r   r   )r   rw   r   to_framerenamerV   r  r;   r;   r<   r   y  s    
ignorec                 S  s   g | ]
}|j d k jqS )r   )rx   r  Tr  r;   r;   r<   r         r   c                   s   g | ]
}||j   qS r;   )r   intersectionr  )not_catr;   r<   r     r  r   i8rY   r
  )r   sortc                 S  r  r;   r   r  r;   r;   r<   r     r	  )rd   rV   )/r~   rw   r   r   r   r   r   r   r   r   rV   r   r   r   Zfrom_arraysr   ry   r   r   Zconcatenatefrom_tuplesr   appendrd   anyr   r   r   rx   warningscatch_warningssimplefilterRuntimeWarningFutureWarningr   
differencegetr   fullr   r   r   r   r   ZreindexrZ   r   )r  r   r   uniformZfilter_warningr   r:   r   r   restZarraysZ	to_concatZ
new_tuplesZ
dfs0_indexZhas_categoricalindexZdfs2indZdfs3Zcat_maskre   Ztemp_indcolr   sampler   r   r3   r;   )r  r  r  r<   concat_pandas1  s   













r$  c                 C  s   t jjj| |dS )Nr   )rw   r   r   r   r   r;   r;   r<   categorical_dtype_pandas  s   r%  c                 C     |   S rX   tolistrl   r;   r;   r<   tolist_numpy_or_pandas  r\   r*  c                 C  s"   t | dr	| j}n| }t|tjS )NrZ   )r   rZ   r   rw   r   )rl   rZ   r;   r;   r<   is_categorical_dtype_pandas  s   
r+  c                 C  s
   t jjjS rX   )rw   coregroupbyZGrouperr)  r;   r;   r<   get_grouper_pandas  r   r.  linearc                 C  s   t | ||S rX   r   )r   qinterpolationr;   r;   r<   
percentile  s   r2  c                 K  r]   rX   r;   )r3   r:   r;   r;   r<   to_pandas_dispatch_from_pandas  r   r3  c                   @  s*   e Zd ZdZedd Zed	ddZdS )
PandasBackendEntrypointzPandas-Backend Entrypoint Class for Dask-DataFrame

    Note that all DataFrame-creation functions are defined
    and registered 'in-place' within the ``dask.dataframe``
    ``io`` module.
    c                 C  r   rX   )r"   )clsr;   r;   r<   to_backend_dispatch  r   z+PandasBackendEntrypoint.to_backend_dispatchr3   r   c                 K  s2   t |jtjtjtjfr|S |j|  fi |S rX   )r   r   rw   r   r   r   Zmap_partitionsr6  )r5  r3   r:   r;   r;   r<   
to_backend  s   z"PandasBackendEntrypoint.to_backendN)r3   r   )rM   rN   rO   rP   classmethodr6  r7  r;   r;   r;   r<   r4    s    
r4  cudfc                  C  s   dd l } d S ro   Z	dask_cudfr:  r;   r;   r<   _register_cudf  s   r;  cupyc                    sX   z!dd l  dd l} t| j fdd}t| jdd }W d S  ty+   Y d S w )Nr   c                   s    S rX   r;   r^   r9  r;   r<   meta_lib_from_array_cupy  r   z8_register_cupy_to_cudf.<locals>.meta_lib_from_array_cupyc                 S  r&  rX   r'  r^   r;   r;   r<   tolist_cupy!  r\   z+_register_cupy_to_cudf.<locals>.tolist_cupy)r9  r<  r   registerndarrayr$   ImportError)r<  r>  r?  r;   r=  r<   _register_cupy_to_cudf  s   

rC  rX   )FF)Tr   NT)F)r   r   FTF)NF)r/  )|
__future__r   r  collections.abcr   numpyr   rR   rw   Zpandas.api.typesr   r   Zdask.array.corer   Zdask.array.dispatchr   Zdask.array.percentiler	   Zdask.backendsr
   r   Zdask.dataframe._compatr   r   Zdask.dataframe.corer   r   r   r   r   Zdask.dataframe.dispatchr   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   Zdask.dataframe.extensionsr&   r'   Zdask.dataframe.utilsr(   r)   r*   r+   r,   Zdask.sizeofr-   r.   Z
dask.utilsr/   r0   r1   r2   rS   r@  rZ   r[   r   Z	Timedeltar   ZIntervalr   rh   __annotations__Zscipy.sparsesparsespZspmatrixrB  rn   rq   r   r   r   r   r   ru   r   r   r   r   r   rA  r   r   r   r   r   r   r   r   r   r4   r   r   r$  r%  r*  r   
extensionsZExtensionDtyper+  r.  r2  r3  r4  Zregister_backendZregister_lazyr;  rC  r;   r;   r;   r<   <module>   s    Ts












A





E,










 



