o
    NrfI                     @  s  U d dl mZ d dlZd dlZd dlmZmZmZ d dlm	Z	m
Z
mZ d dlZd dlZd dlmZmZ d dlmZ d dlmZ d dlmZ d d	lmZ g d
Zdddifdddifdddifdi fdddifdZded< eG dd dZeG dd dZeG dd dZ eG dd dZ!dbddZ"de#d dfdcd-d.Z$g d/Z%ddded4d5Z&dfd6d7Z'dgd8d9Z(e)e"e#e$e*e'e+e'd:e'd;e'd<e(d=e$d>e$d?e$d@e$dAe"dBe"dCe"dDe"iZ,dEedF< G dGdH dHeZ-dIdJ Z.dhdMdNZ/didSdTZ0erdUndVZ1dWdXddYdZe1 dfd[d\Z2djdkd`daZ3dS )l    )annotationsN)asdict	dataclassfield)AnyCallablecast)PANDAS_GE_220PANDAS_GE_300)is_object_string_dtype)tokenize)DataFrameIOFunction)random_state_data)make_timeseries	with_spec
ColumnSpecRangeIndexSpecDatetimeIndexSpecDatasetSpec Zlam  scalehigh)r   g      ?)r   )poissonnormaluniformZbinomialrandomz1dict[str, tuple[tuple[Any, ...], dict[str, Any]]]default_int_argsc                   @  s   e Zd ZU dZdZded< 	 dZded< 	 dZded	< 	 dZd
ed< 	 e	e
dZded< 	 dZd
ed< 	 dZd
ed< 	 dZd
ed< 	 dZded< 	 dZded< 	 e	edZded< 	 e	edZded< dS )r   a  Encapsulates properties of a family of columns with the same dtype.
    Different method can be specified for integer dtype ("poisson", "uniform",
    "binomial", etc.)

    Notes
    -----
    This API is still experimental, and will likely change in the futureN
str | Noneprefixzstr | type | Nonedtype   intnumber
int | Nonenuniquedefault_factorylistchoiceslowr   lengthFboolr   methodtuple[Any, ...]argszdict[str, Any]kwargs)__name__
__module____qualname____doc__r   __annotations__r    r#   r%   r   r(   r)   r*   r   r+   r   r-   tupler/   dictr0   r   r   r   _/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/dask/dataframe/io/demo.pyr   #   s4   
 r   c                   @  s,   e Zd ZU dZeZded< 	 dZded< dS )r   zProperties of the dataframe RangeIndex

    Notes
    -----
    This API is still experimental, and will likely change in the future
str | typer    r!   r"   stepN)r1   r2   r3   r4   r"   r    r5   r:   r   r   r   r8   r   V   s   
 r   c                   @  sH   e Zd ZU dZeZded< 	 dZded< 	 dZded	< 	 dZ	ded
< dS )r   zProperties of the dataframe DatetimeIndex

    Notes
    -----
    This API is still experimental, and will likely change in the futurer9   r    Nr   startZ1Hstrfreqpartition_freq)
r1   r2   r3   r4   r"   r    r5   r;   r=   r>   r   r   r   r8   r   e   s   
 r   c                   @  sT   e Zd ZU dZdZded< 	 dZded< 	 eedZ	ded	< 	 ee
dZd
ed< dS )r   zDefines a dataset with random data, such as which columns and data types to generate

    Notes
    -----
    This API is still experimental, and will likely change in the futurer!   r"   npartitionsr   nrecordsr&   z"RangeIndexSpec | DatetimeIndexSpec
index_speczlist[ColumnSpec]column_specsN)r1   r2   r3   r4   r?   r5   r@   r   r   rA   r(   rB   r   r   r   r8   r   z   s   
 r   Fc                 K  sB   | dd  | dd  |r|jdd| i|S || d d S )Nr    r/   size   r!   r   )popr   Zrand)nrstater   r0   r   r   r8   
make_float   s
   rH   r   rF   r"   rG   r   r   r,   r    r9   r-   str | Callabler/   r.   c                   s    fdd}|r!|d\}}	d|	v rd}|j |d| i|	}
|
S t|tr<||\}}	t||}||d| i|	}
|
S | || d}
|
S )Nc                   s@   t | di f\}}| }|jdi   r n|}||fS )Nr   )r   getcopyupdate)_methodhandler_argshandler_kwargsr/   r0   r   r8   _with_defaults   s
   z make_int.<locals>._with_defaultsr   r*   r   rC   )staterC   )randint
isinstancer<   getattr)rF   rG   r   r    r-   r/   r0   rQ   rN   rO   datahandlerr   rP   r8   make_int   s   	


rX   )ZAliceZBobZCharlieZDanZEdithZFrankZGeorgeZHannahZIngridZJerryZKevinZLauraZMichaelZNorbertZOliverZPatriciaZQuinnZRayZSarahZTimZUrsulaZVictorZWendyZXavierZYvonneZZelda   r+   return	list[str]c                   s4   t tjtj tj d   fddt| D S )N c                   s    g | ]}d  j dqS ) rC   )joinchoice).0_r)   r+   rG   r   r8   
<listcomp>   s     z&make_random_string.<locals>.<listcomp>)r(   stringascii_lettersdigitspunctuationrange)rF   rG   r+   r   rc   r8   make_random_string   s   rj   c                 K  s4   | dd  |rt| ||dS |pt}|j|| dS )Nr/   )r+   r^   )rE   rj   namesr`   )rF   rG   r)   r   r+   r0   r   r   r8   make_string   s
   rl   c                   s^   | dd  |d urtt|  fddt|D }n|pt}tj|jdt|| d|S )Nr/   c                   s   g | ]}t |d   qS )r!   )r<   zfill)ra   xZcat_lenr   r8   rd      s    z$make_categorical.<locals>.<listcomp>r   r^   )	rE   lenr<   ri   rk   pdZCategoricalZ
from_codesrS   )rF   rG   r)   r%   r0   r   ro   r8   make_categorical   s   rr   zstring[python]zstring[pyarrow]categoryZint8Zint16Zint32int64Zfloat8Zfloat16Zfloat32Zfloat64zdict[type | str, Callable]makec                   @  s6   e Zd ZdZdddZedd Zdd Zd	d
 ZdS )MakeDataframePartzU
    Wrapper Class for ``make_dataframe_part``
    Makes a timeseries partition.
    Nc                 C  s(   || _ |p
t| | _|| _|| _d S N)index_dtyper(   keys_columnsdtypesr0   )selfrx   r{   r0   columnsr   r   r8   __init__	  s   
zMakeDataframePart.__init__c                 C  s   | j S rw   )rz   )r|   r   r   r8   r}     s   zMakeDataframePart.columnsc                 C  s$   || j kr| S t| j| j| j|dS )zUReturn a new MakeTimeseriesPart object with
        a sub-column projection.
        r}   )r}   rv   rx   r{   r0   )r|   r}   r   r   r8   project_columns  s   
z!MakeDataframePart.project_columnsc                 C  s,   |\}}t | j|d |d | j| j|| jS )Nr   r!   )make_dataframe_partrx   r{   r}   r0   )r|   part	divisions
state_datar   r   r8   __call__   s   zMakeDataframePart.__call__rw   )	r1   r2   r3   r4   r~   propertyr}   r   r   r   r   r   r8   rv     s    

rv   c                 C  s   t j|}tjj| rtj|||ddd}n!tjj	| r4|d}	tj
|||	 |	d| }ntd|  t|||||}
|
jd |krX|
jd d }
|
jd |ksJ|
S )Nr=   	timestamp)r;   endr=   name)r;   stopr:   zUnhandled index dtype: )npr   ZRandomStaterq   apitypesZis_datetime64_any_dtype
date_rangerJ   Zis_integer_dtype
RangeIndexastype	TypeErrormake_partitionindexZiloc)rx   r;   r   r{   r}   r   r0   rR   r   r:   dfr   r   r8   r   -  s    
r   abc                 C  s    t jj| |pt| ot|S )zOSame as pandas.api.types.is_dtype_equal, but also returns True for str / object)rq   r   r   Zis_dtype_equalr   )r   r   r   r   r8   same_astypeA  s   r   r}   r(   r{   dict[str, type | str]c           
        s   i }|  D ]$\}fdd|  D }t| t||fi |} v r*||< qtj|| d fdd|  D }	|	rRtrEi nddi}j|	fi |S )Nc                   s6   i | ]\}}| d dd  kr| d dd |qS )rb   r!   r   )rsplit)ra   kkv)kr   r8   
<dictcomp>K  s
    z"make_partition.<locals>.<dictcomp>)r   r}   c                   s.   i | ]\}}| v rt || js||qS r   )r   r    ra   r   r   )r}   r   r   r8   r   X  s
    rK   F)itemsru   rp   rq   Z	DataFramer
   r   )
r}   r{   r   r0   rR   rV   dtkwsresultZupdate_dtypesr   )r}   r   r   r8   r   H  s"   
	r   ZMEMz
2000-01-01z
2000-12-31Z10s1c              
   K  s<  |du rt tttd}ttj| ||d}t|d }|du r(tjj	d|d}	nt
||}	g }
tt|d D ]}|
|||d  |	| f q7||d< d	}ttjd
|dd\}}ddlm} | rmddlm} i }nddlm} dt| |||||	i}|t||||
ft||||t| |	d ||ddd|S )aS  Create timeseries dataframe with random data

    Parameters
    ----------
    start: datetime (or datetime-like string)
        Start of time series
    end: datetime (or datetime-like string)
        End of time series
    dtypes: dict (optional)
        Mapping of column names to types.
        Valid types include {float, int, str, 'category'}
    freq: string
        String like '2s' or '1H' or '12W' for the time series frequency
    partition_freq: string
        String like '1M' or '2Y' to divide the dataframe into partitions
    seed: int (optional)
        Randomstate seed
    kwargs:
        Keywords to pass down to individual column creation functions.
        Keywords should be prefixed by the column name and then an underscore.

    Examples
    --------
    >>> import dask.dataframe as dd
    >>> df = dd.demo.make_timeseries('2000', '2010',
    ...                              {'value': float, 'name': str, 'id': int},
    ...                              freq='2h', partition_freq='1D', seed=1)
    >>> df.head()  # doctest: +SKIP
                           id      name     value
    2000-01-01 00:00:00   969     Jerry -0.309014
    2000-01-01 02:00:00  1010       Ray -0.760675
    2000-01-01 04:00:00  1016  Patricia -0.063261
    2000-01-01 06:00:00   960   Charlie  0.788245
    2000-01-01 08:00:00  1031     Kevin  0.466002
    N)r   idrn   yr;   r   r=   r!       eAr^   rD   r=   zdatetime64[ns]2000)r;   r=   Zperiodsr   _dask_expr_enabledfrom_maptokenzmake-timeseriesFmetar   labelZenforce_metadata)r<   r"   floatr(   rq   r   rp   r   r   rS   r   ri   appenddask.dataframer   	dask_exprr   dask.dataframe.io.ior   rv   r   ry   )r;   r   r{   r=   r>   seedr0   r   r?   r   partsirx   
meta_startmeta_endr   r   r   r   r   r8   r   f  sL   ,
 

	r   specr   r$   c              
     sL  t | jdkr%tddddddtdtddtd	d
g ddtdtdg| _g }i }t| jtrmt	| jj
}| jj}| jj}t	| jj
| jt|  }ttj|||dd |k rb| ||t| }}	n@t| jtr| jj}| j| | j }| j| d }ttjd||dd |d k r|d  d|}}	ntd| j d|i}
| jD ]~}|jr|j}n"t|jtrtdd|j d}nt|jdr|jj}n|jj}t|j D ]M}|d }| |   |v r|d }| |   |v s|  |j| < |
! fddt"|# D  |j$# D ]\}}||
  d| < q"qqt d }|du rNt%tt& t'j(j)t*d|dnt+||fddt|D }ddl,m-} | rrdd l.m/} i }ndd l0m/} d!t1d| j|||i}|t2| jj||
|d"|ft3| jj||	||d |
d#d$d%|S )&aL  Generate a random dataset according to provided spec

    Parameters
    ----------
    spec : DatasetSpec
        Specify all the parameters of the dataset
    seed: int (optional)
        Randomstate seed

    Notes
    -----
    This API is still experimental, and will likely change in the future

    Examples
    --------
    >>> from dask.dataframe.io.demo import ColumnSpec, DatasetSpec, with_spec
    >>> ddf = with_spec(
    ...     DatasetSpec(
    ...         npartitions=10,
    ...         nrecords=10_000,
    ...         column_specs=[
    ...             ColumnSpec(dtype=int, number=2, prefix="p"),
    ...             ColumnSpec(dtype=int, number=2, prefix="n", method="normal"),
    ...             ColumnSpec(dtype=float, number=2, prefix="f"),
    ...             ColumnSpec(dtype=str, prefix="s", number=2, random=True, length=10),
    ...             ColumnSpec(dtype="category", prefix="c", choices=["Y", "N"]),
    ...         ],
    ...     ), seed=42)
    >>> ddf.head(10)  # doctest: +SKIP
         p1    p2    n1    n2        f1        f2          s1          s2 c1
    0  1002   972  -811    20  0.640846 -0.176875  L#h98#}J`?  _8C607/:6e  N
    1   985   982 -1663  -777  0.790257  0.792796  u:XI3,omoZ  w~@ /d)'-@  N
    2   947   970   799  -269  0.740869 -0.118413  O$dnwCuq\  !WtSe+(;#9  Y
    3  1003   983  1133   521 -0.987459  0.278154  j+Qr_2{XG&  &XV7cy$y1T  Y
    4  1017  1049   826     5 -0.875667 -0.744359  bJ3E-{:o  {+jC).?vK+  Y
    5   984  1017  -492  -399  0.748181  0.293761  ~zUNHNgD"!  yuEkXeVot|  Y
    6   992  1027  -856    67 -0.125132 -0.234529  j.7z;o]Gc9  g|Fi5*}Y92  Y
    7  1011   974   762 -1223  0.471696  0.937935  yT?j~N/-u]  JhEB[W-}^$  N
    8   984   974   856    74  0.109963  0.367864  _j"&@ i&;/  OYXQ)w{hoH  N
    9  1030  1001  -792  -262  0.435587 -0.647970  Pmrwl{{|.K  3UTqM$86Sg  N
    r   r   rt   i@B T)r   r    r*   r   r   f)r   r    r   crs   )r   r   r   d)r   r    r)   s)r   r    r   r   r!   )r   r:   zUnhandled index: r=   z[^a-zA-Z0-9]rb   r   c                   s4   i | ]\}}|d vr|dg fvr  d| |qS )>   r#   r   r0   Nrb   r   r   )col_namer   r8   r   !  s
    zwith_spec.<locals>.<dictcomp>Nr   r^   c                   s$   g | ]} ||d   | fqS )rD   r   )ra   r   )r   r   r   r8   rd   1  s   $ zwith_spec.<locals>.<listcomp>r   r   r   r   zmake-randomFr   )4rp   rB   r   r   r<   rT   rA   r   rq   	Timestampr;   r=   r>   r@   Z	Timedeltar(   r   r   r   r:   r?   r   
ValueErrorr   r    resubrstriphasattrr   r1   ri   r#   rL   r   r   r0   r   r   r   r   rS   r"   r   r   r   r   r   r   r   rv   r   )r   r   r}   r{   r;   r:   r>   r   r   r   r0   colr   r   Zcol_nZkw_nameZkw_valr?   r   r   r   r   r   )r   r   r   r8   r     s   *








 
	r   )F)rF   r"   rG   r   r   r,   r    r9   r-   rI   r/   r.   )rY   )r+   r"   rZ   r[   )NFN)NN)r   r9   r   r9   )r}   r(   r{   r   rw   )r   r   r   r$   )4
__future__r   r   re   dataclassesr   r   r   typingr   r   r   numpyr   Zpandasrq   Zdask.dataframe._compatr	   r
   Zdask.dataframe._pyarrowr   Zdask.dataframe.corer   Zdask.dataframe.io.utilsr   Z
dask.utilsr   __all__r   r5   r   r   r   r   rH   r"   rX   rk   rj   rl   rr   r   r<   objectru   rv   r   r   r   Z_MEr   r   r   r   r   r8   <module>   s    




	2
!

*


`