
    >ieG                    z   U d dl mZ d dlZd dlZd dlmZmZmZ d dlm	Z	m
Z
mZ d dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ g dZdddifdddifdddifdi fdddifdZded<   e G d d                      Ze G d d                      Z e G d d                      Z!e G d d                      Z"dYdZ#de$d dfdZd-Z%g d.Z&d[d\d3Z'd]d4Z(d^d5Z)e*e#e$e%e+e(e,e(d6e(d7e(d8e)d9e%d:e%d;e%d<e%d=e#d>e#d?e#d@e#iZ-dAedB<    G dC dDe          Z.dE Z/d_dHZ0d`dMZ1erdNndOZ2dPdQddRdSe2 dfdTZ3dadbdXZ4dS )c    )annotationsN)asdict	dataclassfield)AnyCallablecast)PANDAS_GE_220)is_object_string_dtype)tokenize)from_map)DataFrameIOFunction)random_state_data)make_timeseries	with_spec
ColumnSpecRangeIndexSpecDatetimeIndexSpecDatasetSpec lam  scalehigh)r   g      ?)r   )poissonnormaluniformbinomialrandomz1dict[str, tuple[tuple[Any, ...], dict[str, Any]]]default_int_argsc                     e Zd ZU dZdZded<   	 dZded<   	 dZded	<   	 dZd
ed<   	  e	e
          Zded<   	 dZd
ed<   	 dZd
ed<   	 dZd
ed<   	 dZded<   	 dZded<   	  e	e          Zded<   	  e	e          Zded<   dS )r   a  Encapsulates properties of a family of columns with the same dtype.
    Different method can be specified for integer dtype ("poisson", "uniform",
    "binomial", etc.)

    Notes
    -----
    This API is still experimental, and will likely change in the futureN
str | Noneprefixzstr | type | Nonedtype   intnumber
int | Nonenuniquedefault_factorylistchoiceslowr   lengthFboolr   methodtuple[Any, ...]argszdict[str, Any]kwargs)__name__
__module____qualname____doc__r#   __annotations__r$   r'   r)   r   r,   r-   r.   r   r/   r   r1   tupler3   dictr4   r       6lib/python3.11/site-packages/dask/dataframe/io/demo.pyr   r   $   sB        L L FE#E####6FOOOO[ GIE$///G////AC D.FYF FJ "E%888D8888&"U4888F888822r<   r   c                  2    e Zd ZU dZeZded<   	 dZded<   dS )r   zProperties of the dataframe RangeIndex

    Notes
    -----
    This API is still experimental, and will likely change in the future
str | typer$   r%   r&   stepN)r5   r6   r7   r8   r&   r$   r9   r@   r   r<   r=   r   r   W   sA         L L EDMMMMr<   r   c                  R    e Zd ZU dZeZded<   	 dZded<   	 dZded	<   	 dZ	ded
<   dS )r   zProperties of the dataframe DatetimeIndex

    Notes
    -----
    This API is still experimental, and will likely change in the futurer?   r$   Nr"   start1Hstrfreqpartition_freq)
r5   r6   r7   r8   r&   r$   r9   rB   rE   rF   r   r<   r=   r   r   f   sm         L L EE"D4!%N%%%%00r<   r   c                  z    e Zd ZU dZdZded<   	 dZded<   	  ee          Z	ded	<   	  ee
          Zd
ed<   dS )r   zDefines a dataset with random data, such as which columns and data types to generate

    Notes
    -----
    This API is still experimental, and will likely change in the futurer%   r&   npartitionsr   nrecordsr*   z"RangeIndexSpec | DatetimeIndexSpec
index_speczlist[ColumnSpec]column_specsN)r5   r6   r7   r8   rH   r9   rI   r   r   rJ   r,   rK   r   r<   r=   r   r   {   s         L L K& H-5:U&6 6 6J     "%*U4%@%@%@L@@@@$$r<   r   Fc                    |                     dd            |                     dd            |r |j        dd| i|S |                    |           dz  dz
  S )Nr$   r3   size   r%   r   )popr   rand)nrstater   r4   s       r=   
make_floatrS      si    
JJw
JJvt /v}..!.v...;;q>>A!!r<   r   rQ   r&   rR   r   r   r0   r$   r?   r1   str | Callabler3   r2   c                    fd}|r$ |d          \  }}	d|	v rd} |j         |d| i|	}
nIt          |t                    r) ||          \  }}	t          ||          } ||d| i|	}
n ||| d}
|
S )Nc                    t                               | di f          \  }}|                                } |j        di  rn|}||fS )Nr   )r    getcopyupdate)_methodhandler_argshandler_kwargsr3   r4   s      r=   _with_defaultsz make_int.<locals>._with_defaults   sd    '7';';Gb"X'N'N$n',,..'''''#5tt^++r<   r   r.   r   rM   )staterM   )randint
isinstancerD   getattr)rQ   rR   r   r$   r1   r3   r4   r]   r[   r\   datahandlers        ``     r=   make_intrd      s    , , , , , ,  A'5~h'?'?$nN""Lv~|F!F~FFfc"" 	A+9>&+A+A(L.ff--G7LCqCNCCDD 64vA@@@@DKr<   )AliceBobCharlieDanEdithFrankGeorgeHannahIngridJerryKevinLauraMichaelNorbertOliverPatriciaQuinnRaySarahTimUrsulaVictorWendyXavierYvonneZelda   r/   return	list[str]c                    t          t          j        t          j        z   t          j        z   dz             fdt          |           D             S )N c                d    g | ],}d                                                              -S ) rM   )joinchoice).0_r-   r/   rR   s     r=   
<listcomp>z&make_random_string.<locals>.<listcomp>   s5    KKKQBGGFMM'M7788KKKr<   )r,   stringascii_lettersdigitspunctuationrange)rQ   rR   r/   r-   s    ``@r=   make_random_stringr      sO    6'&-7&:LLsRSSGKKKKKK%((KKKKr<   c                    |                     dd            |rt          | ||          S |pt          }|                    ||           S )Nr3   )r/   r   )rO   r   namesr   )rQ   rR   r-   r   r/   r4   s         r=   make_stringr      sS    
JJvt <!!VF;;;;G==q=)))r<   c                <   |                     dd            |8t          t          |                    fdt          |          D             }n	|pt          }t
          j                            |                    dt          |          |           |          S )Nr3   c                Z    g | ]'}t          |d z                                           (S )r%   )rD   zfill)r   xcat_lens     r=   r   z$make_categorical.<locals>.<listcomp>   s1    EEE3q1u::##G,,EEEr<   r   r   )	rO   lenrD   r   r   pdCategorical
from_codesr_   )rQ   rR   r-   r)   r4   r   s        @r=   make_categoricalr      s    
JJvtc'll##EEEEeGnnEEE"U>$$V^^As7||!^%L%LgVVVr<   zstring[python]zstring[pyarrow]categoryint8int16int32int64float8float16float32float64zdict[type | str, Callable]makec                  <    e Zd ZdZddZed             Zd Zd ZdS )MakeDataframePartzU
    Wrapper Class for ``make_dataframe_part``
    Makes a timeseries partition.
    Nc                    || _         |p t          |                                          | _        || _        || _        d S N)index_dtyper,   keys_columnsdtypesr4   )selfr   r   r4   columnss        r=   __init__zMakeDataframePart.__init__
  s8    &64#6#6r<   c                    | j         S r   )r   )r   s    r=   r   zMakeDataframePart.columns  s
    }r<   c                `    || j         k    r| S t          | j        | j        | j        |          S )zUReturn a new MakeTimeseriesPart object with
        a sub-column projection.
        r   )r   r   r   r   r4   )r   r   s     r=   project_columnsz!MakeDataframePart.project_columns  s@     dl""K KK	
 
 
 	
r<   c           	     v    |\  }}t          | j        |d         |d         | j        | j        || j                  S )Nr   r%   )make_dataframe_partr   r   r   r4   )r   part	divisions
state_datas       r=   __call__zMakeDataframePart.__call__!  sD     $	:"aLaLKLK
 
 	
r<   r   )	r5   r6   r7   r8   r   propertyr   r   r   r   r<   r=   r   r     sk         
      X
 
 


 

 

 

 

r<   r   c                ^   t           j                            |          }t          j        j                            |           r,t          j        |||                    d          d          }nyt          j        j        	                    |           rC|                    d          }	t          j
        |||	z   |	                              |           }nt          d|            t          |||||          }
|
j        d         |k    r |
j        d d         }
|
j        d         |k     |
S )NrE   	timestamp)rB   endrE   name)rB   stopr@   zUnhandled index dtype: )npr   RandomStater   apitypesis_datetime64_any_dtype
date_rangerW   is_integer_dtype
RangeIndexastype	TypeErrormake_partitionindexiloc)r   rB   r   r   r   r   r4   r^   r   r@   dfs              r=   r   r   .  s"   I!!*--E	v|++K88 ASvzz&'9'9
 
 
 
	&	&{	3	3 Azz&!!Ed
FFFMM
 
 ?+??@@@		>	>B
(2,#

WSbS\ (2,#

Ir<   abc                    t           j        j                            | |          pt	          |           ot	          |          S )zOSame as pandas.api.types.is_dtype_equal, but also returns True for str / object)r   r   r   is_dtype_equalr   )r   r   s     r=   same_astyper   B  s<    6<&&q!,, q!!?&<Q&?&?r<   r   r,   r   dict[str, type | str]c                z   
 i }|                                 D ]O\  }fd|                                 D             }t          |         t          |          |fi |} v r||<   Pt          j        ||           
 
fd|                                 D             }	|	r
                    |	d          

S )Nc                    i | ]B\  }}|                     d d          d         k    %|                     d d          d         |CS )r   r%   r   )rsplit)r   kkvks      r=   
<dictcomp>z"make_partition.<locals>.<dictcomp>L  sZ     
 
 
Ayya  #q(( IIc1a !(((r<   )r   r   c                Z    i | ]'\  }}|v 	t          ||         j                  $||(S r   )r   r$   )r   r   r   r   r   s      r=   r   z"make_partition.<locals>.<dictcomp>Y  sD       Aq<<Ar!u{ ; ;< 	
1<<r<   F)rX   )itemsr   r   r   	DataFramer   )r   r   r   r4   r^   rb   dtkwsresultupdate_dtypesr   r   s   `         @@r=   r   r   I  s    D  2
 
 
 

 
 
 b#e**e33s33<<DG	d%	9	9	9B    LLNN  M
  2YY}5Y11Ir<   MEMz
2000-01-01z
2000-12-3110s1c                   |t           t          t          t          d}t          t	          j        | ||                    }t          |          dz
  }|"t          j        	                    d|          }	nt          ||          }	g }
t          t          |          dz
            D ]*}|
                    |||dz            |	|         f           +||d<   d	}t          t	          j        d
|d                    \  }}t          t          |||          |
t          ||||t          |                                          |	d         |          |dt#          | |||||	          d          S )aS  Create timeseries dataframe with random data

    Parameters
    ----------
    start: datetime (or datetime-like string)
        Start of time series
    end: datetime (or datetime-like string)
        End of time series
    dtypes: dict (optional)
        Mapping of column names to types.
        Valid types include {float, int, str, 'category'}
    freq: string
        String like '2s' or '1H' or '12W' for the time series frequency
    partition_freq: string
        String like '1M' or '2Y' to divide the dataframe into partitions
    seed: int (optional)
        Randomstate seed
    kwargs:
        Keywords to pass down to individual column creation functions.
        Keywords should be prefixed by the column name and then an underscore.

    Examples
    --------
    >>> import dask.dataframe as dd
    >>> df = dd.demo.make_timeseries('2000', '2010',
    ...                              {'value': float, 'name': str, 'id': int},
    ...                              freq='2H', partition_freq='1D', seed=1)
    >>> df.head()  # doctest: +SKIP
                           id      name     value
    2000-01-01 00:00:00   969     Jerry -0.309014
    2000-01-01 02:00:00  1010       Ray -0.760675
    2000-01-01 04:00:00  1016  Patricia -0.063261
    2000-01-01 06:00:00   960   Charlie  0.788245
    2000-01-01 08:00:00  1031     Kevin  0.466002
    N)r   idr   yrB   r   rE   r%       eAr   rN   rE   zdatetime64[ns]2000)rB   rE   periodsr   zmake-timeseriesFmetar   labeltokenenforce_metadata)rD   r&   floatr,   r   r   r   r   r   r_   r   r   appendr   r   r   r   r   )rB   r   r   rE   rF   seedr4   r   rH   r   partsir   
meta_startmeta_ends                  r=   r   r   f  s   X ~Su5AAR]CnMMMNNIi..1$K| Y&&s&==

&{D99
 E3y>>A%&& < <iAE	*JqM:;;;;F6N"KFq Q Q QRRJ +vv66 qM
 
 uc64LL   r<   specr   r(   c                   t          | j                  dk    rXt          ddddd          t          dt          d          t          d	d
g d          t          dt                    g| _        g }i }t          | j        t                    rt          j	        | j        j
                  }| j        j        }| j        j        }t          j	        | j        j
                  | j        t          j        |          z  z   }t          t          j        |||                    d         |k     r                    |           ||t          j        |          z   }	}nt          | j        t$                    r{| j        j        }| j        |z  | j        z  }| j        |z  dz
  }t          t          j        d||                    d         |dz   k     r                    |dz              d|}	}nt-          d| j                   d|i}
| j        D ]G}|j        r|j        }nxt          |j        t                    r0t3          j        dd|j                                       d          }n.t9          |j        d          r|j        j        }n|j        j        }t?          |j                   D ]}|dz   }| | x|v r|dz   }| | x|v |                               |j        |<   |
!                    fdtE          |          #                                D                        |j$        #                                D ]\  }}||
 d| <   It                    dz
  }|MtK          t          tL                   tN          j(        )                    tU          d          |                    ntW          ||          fdt?          |          D             }tY          t[          | j        j        ||
|          |t]          | j        j        ||	||d         |
          dt_          d| j        |||          d           S )!aL  Generate a random dataset according to provided spec

    Parameters
    ----------
    spec : DatasetSpec
        Specify all the parameters of the dataset
    seed: int (optional)
        Randomstate seed

    Notes
    -----
    This API is still experimental, and will likely change in the future

    Examples
    --------
    >>> from dask.dataframe.io.demo import ColumnSpec, DatasetSpec, with_spec
    >>> ddf = with_spec(
    ...     DatasetSpec(
    ...         npartitions=10,
    ...         nrecords=10_000,
    ...         column_specs=[
    ...             ColumnSpec(dtype=int, number=2, prefix="p"),
    ...             ColumnSpec(dtype=int, number=2, prefix="n", method="normal"),
    ...             ColumnSpec(dtype=float, number=2, prefix="f"),
    ...             ColumnSpec(dtype=str, prefix="s", number=2, random=True, length=10),
    ...             ColumnSpec(dtype="category", prefix="c", choices=["Y", "N"]),
    ...         ],
    ...     ), seed=42)
    >>> ddf.head(10)  # doctest: +SKIP
         p1    p2    n1    n2        f1        f2          s1          s2 c1
    0  1002   972  -811    20  0.640846 -0.176875  L#h98#}J`?  _8C607/:6e  N
    1   985   982 -1663  -777  0.790257  0.792796  u:XI3,omoZ  w~@ /d)'-@  N
    2   947   970   799  -269  0.740869 -0.118413  O$dnwCuq\  !WtSe+(;#9  Y
    3  1003   983  1133   521 -0.987459  0.278154  j+Qr_2{XG&  &XV7cy$y1T  Y
    4  1017  1049   826     5 -0.875667 -0.744359  bJ3E-{:o  {+jC).?vK+  Y
    5   984  1017  -492  -399  0.748181  0.293761  ~zUNHNgD"!  yuEkXeVot|  Y
    6   992  1027  -856    67 -0.125132 -0.234529  j.7z;o]Gc9  g|Fi5*}Y92  Y
    7  1011   974   762 -1223  0.471696  0.937935  yT?j~N/-u]  JhEB[W-}^$  N
    8   984   974   856    74  0.109963  0.367864  _j"&@ i&;/  OYXQ)w{hoH  N
    9  1030  1001  -792  -262  0.435587 -0.647970  Pmrwl{{|.K  3UTqM$86Sg  N
    r   r   r   i@B T)r#   r$   r.   r   r   f)r#   r$   r   cr   )r   r   r  d)r#   r$   r-   s)r#   r$   r   r   r%   )r   r@   zUnhandled index: rE   z[^a-zA-Z0-9]r   r   c                :    i | ]\  }}|d v	|dg fv d| |S )>   r4   r'   r#   Nr   r   )r   r   r   col_names      r=   r   zwith_spec.<locals>.<dictcomp>  sR       1 >>>1TSUJCVCV  %%!%%qCVCVCVr<   Nr   r   c                :    g | ]}||d z            |         fS )rN   r   )r   r   r   r   s     r=   r   zwith_spec.<locals>.<listcomp>&  s/    OOOqiAE	"JqM2OOOr<   r   zmake-randomFr   )0r   rK   r   r   rD   r`   rJ   r   r   	TimestamprB   rE   rF   rI   	Timedeltar,   r   r   r   r@   rH   r   
ValueErrorr#   r$   resubrstriphasattrr   r5   r   r'   rY   r   r   r4   r	   r   r   r   r_   r&   r   r   r   r   r   )r   r   r   r   rB   r@   rF   r   r   r   r4   colr#   r   col_nkw_namekw_valrH   r   r  r   r   s                      @@@r=   r   r     s   T 4""caiPTUUUct<<<c=Q=Q=QRRRc---	
 GF $/#455 @T_233#7l4?011DMBLQUDVDV4VVU.QQQRR	R=3S!!!$ebl4.@.@&@H

	DO^	4	4 	@#-1AAmd"Q&qsHHHII	R=C!G$$S1W%%% $H

>T_>>???$d^F  9 9: 	(ZFF	3'' 	(VOSSY.AAHHMMFFSY'' 	(Y^FFY'Fsz"" 	9 	9AEE"(1%1118f<<	 #)1%1118f<<NN8$$$"yF8MM    &s 1 1 3 3     $':#3#3#5#5 9 928(..W..//9	9" i..1$K|$s)RY%6%6s3xxk%6%R%RSS

&{D99
OOOOOE+<N<NOOOE$//QQQ O!qM
 
 q$-~zRR   r<   )F)rQ   r&   rR   r   r   r0   r$   r?   r1   rT   r3   r2   )r   )r/   r&   r   r   )NFN)NN)r   r?   r   r?   )r   r,   r   r   r   )r   r   r   r(   )5
__future__r   r
  r   dataclassesr   r   r   typingr   r   r	   numpyr   pandasr   dask.dataframe._compatr
   dask.dataframe._pyarrowr   dask.dataframe.corer   dask.dataframe.io.ior   dask.dataframe.io.utilsr   
dask.utilsr   __all__r    r9   r   r   r   r   rS   r&   rd   r   r   r   r   r   rD   objectr   r   r   r   r   _MEr   r   r   r<   r=   <module>r      s   " " " " " " " 				  0 0 0 0 0 0 0 0 0 0 & & & & & & & & & &         0 0 0 0 0 0 : : : : : : ( ( ( ( ( ( ) ) ) ) ) ) 7 7 7 7 7 7 ( ( ( ( ( (   UDM"GT?#VTN#b!fd^$G G      /3 /3 /3 /3 /3 /3 /3 /3d                 1 1 1 1 1 1 1 1( % % % % % % % %." " " " &    B	 	 	<L L L L L
* * * *W W W W 
:
Kk{ 
HXXXjzzz$    &'
 '
 '
 '
 '
+ '
 '
 '
T  (      4 $dd 	s99	R R R Rj} } } } } } }r<   