
    tf\.                        d dl mZ d dlZd dlZd dlmZmZ d dl	Z	d dl
mZ d dlmZ d dlmZmZmZmZ d dlmZ d dlmZ d d	lmZmZ erenej6                  Zdd
ddddefdZddZ	 	 	 	 	 ddZy)    )annotationsN)is_list_like	is_scalar)methods)PANDAS_GE_200)	DataFrameSeriesapply_concat_applymap_partitionshas_known_categories)
no_default)Mget_meta_library_Fc                    t         t        j                  t        j                  f      rt        j                   f|||||||d|S d}	d}
t         t              r6t        j                         st        |	      t               st        |
      t         t              r|x j                  dk(  j                         rt        |	       j                  dk(  j                         rt        |	       j                  j                  dg      j                  }nt         fd|D              st        |	      t         fd	|D              st        |
      t        t!               j                   f|||||||d|S )
a
  
    Convert categorical variable into dummy/indicator variables.

    Data must have category dtype to infer result's ``columns``.

    Parameters
    ----------
    data : Series, or DataFrame
        For Series, the dtype must be categorical.
        For DataFrame, at least one column must be categorical.
    prefix : string, list of strings, or dict of strings, default None
        String to append DataFrame column names.
        Pass a list with length equal to the number of columns
        when calling get_dummies on a DataFrame. Alternatively, `prefix`
        can be a dictionary mapping column names to prefixes.
    prefix_sep : string, default '_'
        If appending prefix, separator/delimiter to use. Or pass a
        list or dictionary as with `prefix.`
    dummy_na : bool, default False
        Add a column to indicate NaNs, if False NaNs are ignored.
    columns : list-like, default None
        Column names in the DataFrame to be encoded.
        If `columns` is None then all the columns with
        `category` dtype will be converted.
    sparse : bool, default False
        Whether the dummy columns should be sparse or not.  Returns
        SparseDataFrame if `data` is a Series or if all columns are included.
        Otherwise returns a DataFrame with some SparseBlocks.

        .. versionadded:: 0.18.2

    drop_first : bool, default False
        Whether to get k-1 dummies out of k categorical levels by removing the
        first level.

    dtype : dtype, default bool
        Data type for new columns. Only a single dtype is allowed.

        .. versionadded:: 0.18.2

    Returns
    -------
    dummies : DataFrame

    Examples
    --------
    Dask's version only works with Categorical data, as this is the only way to
    know the output shape without computing all the data.

    >>> import pandas as pd
    >>> import dask.dataframe as dd
    >>> s = dd.from_pandas(pd.Series(list('abca')), npartitions=2)
    >>> dd.get_dummies(s)
    Traceback (most recent call last):
        ...
    NotImplementedError: `get_dummies` with non-categorical dtypes is not supported...

    With categorical data:

    >>> s = dd.from_pandas(pd.Series(list('abca'), dtype='category'), npartitions=2)
    >>> dd.get_dummies(s)  # doctest: +NORMALIZE_WHITESPACE
    Dask DataFrame Structure:
                       a      b      c
    npartitions=2
    0              bool  bool  bool
    2                ...    ...    ...
    3                ...    ...    ...
    Dask Name: get_dummies, 2 graph layers
    >>> dd.get_dummies(s).compute()  # doctest: +ELLIPSIS
           a      b      c
    0   True  False  False
    1  False   True  False
    2  False  False   True
    3   True  False  False

    See Also
    --------
    pandas.get_dummies
    )prefix
prefix_sepdummy_nacolumnssparse
drop_firstdtypez`get_dummies` with non-categorical dtypes is not supported. Please use `df.categorize()` beforehand to convert to categorical dtype.z`get_dummies` with unknown categories is not supported. Please use `column.cat.as_known()` or `df.categorize()` beforehand to ensure known categoriesobjectstringcategory)includec              3  N   K   | ]  }t        j                  |           y wN)r   is_categorical_dtype.0cdatas     `/var/www/html/software/conda/envs/higlass/lib/python3.12/site-packages/dask/dataframe/reshape.py	<genexpr>zget_dummies.<locals>.<genexpr>   s      Nw33DG<Ns   "%c              3  :   K   | ]  }t        |           y wr   r   r!   s     r%   r&   zget_dummies.<locals>.<genexpr>   s     BQ'Q0Bs   )
isinstancepdr	   r   get_dummiesr   r    NotImplementedErrorr   dtypesany_metaselect_dtypesr   allr   r   )r$   r   r   r   r   r   r   r   kwargsnot_cat_msgunknown_cat_msgs   `          r%   r*   r*      s   t $BLL12~~

!!

 

 
	
	( 	  $++D1%k22#D)%o66	D)	$?x',,.)+66x',,.)+66jj..
|.DLLGNgNN)+66B'BB%o66**       c                   t        |      r|t        d      t        |      r|t        d      t        j                  | |         st        d      t	        | |         st        d      t        |      r"t        |D cg c]  }t        |       c}      st        |      st        d      g d}t        |      r||vr$t        dd	j                  d
 |D              z         t        j                  | |   j                  j                  |      }t        |      r|}n.t        j                  j                  t        |      |fd|g      }|dv rt        |      rFt        j                  || |   j                   t        j"                  | j$                  |               }	nt        j                  |t        j"                  | j$                  |               }	|D ])  }
|	|
   j'                  | |   j(                  |
         |	|
<   + nFt        j                  |t*        j,                  t        j"                  | j$                  |               }	|||d}|dv r.t/        | gt        j0                  t        j2                  |	d|      }|dv r.t/        | gt        j4                  t        j2                  |	d|      }|dk(  rS |dk(  rS |dk(  rz  S |dk(  r.t/        | gt        j6                  t        j8                  |	d|      S |dk(  r.t/        | gt        j:                  t        j<                  |	d|      S t        c c}w )a  
    Create a spreadsheet-style pivot table as a DataFrame. Target ``columns``
    must have category dtype to infer result's ``columns``.
    ``index``, ``columns``, and ``aggfunc`` must be all scalar.
    ``values`` can be scalar or list-like.

    Parameters
    ----------
    df : DataFrame
    index : scalar
        column to be index
    columns : scalar
        column to be columns
    values : scalar or list(scalar)
        column(s) to aggregate
    aggfunc : {'mean', 'sum', 'count', 'first', 'last'}, default 'mean'

    Returns
    -------
    table : DataFrame

    See Also
    --------
    pandas.DataFrame.pivot_table
    Nz.'index' must be the name of an existing columnz0'columns' must be the name of an existing columnz 'columns' must be category dtypezs'columns' must have known categories. Please use `df[columns].cat.as_known()` beforehand to ensure known categoriesz4'values' must refer to an existing column or columns)meansumcountfirstlastzaggfunc must be either z, c              3  (   K   | ]
  }d | d   yw)'N )r"   xs     r%   r&   zpivot_table.<locals>.<genexpr>   s     1WqAaS(1Ws   )name)names)r9   r:   )r   r   index)r   rA   )rA   r   values)r7   r6   pivot_table_sum)chunk	aggregatemetatokenchunk_kwargs)r8   r6   pivot_table_countr7   r8   r6   r9   pivot_table_firstr:   pivot_table_last)r   
ValueErrorr   r    r   r   r0   joinr)   CategoricalIndexcat
categories
MultiIndexfrom_productsortedr   r   Indexr.   astyper,   npfloat64r
   	pivot_sum	pivot_aggpivot_countpivot_firstpivot_agg_first
pivot_lastpivot_agg_last)dfrA   r   rB   aggfuncvavailable_aggfuncscolumns_contentsnew_columnsrF   	value_colr1   pv_sumpv_counts                 r%   pivot_tablerh      s,   6 Uu}IJJWKLL''74;<<7,
 	
 	Vv.!1./VOPPBW0B!B%		1WDV1W(WW
 	
 **2g;??+E+EGT&mm00F^-.tWo 1 
 ##V<<#j&&hhrxx/D <<#hhrxx/D $ W	"&y/"8"8F9J9J99U"VYW ||rzz"((5/9R
 FCF/!#D##''#
 ##%D%%''%
 %	G		F	  	G	!D%%--%
 	
 
F	!D$$,,$
 	
 w /s   M c                    t         j                  j                  ddi      5  | j                  t        j
                  t        |||||d      cddd       S # 1 sw Y   yxY w)a  
    Unpivots a DataFrame from wide format to long format, optionally leaving identifier variables set.

    This function is useful to massage a DataFrame into a format where one or more columns are identifier variables
    (``id_vars``), while all other columns, considered measured variables (``value_vars``), are "unpivoted" to the row
    axis, leaving just two non-identifier columns, 'variable' and 'value'.

    Parameters
    ----------
    frame : DataFrame
    id_vars : tuple, list, or ndarray, optional
        Column(s) to use as identifier variables.
    value_vars : tuple, list, or ndarray, optional
        Column(s) to unpivot. If not specified, uses all columns that
        are not set as `id_vars`.
    var_name : scalar
        Name to use for the 'variable' column. If None it uses
        ``frame.columns.name`` or 'variable'.
    value_name : scalar, default 'value'
        Name to use for the 'value' column.
    col_level : int or string, optional
        If columns are a MultiIndex then use this level to melt.

    Returns
    -------
    DataFrame
        Unpivoted DataFrame.

    See Also
    --------
    pandas.DataFrame.melt
    zdataframe.convert-stringFmelt)rF   id_vars
value_varsvar_name
value_name	col_levelrG   N)daskconfigsetr   r   rj   r   )framerk   rl   rm   rn   ro   s         r%   rj   rj   =  s_    R 
4e<	= 

##FF!! $ 	


 

 

s   +AA )NNNr6   )NNNvalueN) 
__future__r   numpyrV   pandasr)   pandas.api.typesr   r   rp   dask.dataframer   dask.dataframe._compatr   dask.dataframe.corer   r	   r
   r   dask.dataframe.utilsr   dask.typingr   
dask.utilsr   r   booluint8_get_dummies_dtype_defaultr*   rh   rj   r=   r4   r%   <module>r      st    "   4  " 0 U U 5 " * &3T 
 
$RtD\ 3
r4   