
    >ie&A                       d dl mZ d dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlmZ d dlmZmZmZmZ d dlmZ d dlmZ d dlmZmZmZmZmZ g ZdD ]TZ ej          ej!         e"d	          e
                    Z# ej$        e#d          Z%e&                    e#e%f           U ej          e'd           ej!         e"d	                              Z# ej$        e#d          Z%e&                    e%(                                j)        e%j)        f            ej*         e"d           e"d           ej+        d           e"d           ej+        dd          d          Z, ej*         e"d           e"d           ej+        dd           e"d           ej+        ddd          d          Z- ej*         e"d           e"d           ej+        dd           e"d           ej+        ddd          d          Z.e,e-e.gZ/g Z0e/D ]Z1e1j2        3                    d          e1_2        e1j4        3                    d          e1_4        e0&                    e15                    e1j2        j6        7                     e"d                    e1j4        j6        7                     e"d                     !                     d" e/D             Z8d# e0D             Z9d$ e/D             Z:d% e0D             Z;d& Z<e	j=        >                    d'd( e	j?        d)e	j=        @                    ed*+          e	j=        @                    e d,+          g-           e	j?        de	j=        @                    ed*+          -          g          e	j=        A                    d.          e	j=        A                    d/          d0                                     ZBd1 ZCd2 ZDd3 ZEd4 ZFe	j=        >                    d5g d6          e	j=        >                    d7g d6          d8                         ZGe	j=        >                    d7d9d:g          d;             ZHd< ZId= ZJd> ZKdCd?ZLd@ ZM G dA dB          ZNdS )D    )annotationsN)_compat)PANDAS_GE_150PANDAS_GE_200PANDAS_GE_210tm)to_pyarrow_string)_concat)	assert_eqclear_known_categoriesget_string_dtype	make_metapyarrow_strings_enabled)TFbacbac)ordered   npartitions   indexabcdexxxxx   abcbcf8dtypevwxyzfghijyyyyy
   abbbaklmnozzzzz   bcbcccategoryxyzabc)r!   r#   c                B    g | ]}|                     |j                  S  	set_indexr#   .0is     Elib/python3.11/site-packages/dask/dataframe/tests/test_categorical.py
<listcomp>r8   M   s&    
,
,
,1;;qs
,
,
,    c                B    g | ]}|                     |j                  S r1   r2   r4   s     r7   r8   r8   N   s&    
-
-
-1;;qs
-
-
-r9   c                P    g | ]#}|                     |j        |j        g          $S r1   r3   r#   r"   r4   s     r7   r8   r8   O   s,    
3
3
3q1;;QSz""
3
3
3r9   c                P    g | ]#}|                     |j        |j        g          $S r1   r<   r4   s     r7   r8   r8   P   s,    
4
4
4q1;;QSz""
4
4
4r9   c                 f   t          j        t          t                    t	          j        t                               t          j        t          d t          D                       t	          j        d t          D                                  t          j        t          d t          D                       t	          j        d t          D                       j                   t          j        t          t                    t	          j        t                               t          j        t          d t          D                       t	          j        d t          D                                  t          j        t          d t          D                       t	          j        d t          D                                  t          j        t          d	 t          D                       t	          j        d
 t          D                                  t          j        t          d t          D                       t	          j        d t          D                       j                   t          j        t          t                    t	          j        t                               d S )Nc                    g | ]	}|j         
S r1   r#   r4   s     r7   r8   z3test_concat_unions_categoricals.<locals>.<listcomp>Y   s    %%%%%%r9   c                    g | ]	}|j         
S r1   r@   r4   s     r7   r8   z3test_concat_unions_categoricals.<locals>.<listcomp>Y   s    2H2H2H1132H2H2Hr9   c                    g | ]	}|j         
S r1   r   r4   s     r7   r8   z3test_concat_unions_categoricals.<locals>.<listcomp>^       ***Q***r9   c                    g | ]}|S r1   r1   r4   s     r7   r8   z3test_concat_unions_categoricals.<locals>.<listcomp>^       7K7K7Ka7K7K7Kr9   c                "    g | ]}|d dg         S r"   r$   r1   r4   s     r7   r8   z3test_concat_unions_categoricals.<locals>.<listcomp>f   s     0001C:000r9   c                "    g | ]}|d dg         S rG   r1   r4   s     r7   r8   z3test_concat_unions_categoricals.<locals>.<listcomp>g   s     222Q1c3Z=222r9   c                    g | ]	}|j         
S r1   r$   r4   s     r7   r8   z3test_concat_unions_categoricals.<locals>.<listcomp>l       &&&&&&r9   c                    g | ]	}|j         
S r1   rJ   r4   s     r7   r8   z3test_concat_unions_categoricals.<locals>.<listcomp>l       3I3I3IAAC3I3I3Ir9   c                    g | ]	}|j         
S r1   r"   r4   s     r7   r8   z3test_concat_unions_categoricals.<locals>.<listcomp>q   rK   r9   c                    g | ]	}|j         
S r1   rO   r4   s     r7   r8   z3test_concat_unions_categoricals.<locals>.<listcomp>q   rM   r9   c                    g | ]	}|j         
S r1   r   r4   s     r7   r8   z3test_concat_unions_categoricals.<locals>.<listcomp>v   rC   r9   c                    g | ]}|S r1   r1   r4   s     r7   r8   z3test_concat_unions_categoricals.<locals>.<listcomp>v   rE   r9   )r   assert_frame_equalr
   framespdconcatframes2assert_series_equalassert_index_equalframes3frames4r   frames5frames6r1   r9   r7   test_concat_unions_categoricalsr^   S   sH   '&//29W+=+=>>> %%f%%%&&	2H2H2H2H2H(I(I  
 **'***++RY7K7K77K7K7K-L-L-R  
 ''**BIg,>,>??? 0000011
	22'22233   &&g&&&''3I3I3I3I3I)J)J  
 &&g&&&''3I3I3I3I3I)J)J  
 **'***++RY7K7K77K7K7K-L-L-R  
 ''**BIg,>,>?????r9   numeric_onlyTFz"numeric_only=False not implemented)reasonz`numeric_only` not implemented)marksz(ignore:The default value of numeric_onlyzignore:Droppingc                   t          j        d t          t                    D             dt	          ddddddt          d         	          d gd
z            }|                                }t          |j                                        |j                                                   t          |j        	                                |j        	                                           t          rt          j        t          d          nt          j                    }|i nd|i}|5   |                    |j                  j        di |}d d d            n# 1 swxY w Y   |5   |                    |j                  j        di |}d d d            n# 1 swxY w Y   t          ||           |5  |                    |j                  j        	                                }d d d            n# 1 swxY w Y   |5  |                    |j                  j        	                                }d d d            n# 1 swxY w Y   t          ||           |5  |j                            |j                                                  }d d d            n# 1 swxY w Y   |5  |j                            |j                                                  }d d d            n# 1 swxY w Y   t          ||           d S )Nc                    i | ]
\  }}d |f|S unknownr1   r5   r6   dfs      r7   
<dictcomp>z-test_unknown_categoricals.<locals>.<dictcomp>   s"    ===B)Q===r9   re   objectr-   i8r   r   r   )parent_meta   zThe default of observed=Falsematchr_   r1   )dd	DataFrame	enumeraterT   r   computer   r!   value_countsnuniquer   pytestwarnsFutureWarning
contextlibnullcontextgroupbysumr#   count)shuffle_methodr_   ddfrg   ctxnumeric_kwargsexpectedresults           r7   test_unknown_categoricalsr   ~   s4   4 ,==9V+<+<===$ZdSSq		
 	
 	
 

 C 
Bce  ""BD$5$5$7$7888cemmoort||~~... 	&]*IJJJJ#%% 
 (/RRnl5SN	 : :'2::bd##'99.99: : : : : : : : : : : : : : :	 : :'SU##'99.99: : : : : : : : : : : : : : :fh	 0 0::bd##%--//0 0 0 0 0 0 0 0 0 0 0 0 0 0 0	 0 0SU##%--//0 0 0 0 0 0 0 0 0 0 0 0 0 0 0fh	 . .4<<%%++--. . . . . . . . . . . . . . .	 . .su%%++--. . . . . . . . . . . . . . .fhsl   '&EE E&&FFF52G33G7:G7 2H>>II2JJ J&2K$$K(+K(c                 |   t           d         } t                      rt          |           } t          |                               ddi          }t          j        d t          t                    D             d|d gdz                                ddi          }|	                    |j
        j                            g d          	          }|j
        j        j        sJ |j        j        j        rJ |j        j        j        rJ |                                }d
D ]}|du}|                    |          }|j        j        j        sJ |j        j        j        sJ |j        j        j        |k    sJ t'          ||                    ddi          d           |                    |d          }|j        j        j        sJ |j        j        j        sJ |j        j        j        |k    sJ t'          ||                    ddi          d           |                    d|          }|j        j        j        rJ |j        j        j        sJ |j        j        j        |k    sJ t'          ||                    ddi          d           |                    d|          }|j        j        j        sJ |j        j        t-                      k    sJ |j        j        j        |k    sJ t'          ||           |                    g d          }|j        j        j        sJ t'          ||           |                    dgd          |u sJ |                    g d          |u sJ |                    dg          |u sJ |                    g           |u sJ t/          j        t2                    5  |                    d           d d d            n# 1 swxY w Y   t/          j        t2                    5  |                    d           d d d            d S # 1 swxY w Y   d S )Nr   r#   y_)columnsc                    i | ]
\  }}d |f|S rd   r1   rf   s      r7   rh   z#test_categorize.<locals>.<dictcomp>   s"    >>>B)Q>>>r9   re   rl   )r"   r#   r$   )r!   )NTFFr   r    r-   )check_categoricalr   )r   split_everyT)r   r   r!      )r   foo)r[   r   r	   r   renamero   rp   rq   rZ   assignr!   catset_categoriesknownr   r   rr   
categorizer    r   astyper   r   ru   raises
ValueError)pdfmetar~   rg   r   known_indexddf2ddf_known_indexs           r7   test_categorizer      s    !*C   %$$!#&&--sDk-BBD
,>>9W+=+=>>>	
	 
 fc4[f!!  **suy//@@*
A
AC59?vzy}""""	B$  5(~~E~**w{    vzz~#{2222$		3
"344NNNN ~~Eq~99w{    vzz~#{2222$		3
"344NNNN ~~c~//7;$$$$vzz~#{2222$		3
"344NNNN~~d%~00w{    v|/111111z~#{2222$nnRtn<<O $****or""" >>3%u>--4444>>"E>**c1111%%se,,????%%b))_<<<< 
z	"	" & &1%%%& & & & & & & & & & & & & & & 
z	"	" * *5)))* * * * * * * * * * * * * * * * * *s$   O))O-0O-P11P58P5c                 n   t           j                            t          g dd          } t	          | j        t          j        g d                     t	          | j        d           t           j                            t          g d          } t	          | j        t          j        g d                     t	          | j        d           t           j                            t          g dd          } t	          | j        t          j        g d                     t	          | j        d           d S )NabcF)r   
categoriesr   )r   r   )r   d      T)	ro   categoricalcategorical_dtyper   r   r   rU   Indexr   )	cat_dtypes    r7   test_categorical_dtyper     s   00???E 1  I i"BH___$=$=>>>i'''00aOOO0TTIi"BH___$=$=>>>i'''00===$ 1  I i"BH]]]$;$;<<<i&&&&&r9   c                 "   t          j                    } t          j        | d          }|                                }|                                }|j        j        j        sJ t          ||
                    t          j        |j                            dd           |                    d          |u sJ t          j        |
                    |j                            d                    d          }|                                }|                    d          }|j        j        j        sJ t          ||
                    t          j        |j                            dd           |                                |u sJ d S )Nr   r   F)check_divisionsr   r   idxT)r   makeDataFramero   from_pandasrr   r   r   r   r   r   r3   rU   CategoricalIndexAr   )r   r~   r   r   s       r7   test_categorize_indexr     sx   


!
!C
.!
,
,
,C[[]]F>>D:>,V\::;;	    >>>&&#---- .))&(//%*@*@AAq
Q
Q
QC[[]]F>>>%%D:>,V\::;;	    >>s""""""r9   c                D   t          j        g dg dd          }t          j        |d         g dd          |d<   t          j        |d	          }t
          j                            d
          5  |                    d|j	        	          }|
                    d          |
                    d          }}t          |j                                                  dgk    sJ t          t          |j                                                            g dk    sJ |                    |j        |j	        	          }|
                    d          |
                    d          }}t          |j                                                  dgk    sJ t          t          |j                                                            g dk    sJ |                    dg d|j	                  }|
                    d          |
                    d          }}t          |j                                                  dgk    sJ t          t          |j                                                            g dk    sJ 	 d d d            d S # 1 swxY w Y   d S )N)r   r      rl   )r   r   r   r   r"   r#   r#   r   T)r   r   r   r   sync)	schedulerr   r   r   )r   r   r   )	divisionsr   )rU   rp   Categoricalro   r   daskconfigsetr3   r   get_partitionlistr   rr   sortedr#   )r}   rg   r   r   d1d2s         r7   test_categorical_set_indexr   6  s   	LLL/C/C/CDD	E	EBnRW$OOOBsG
rq)))A	6	*	* C CKKK77##Q__Q%7%7BBH$$&&''C50000F28++--..//???BBBBKKK77##Q__Q%7%7BBH$$&&''C50000F28++--..//???BBBBKKAMKRR##Q__Q%7%7BBH$$&&''C50000F28++--..//???BBBBBC C C C C C C C C C C C C C C C C Cs   4HJJJncategories)r   r   r   r   c                t   d}||z  }d t          |          D             }t          j        ||z  t          j                            |          d          }t          j        ||           }|d                             d          j        	                                |d<   |
                    d          }dS )z(https://github.com/dask/dask/issues/5343r'   c                2    g | ]}d t          |          z   S )CAT)strr4   s     r7   r8   zItest_categorical_set_index_npartitions_vs_ncategories.<locals>.<listcomp>S  s"    ===Q%#a&&.===r9   )idvaluer   r   r-   N)rangerU   rp   nprandomro   r   r   r   
as_orderedr3   )r   r   rows_per_categoryn_rowsr   r   r~   s          r7   5test_categorical_set_index_npartitions_vs_ncategoriesr   L  s     ,,F==%*<*<===J
,--	8H8H8P8PQQ C .+
6
6
6CD	  ,,0;;==CI
--

CCCr9   r   rl   c                   t          j        t          d          t          d          d          }t	                      rt          |          }t          j        |d          }|d                             d          |d<   |	                    |           }|
                                }|d                             d          |d<   t          ||           t          ||           d S )Nr'   
abababcbcbr   r   r   r#   r-   )rU   rp   r   r   r   r	   ro   r   r   repartitioncopyr   )r   rg   r~   r   s       r7    test_repartition_on_categoricalsr   ]  s    	E"IID,>,>??	@	@B   # r""
.
+
+
+C3xz**CH??{?33D	BgnnZ((BsGb#b$r9   c                 L   t          j        t          d          t          d          d          } | j                            d          | _        t          j        | d          }dt          |j                  v sJ dt          |j	                  vsJ t          |j        d          sJ t          |j	        d          rJ |                     | j                  }t          j        |dd	          }t          |j        d
          sJ t          |j        d
          rJ d S )Naaaaabbbbbcccccr+   r   r-   r   r   r   F)r   sortr   )rU   rp   r   r   r"   r   ro   r   dirr#   hasattrr3   r   )rg   r~   df2r   s       r7   "test_categorical_accessor_presencer   o  s   	D!<==E"IINN	O	OB4;;z""BD
.
+
+
+CCJJCE

""""35%     sue$$$$$
,,rt

C>#15999D4:|,,,,,sy,///////r9   c                 *   t          j        t          j        ddddt	          d          gi          d          } t          j        d          5 }|                                                                  d d d            n# 1 swxY w Y   |rJ d S )	Nr   r   r   nanr   r   T)record)	ro   r   rU   rp   floatwarningscatch_warningsr   rr   )rg   r   s     r7   test_categorize_nanr     s    	
cCc5<<89::
 
 
B 
	 	-	-	- "
!!!" " " " " " " " " " " " " " "s   'BBBc                H    t          | t          j                  r| n| j        S )N)
isinstancerU   r   r   rO   s    r7   get_catr     s     1b122=11=r9   c                    t          | t          |t          j                  rt	          j        |          n||           dS )z@left and right are equal, treating index and array as equivalentr   N)r   r   r   ndarrayrU   r   )leftrightr   s      r7   assert_array_index_eqr     sI    %eRZ88Ce'     r9   c                    t          j        dg di          } | d                             d          | d<   t          j        | d          }|j        j                                        }t          |t          j	        j
                  sJ d S )Nr   r   r-   r   )rU   rp   r   ro   r   r   r   as_knownr   coreSeries)rg   dask_dfret_types      r7   !test_return_type_known_categoriesr     sy    	sOOO,	-	-BgnnZ((BsGnR##Gy}%%''Hh///////r9   c                  :   e Zd Zej                            de          ej                            ddefdefdefg          d                         Z	ej                            de          ej                            dd e
d	d
g          fdi fdi fdi fd e
dg          fd e
g d          fd e
g d          fd e
g d          fdi fg	          d                         Zd Zej                            de          d             Zd Zd ZdS )TestCategoricalAccessorserieszprop, comparer   r   codesc                    |\  }}t          t          |          |          }t          t          |          |          } |||d           d S NFr   )getattrr   )selfr   propcomparesdsr   r   s           r7   test_propertiesz'TestCategoricalAccessor.test_properties  sT     271::t,,d++%888888r9   zmethod, kwargsadd_categoriesde)new_categoriesr   as_unorderedremove_categoriesr   )removalsrename_categories)r   r   freorder_categoriesr   r   )r   r   r   remove_unused_categoriesc                   t          j        |fi |}|\  }} |t          |                    } |t          |                    }t          ||d           t          t          |j                  j        t          |          j        d           t          t          |j                  j        t          |          j        d           d S r   )operatormethodcallerr   r   _metar   r   )	r   r   methodkwargsopr   r   r   r   s	            r7   test_callablez%TestCategoricalAccessor.test_callable  s      "644V44 22gajj>>GBKK&(E::::FL!!,H(!	
 	
 	
 	

 	FL!!)H%!	
 	
 	
 	
 	
 	
r9   c                    d }d }t          j         t          j        |                       t          j        |                      g          }|j        j        j         d S )Nc                 ~    t          j        dt          j        t          j        t          j        g          i          S )Nr   )rU   rp   r   r   r   r1   r9   r7   
make_emptyzBTestCategoricalAccessor.test_categorical_empty.<locals>.make_empty  s*    <bnbfbf5E&F&F GHHHr9   c                 V    t          j        dt          j        ddg          i          S )Nr   r   )rU   rp   r   r1   r9   r7   	make_fullzATestCategoricalAccessor.test_categorical_empty.<locals>.make_full  s%    <bnc3Z&@&@ ABBBr9   )ro   from_delayedr   delayedr   r   r   )r   r  r  r   s       r7   test_categorical_emptyz.TestCategoricalAccessor.test_categorical_empty  su    	I 	I 	I	C 	C 	C O5T\*55779Pi9P9P9R9RSTT	r9   c                   |\  }}|j         j        sJ |j                                         }|j         j        rJ t          j        t
          d          5  |j         j         d d d            n# 1 swxY w Y   t          j        t
          d          5  |j         j         d d d            n# 1 swxY w Y   t          j        t          d          5  |j         j         d d d            n# 1 swxY w Y   t          j        t          d          5  |j         j         d d d            n# 1 swxY w Y   |j         	                    g d          }|j         j        sJ t          j        |j         j        t          |          j                   t          |j         j        t          |          j                   |j                                         }|j         j        sJ |                                }t          j        |j         j        t          |          j                   t          |j         j        t          |          j                   d S )Nzwith unknown categoriesrm   r   )r   r   
as_unknownru   r   NotImplementedErrorr   r   AttributeErrorr   r   rY   r   r   r   rr   )r   r   r   dadbress         r7   test_unknown_categoriesz/TestCategoricalAccessor.test_unknown_categories  s   2v|V  6<].6OPPP 	 	F	 	 	 	 	 	 	 	 	 	 	 	 	 	 	].6OPPP 	 	FLL	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ]>1JKKK 	 	F	 	 	 	 	 	 	 	 	 	 	 	 	 	 	]>1JKKK 	 	FLL	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 V""???33v|
bf/1FGGGbflGAJJ,<===V__v|jjll
bf/1HIIIbflGCLL,>?????sH   A//A36A3B..B25B2C--C14C1D,,D03D0c                    t          j        g dd          }t          j        |d          }|j                                        }|j                                        }t          ||           d S )N)r   r   r   r-   r   r   )rU   r   ro   r   r   upperr   )r   r   r  r   r   s        r7   test_categorical_string_opsz3TestCategoricalAccessor.test_categorical_string_ops  s_    IoooZ888^Aq!!5;;==&(#####r9   c                    t          j        g dd          }t          j        |d          }t	          j        t                    5  |j                                         d d d            d S # 1 swxY w Y   d S )N)r   r   r   r-   r   r   )	rU   r   ro   r   ru   r   r  r   r"  )r   r   r  s      r7   "test_categorical_non_string_raisesz:TestCategoricalAccessor.test_categorical_non_string_raises  s    Iiiiz222^Aq!!]>** 	 	FLLNNN	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   A..A25A2N)__name__
__module____qualname__ru   markparametrize
cat_seriesr   r   r   dictr  r  r   r#  r%  r1   r9   r7   r   r     s       [Xz22[01	"+,	
 9 9  329 [Xz22[ttC:>>>?2R 2 $$"6"6"67 $$ooo"F"F"FG!44#G#G#GHtt???CCCD',
	
 
 
  32
&   [Xz22@ @ 32@6$ $ $    r9   r   )F)O
__future__r   rx   r
  r   numpyr   pandasrU   ru   r   dask.dataframe	dataframero   r   dask.dataframe._compatr   r   r   r   dask.dataframe._pyarrowr	   dask.dataframe.corer
   dask.dataframe.utilsr   r   r   r   r   r+  r   r   r   r   r   r   r   appendr   rr   r   rp   aranger   r   r   rT   rW   rg   r!   r   r#   r   r   r   rZ   r[   r\   r]   r^   r)  r*  paramxfailfilterwarningsr   r   r   r   r   r   r   r   r   r   r   r   r   r1   r9   r7   <module>r;     s   " " " " " "                       " " " " " " R R R R R R R R R R R R 5 5 5 5 5 5 ' ' ' ' ' '              
  G	.".hAAABBA	q	)	)	)Bq"gBIeeAhhnbnTT(^^<<===R^A1%%% 
  2::<<%rx0 1 1 1 BLT']]T']]RYq\\T']]RYq%%%   BLT']]T']]RYq"T']]RYq"D)))   BLT']]T']]RYr2T']]RYr2T***   Q

  B4;;z""BD4;;z""BDNN
		dh%%dd5kk22dh%%dd5kk22 	 	
 	
    -
,V
,
,
,
-
-W
-
-
-
3
3F
3
3
3
4
4G
4
4
4'@ '@ '@V !!!*N "   !!%%.N "  	
	
 
	
 
	
 	+##&J $  	
 	
 	
 . FGG-..&  &  /. HG/ 2& R@* @* @*F' ' '$# # #BC C C, 			22			22  32 32 A//  0/"0 0 0   > > >   0 0 0f f f f f f f f f fr9   