
    >ie                       d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZ  ej	        d          Z
 ej	        d          Z ej	        d          Z ej	        d           d dlZd dlZd dlmZmZ d d	lmZ ej                            ej        d
k    d          ej                            ed          ej        j        gZ ed                                          Zej                            d          e_        e                                Z ej         d          d             Z!ej        "                    dd          ej        "                    dd          d                         Z#ej        "                    dd          d             Z$ej        "                    dd          ej        "                    dd          d                         Z%d Z&ej                            e d          d             Z'dS )    )annotationsN)
timeserieszdask.dataframepysparkpyarrowfastparquet)PANDAS_GE_150PANDAS_GE_200)	assert_eqlinuxzAUnnecessary, and hard to get spark working on non-linux platforms)reasonz/pyspark doesn't yet have support for pandas 2.01h)freqUTCmodule)scopec               #    K   t          j        t           j                  } t          j        j        j                            d                              d          	                    dd          
                                }|V  |                                 t          j                    t          j                    u r!t          j         t           j        |            d S d S )NlocalzDask Testingzspark.sql.session.timeZoner   )signal	getsignalSIGINTr   sqlSparkSessionbuildermasterappNameconfiggetOrCreatestop	threadingcurrent_threadmain_thread)prevsparks     <lib/python3.11/site-packages/dask/tests/test_spark_compat.pyspark_sessionr%   +   s      
 FM**D 	 (//88		 	 	,e	4	4		 
 KKK	JJLLL!!Y%:%<%<<<fmT***** =<    npartitions)      
   engine)r   r   c                   t          |          }|                     t                    }|                    |          j                            |d           t                              ||          }|                    |j	        j
                            d                    }|j        |k    sJ t          |t          d           d S )N	overwritemoder+   r   	timestampFcheck_index)strcreateDataFramepdfrepartitionwriteparquetddread_parquetassignr2   dttz_localizer'   r
   )r%   r'   tmpdirr+   sdfddfs         r$   $test_roundtrip_parquet_spark_to_daskrC   A   s     [[F

'
'
,
,C OOK  &..vK.HHH
//&/
0
0C
**s}/;;EBB*
C
CC?k))))c3E******r&   c                2   t          |          }|                     t                    }|j                            |dd           t
                              ||          }|                    |j        j	        
                    d                    }|                                                    d          }|                    |j                            d	          
          }t          |t                              d          d           d S )Nr-   name)r/   partitionByr0   r   r1   r(   )axisr5   )rE   Fr3   )r5   r6   r7   r9   r:   r;   r<   r=   r2   r>   r?   compute
sort_indexrE   astyper
   )r%   r@   r+   rA   rB   s        r$   )test_roundtrip_hive_parquet_spark_to_daskrK   S   s    [[F

'
'
,
,C If;FCCC
//&/
0
0C
**s}/;;EBB*
C
CC ++--
"
"
"
*
*C **#(//%00*
1
1Cc3>>q>))u======r&   c                   t          |          }t                              t          |          }|dk    rddini } |j        |f|dd| | j                            |          }|                                }|                    |j	        j
                            d                    }t          ||d	           d S )
N)r'   r   timesint96F)r+   write_indexr   r1   r3   )r5   r;   from_pandasr7   
to_parquetreadr:   toPandasr=   r2   r>   r?   r
   )r%   r'   r@   r+   rB   kwargsrA   s          r$   $test_roundtrip_parquet_dask_to_sparkrU   j   s     [[F
..+.
6
6C $*]#:#:gwFCN6F&eFFvFFF


$
$V
,
,C
,,..C **s}/;;EBB*
C
CCc3E******r&   c                   t          |          }d}d}t          j        t          |          t          j                            |          ddg|dz  z  ddg|dz  z  d	          }|                    d
dddd	          }t          d |j        D                       sJ | 	                    |          }|
                    |          j                            |d           t                              |dd          }t          d |j        D                       sJ |j                    t          ||d           d S )Nr)      )sizeTF   alicebob)abcdInt64Float64booleanstringc                V    g | ]&}t           j        j                            |          'S  pdapitypesis_extension_array_dtype.0dtypes     r$   
<listcomp>zItest_roundtrip_parquet_spark_to_dask_extension_dtypes.<locals>.<listcomp>   s*    UUU55e<<UUUr&   r-   r.   r   numpy_nullabler+   dtype_backendc                V    g | ]&}t           j        j                            |          'S re   rf   rk   s     r$   rn   zItest_roundtrip_parquet_spark_to_dask_extension_dtypes.<locals>.<listcomp>   s*    NNN%	.	.u	5	5NNNr&   r3   )r5   rg   	DataFramerangenprandomrJ   alldtypesr6   r8   r9   r:   r;   r<   r
   )r%   r@   r'   rX   r7   rA   rB   s          r$   5test_roundtrip_parquet_spark_to_dask_extension_dtypesry   ~   st   [[FKD
,t!!t!,,$!),5!TQY/		
 	
 C **		
 	
 C UU#*UUUVVVVV

'
'
,
,C OOK  &..vK.HHH
//&BR/
S
SCNN3:NNN   
z   c3E******r&   z'Requires pyarrow-backed nullable dtypesc           	     P   t          |          }d}d}t          j        d          t          j        d          t          j        d          t          j        d          t          j        d          t          j        d          g}t          j        t          |          |d	          }|                     |          }|                    d
|d
                             t          j
        j                            dd                              }|                    |          j                            |d           t                               |dd          }|j        j        j        t*                              dd          k    sJ |j                                        j        j        t*                              dd          k    sJ |                    dt          j        t*                              dd                    d	          }t5          ||d           d S )N      z8093.234z8094.234z8095.234z8096.234z8097.234z8098.234)r\   r]   r]      r-   r.   r   rp   zint64[pyarrow]Fr3   )r5   decimalDecimalrg   rs   rt   r6   
withColumncastr   r   ri   DecimalTyper8   r9   r:   r;   r<   r]   rm   pyarrow_dtypepa
decimal128rH   rJ   
ArrowDtyper
   )	r%   r@   r'   rX   decimal_datar7   rA   rB   expecteds	            r$   test_read_decimal_dtype_pyarrowr      s   [[FKD 	
##
##
##
##
##
##L ,t	
 	
 C 
'
'
,
,C
..c#hmmGK,=,I,I!Q,O,OPP
Q
QC OOK  &..vK.HHH
//&)/
L
LC5;$a(;(;;;;;5==?? ."--12E2EEEEEzz!r}}Q2233	
 	
 H c8//////r&   )(
__future__r   r~   r   sysr   pytestdask.datasetsr   importorskipr;   r   r   numpyru   pandasrg   dask.dataframe._compatr   r	   dask.dataframe.utilsr
   markskipifplatformskip_with_pyarrow_strings
pytestmarkrH   r7   indexr?   reset_indexfixturer%   parametrizerC   rK   rU   ry   r   re   r&   r$   <module>r      s   " " " " " "   



      $ $ $ $ $ $V)**
&
i
(
(V##  M " " "         ? ? ? ? ? ? ? ? * * * * * * KR    K@   
 K)
 jd##%%I!!%((		oo h+ +  +* 
33#=>>+ + ?> 43+  #=>>> > ?>>, 
33#=>>+ + ?> 43+$%+ %+ %+P %.WXX#0 #0 YX#0 #0 #0r&   