
    >ieX                       d dl mZ d dlZd dlZd dlZd dlZd dlmZmZ d dl	m
Z
 d dlZd dlZd dlZd dlmZ d dlmZ d dlmZ 	 d dlZd dlmZ d d	lmZmZmZmZ d d
lm Z m!Z! n# e"$ r Y nw xY wd dl#m$Z$ d dl%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/ d dl0m1Z1m2Z2m3Z3 d dl4m5Z5 d dl6m7Z7 d dl8m9Z9  ej:                    Z;d Z<e<Z= G d de&          Z>dS )    )annotationsN)OrderedDictdefaultdict)	ExitStack)parse)flatten)PANDAS_GE_201)ParquetFile)ex_from_sepget_file_schemegroupby_types
val_to_num)make_part_filepartition_on_columns)tokenize)
Engine_get_aggregation_depth_infer_split_row_groups_normalize_index_columns_parse_pandas_metadata_process_open_file_options_row_groups_to_parts_set_gather_statistics_set_metadata_task_size_sort_and_analyze_paths)_is_local_fs_meta_from_dtypes_open_input_files)UNKNOWN_CATEGORIES)Delayed)natural_sort_keyc                   |dv ri }|S t                      }t                      }t          d          t          j        |           } |dk    rt          j        fd| D                       }|D ]|\  }}|                    |t                                                    t          |                     |                    |t                                                    |           }nt          j        d | D                       }|D ]\  }}d|z  }|                    |t                                                    t          |                     |                    |t                                                    |           |                                D ]\  }}	||         }
t          |	          t          |
          k    rt                      }||         D ]D}|                    t          |          t                                                    |           Ed |
                                D             }t          d|z            t          |	          }t          |          d	k    r5d
 |
                                D             }t          j        d|z             t          d |                                D                       }|S )a  
    Extract categorical fields and labels from hive- or drill-style paths.
    FixMe: This has been pasted from https://github.com/dask/fastparquet/pull/471
    Use fastparquet.api.paths_to_cats from fastparquet>0.3.2 instead.

    Parameters
    ----------
    paths (Iterable[str]): file paths relative to root
    file_scheme (str):

    Returns
    -------
    cats (OrderedDict[str, List[Any]]): a dict of field names and their values
    )simpleflatother/hivec              3  V   K   | ]#}                     |          D ]\  }}||fV  $d S N)findall).0pathkvss       Elib/python3.11/site-packages/dask/dataframe/io/parquet/fastparquet.py	<genexpr>z!_paths_to_cats.<locals>.<genexpr>L   sC      !W!WTqyyQU!W!Wtq!1a&!W!W!W!W!W!W!W    c              3  ~   K   | ]8}t          |                    d           dd                   D ]\  }}||fV  9dS )r&   N)	enumeratesplit)r+   r,   ivals       r0   r1   z!_paths_to_cats.<locals>.<genexpr>Q   sk       
 
Ytzz#sPRs?S5T5T
 
+11cQH
 
 
 
 
 
 
r2   zdir%ic                B    g | ]}t          |          d k    |D ]}|S )   )len)r+   r-   cs      r0   
<listcomp>z"_paths_to_cats.<locals>.<listcomp>`   s4       #a&&1**a******r2   z)Partition names map to the same value: %sr:   c                    g | ]
}|d          S r    )r+   xs     r0   r=   z"_paths_to_cats.<locals>.<listcomp>h   s    <<<!<<<r2   z<Partition names coerce to values of different types, e.g. %sc                6    g | ]\  }}|t          |          fS r@   )list)r+   keyr.   s      r0   r=   z"_paths_to_cats.<locals>.<listcomp>n   s&    BBB63d1ggBBBr2   )r   r   toolzunique
setdefaultsetaddr   itemsr;   values
ValueErrorr   warningswarn)pathsfile_schemecatsraw_cats
partitionsrD   r8   i_valr7   r.   rawconflicts_by_valueraw_val	conflictsvals_by_typeexamplesr/   s                   @r0   _paths_to_catsr[   4   s    111==D}}HCALEf\!W!W!W!WU!W!W!WWW
" 	5 	5HCOOC''++JsOO<<<SUU++//4444	5  
 
!&
 
 
 
 
  	5 	5FAsA+COOC''++JsOO<<<SUU++//4444**,,  Qsmq66SXX!,#C= W W"--j.A.A355IIMMgVVVV -4466  I H9TUUU$Q'' |q  <<l&9&9&;&;<<<HMN  
 BBTZZ\\BBBCCDKr2   c                     e Zd Zed             Zed             Ze	 	 	 	 dd            Zed             Zed             Zed             Z	ed             Z
e	 	 	 	 	 	 	 	 	 	 	 	 dd            Zed             Ze	 	 	 	 	 dd            Ze	 	 	 	 	 dd            Ze	 	 	 	 	 	 	 	 dd            Ze	 	 	 dd            Zedd            ZdS )FastParquetEnginec           	     
   t          |j                  }|rB|
r@|j        r9|j        d         j        d         j        rt          |j        d           |_        i }|j        rH|j        rA|j                            dg           D ]%}d|v r|                    dd          ||d         <   &t          |          dk    }t          t                     }t          t                     }t          t                     }i }t          |j                  D ]\  }}|r#|r!t          j                            ||          r+|j        d         j        }t          |t                    r|                                n|}||s|j        }|pd	}nt%          d
          ||         r3||                             ||         d         d         dz   |f           n||                             d|f           |r|r||j        |j        g d}n|j        |j        d}g }|                                D ]\  }}|j        |         }|j        j        r"d}d}d}|j        d         |         d         L|j        d         |         |         }|j        d         |         |         }|j        d         |         |         }n||         dk    r|j        j        j        }|j        j        j        }|j        j        j        }||j        j        j        }||j        j        j        }t          |t          t<          f          rD|                    |d          dk    r*|                    d          }|                    d          }t          |t          t<          f          r|                    d          }t          |t>          j                   rCtC          ||         dd          }tE          j#        ||          }tE          j#        ||          }|                    |d          } |s|	r|du s|
s|| r|| k     rd}i }i } n~|r!|d                             ||||d           n||||gz  }|||<   B|s |	r|du s|
s|j        j$        dk    rd}i }i } n*|r|d                             d|i           |g dz  }|rE||                             |           |s(||                             tK          |                     |||||fS )zOrganize row-groups by file.r   c                @    t          | j        d         j                  S )Nr   )r!   columns	file_path)rA   s    r0   <lambda>z8FastParquetEngine._organize_row_groups.<locals>.<lambda>   s    .qy|/EFF r2   )rD   r`   
field_namepandas_typeNr:    zGlobal metadata structure is missing a file_path string. If the dataset includes a _metadata file, that file may have one or more missing file_path fields.r4   )file_path_0num-rowstotal_byte_sizer`   )rg   rh   minmax
null_countobjectbyteszutf-8tz)rn   TF)nameri   rj   rk   ro   NNN)&rC   rQ   
row_groupsr`   ra   sortedpandas_metadatagetintr   r5   fastparquetapifilter_out_cats
isinstancerm   decodefnrL   appendnum_rowsrh   rJ   	meta_data
statistics	min_value	max_valuerk   ri   rj   	bytearraynp
datetime64getattrpd	Timestamp
num_valuestuple)!clspfsplit_row_groupsgather_statisticsstat_col_indicesfiltersdtypes	base_pathhas_metadata_file	blocksizeaggregation_depthpqpartitionsrd   r<   single_rg_partsfile_row_groupsfile_row_group_statsfile_row_group_column_stats	cmax_lastrg	row_groupfpfpathr/   cstatsro   r7   columncmincmaxrk   rn   lasts!                                    r0   _organize_row_groupsz&FastParquetEngine._organize_row_groupsx   s   " BG}}
 		!		 		 a (+5			 #FF  BM = 	NR/ 	N'++Ir:: N N1$$34553M3MK,0 .//14%d++*400&1$&7&7#	&r}55 L	Q L	QMB	   O33IwGG
 
 "1%/B#-b%#8#8@BIIKKKbE}(  EE )RII$E   u% 7&--u/Eb/I!/Lq/PRT.UVVVV&--q"g666  hQ" ',$-$6+4+D#%	 AA %.$6+4+D A /5577 V9 V9GD!&.q1F'2 T9##%)
=/5a8D#%=#7#=b#AD#%=#7#=b#AD)+|)DT)J2)NJJ#D\X55#)#3#>#HD#)#3#>#HD)/)9)D)OJ#|'-'7'B'F#|'-'7'B'F !+4%1C D D<$/OOD$$?$?7$J$J'+{{7';';'+{{7';';)*ui6HII H-7->->w-G-G
%dBM:: =!(tT!B!BB#%<#<#<#<D#%<#<#<#<D(}}T488 $& )&.>$.F.F0 /G
  $|| 5: 179 4>@ ; %* 
?iL//,0+/+/2<	!" !"    #tT:&>>F*.	$ !(" %." 3Cd2J2J#4 3K !' 0 ;a ? ? 16-350:<7!E* 9iL//????"&8&8&88FF$ Q(/66q999* Q3E:AA%--PPP  '
 	
r2   c                T   g }|D ]\  }}|j         |         }|j        }t          |          D ]]\  }	}
|	rd|
_        |
j        }d|_        |j        }|r#d|_        d|_        d|_	        d|_
        d|_        d|_        d|_        d|_        ^||_        |                    |           |S )z]Turn a set of row-groups into bytes-serialized form
        using thrift via pickle.
        N)rq   r`   r5   ra   r~   key_value_metadatar   distinct_countrj   ri   r   r   	encodingstotal_uncompressed_sizeencoding_statsr|   )r   r   filenamerq   real_row_groups_	rg_globalr   r`   r<   colmdsts                r0   _get_thrift_row_groupsz(FastParquetEngine._get_thrift_row_groups;  s     & 	. 	.LAyi0I'G#G,, ) )3 )$(CM](,% ] ((,B%!BF!BF#'BL#'BL#-1*$(!! 'I""9----r2   Nc                    |r|                      |||          }d|fi}n8|j                            d ||fD                       }	d |D             }
d|	|
fi}|S )z1Generate a partition-specific element of `parts`.piecec                    g | ]
}|d k    |S )re   r@   r+   ps     r0   r=   z0FastParquetEngine._make_part.<locals>.<listcomp>u  s    $Q$Q$Q1bQr2   c                    g | ]
}|d          S r?   r@   r+   r   s     r0   r=   z0FastParquetEngine._make_part.<locals>.<listcomp>v  s    222B"Q%222r2   )r   sepjoin)r   r   rg_listfsr   r   rS   r   part	full_pathrq   s              r0   
_make_partzFastParquetEngine._make_part`  s      	6!88 O
 o/0DD $Q$QH0E$Q$Q$QRRI22'222Ji45Dr2   c                   |                     di           }g }d}t          |          dk    r                    |d                   r~|d         d}|
s/                    j                            dg                    }|
s|skt                                                  \  }}d}dD ]*}	 |                    |           d}# t          $ r Y 'w xY w|rfd	|D             }d}|r0t          j                            dg          fd
j        i|nsrFt          |          }fd|D             }fd|D             }|r|g k    rt          d d          t          |d d         fj        d|t          |          }|_        t          ||          _        |sfd|D             }nt          |          \  }}d|v }|r|
r|                    d           d}fd|D             }|r/t          j                            dg          fd
j        i|n^t          |          }t          |d d         fj        d||_        t          ||          _        |s|                                }t#          |	t%          j                            }j        rfdj        D             }|si _        ndt          |          t          j                  k    r?t          d                    j        j                                                            |dk    rL|rHt          |d         fd
j        i|}t-          d |j        D             |t1          |	                    }nd}|dk    r|rd}nd}i dd|d|d|ddd|d|d|d |d!|d"|d#|	d$|d%|d&d|i|S )'NdatasetFr:   r   T	_metadata)root)r   _common_metadatac                H    g | ]}j                             |g          S r@   r   r   r+   r{   baser   s     r0   r=   z;FastParquetEngine._collect_dataset_info.<locals>.<listcomp>  +    CCCRV[[$44CCCr2   	open_withc                >    g | ]}|                               |S r@   endswith)r+   r,   parquet_file_extensions     r0   r=   z;FastParquetEngine._collect_dataset_info.<locals>.<listcomp>  s;       !%$--@V2W2W  r2   c                >    g | ]}|                               |S r@   r   )r+   r{   r   s     r0   r=   z;FastParquetEngine._collect_dataset_info.<locals>.<listcomp>  s+    SSS"r{{;Q/R/RS2SSSr2   zLNo files satisfy the `parquet_file_extension` criteria (files must end with z).r   r   c                H    g | ]}j                             |g          S r@   r   r   s     r0   r=   z;FastParquetEngine._collect_dataset_info.<locals>.<listcomp>  r   r2   c                H    g | ]}j                             |g          S r@   r   r   s     r0   r=   z;FastParquetEngine._collect_dataset_info.<locals>.<listcomp>  s+    ;;;RV[[$,,;;;r2   c                &    g | ]}|j         v|S r@   )r`   )r+   r   r   s     r0   r=   z;FastParquetEngine._collect_dataset_info.<locals>.<listcomp>  s%    EEE"*1D1D11D1D1Dr2   zNo partition-columns should be written in the 
file unless they are ALL written in the file.
This restriction is removed as of fastparquet 0.8.4
columns: {} | partitions: {}inferc                    g | ]	}|j         
S r@   )rh   r   s     r0   r=   z;FastParquetEngine._collect_dataset_info.<locals>.<listcomp>(  s    GGGBR'GGGr2   adaptiver   rO   r   partsr   r   r   
categoriesindexr   r   r   aggregate_filesr   metadata_task_sizekwargs)popr;   isdirisfiler   r   r   findremoverL   r
   openr   rP   paths_to_catsrQ   copyr   rC   formatr`   keysr   rq   bool)r   rO   r   r   r   r   r   r   r   r   ignore_metadata_filer   r   r   dataset_kwargsr   _metadata_existsfns_update_pathsr{   len0schemer   _partitions	pf_sampler   r   s     `         `            @@r0   _collect_dataset_infoz'FastParquetEngine._collect_dataset_info{  s   .  Ir22 u::??rxxa11? 8D#' O#%99RV[[$9L-M-M#N#N  $ )+; )#:2774=="SW#X#X#X tS %;  B

2(,%     DCCCCCsCCCE#(  D FKK{ 344  g %  * u::D   ).  E TSSSSSSC (O4JO O O   !"1"I)+t ?M  )--!''V44( DCCCCCsCCCE  7ubAAE4  +c1 )$8 )

;'''#( ;;;;;s;;;E ) FKK{ 344  g %  )-- "1"I)+t ?M  "('V44( )!JJLLE 3MM
 
 7 
	EEEEbgEEEK [!!S\\11 3 4:6"*bgllnn3U3U	  P w&& )'!H  g % 	
 $;GG)2FGGG))$ $   $) z)) )#'  #( 
"
U
  !1
 U	

 D
 "
  !2
 *
 U
 w
  0
 
 
  !2
 !"4
  >!
 	
s   >C
C#"C#c                   |d         }|d         }|d         }d }|j         }|r/t          |          \  }}}	|                    |j                   n-g }|j        t          |j                  z   }d |D             d g}	|9t          |          dk    r&t          |          dk    r|d         	|d         }n|}t          ||||          \  }}||z   }
d }t          |t                    r|}||j
        }n(t          |t                    r|g}nt          |          }|rRt          |                              |
          s0t          d                    |t          |
                              |                    |          }fd|                                D             }|pd	}t          |t                    r|g}|D ]6}t%          |                    |          d
d           r||         j        ||<   7|D ]$}||
v rt+          j        t.          g          ||<   %|j        D ])}||
v r#t+          j        |j        |                   ||<   *t1          |
|||	          }||d<   ||d<   ||d<   ||d<   ||d<   |S )Nr   r   r   c                    i | ]}||S r@   r@   )r+   r-   s     r0   
<dictcomp>z5FastParquetEngine._create_dd_meta.<locals>.<dictcomp>a  s    #?#?#?QAq#?#?#?r2   r   r:   zAcategories not in available columns.
categories: {} | columns: {}c                D    i | ]\  }}                     ||          |S r@   )rt   )r+   r-   r.   storage_name_mappings      r0   r   z5FastParquetEngine._create_dd_meta.<locals>.<dictcomp>  s0    OOO1&**1a00!OOOr2   r@   numpy_dtype)r   r   
index_colscategories_dict)rs   r   extendrQ   r`   rC   r;   r   ry   dictr   strrH   intersectionrL   r   _dtypesrJ   r   rt   r   r   CategoricalDtyper   r   )r   dataset_infor   r   r   r`   	pandas_mdindex_namescolumn_namescolumn_index_namesall_columnsr   r   r   indcatcatcolmetar   s                     @r0   _create_dd_metaz!FastParquetEngine._create_dd_metaK  s    $W%!,/
&	 	( 'y11$" ((((K:RW5L#?#?,#?#?#? "&=S--11;1$$Q)C#A# %=\5+%
 %
!k "L0j$'' 	)(OJJ
C(( 	*$JJj))J  	c*oo::;GG 	//5vj${BSBS/T/T  
 J''OOOOOOO[b
j#&& 	&$J 	6 	6Cvzz#t<< 6$Sk5s 	S 	SCk!! 1>P=QRRRsg 	Q 	QF$$!#!4!P!P!Pv fjBTUU "(X %W%/\"%/\"*9&'r2   c                J   |d         }|d         }|d         }|d         }|d         }|d         }|d         }|d         }	|d	         }
|d
         }|d         }|d         }|d         }|d         }|d         }|d         }|d         }t          |d         |          }d t          |pg t                    D             }i }|	rt          |          dk    r|ng }t	          |j                  D ]\  }}||v s||v r|||<   t          |	||||t          |          |z            }	|p||j        |j	        |
d|}|	du rP|sNt          |t                    r9t          |          r*t          |d         t                    rd |D             g |fS |||	||||||j        |j	        |
dn|
|d}|s|dk    s|t          |          k    r |r|n|}|                     ||          \  }}ng g }}|ri }dt          ||          z   }g }t	          t          dt          |          |                    D ]:\  }}|                    ||f           | j        ||||z            |f||d         <   ;d } | |f|d|z   <   t!          d|z   |                                          \  }}|||fS )Nr   r   rO   r   r   r   r   r   r   r   r   r   r   r   r   r   r   c                    h | ]
}|d          S r?   r@   )r+   ts     r0   	<setcomp>z?FastParquetEngine._construct_collection_plan.<locals>.<setcomp>  s    OOO1!A$OOOr2   )	containerr:   )r   	root_catsroot_file_schemer   Fr   c                    g | ]}d |dfi	S )r   Nr@   )r+   r   s     r0   r=   z@FastParquetEngine._construct_collection_plan.<locals>.<listcomp>  s"    EEE)'It,-EEEr2   re   )r   r   r   r   r   r   r   r   r  r  r   r   zgather-pq-parts-r4   c                :    g g }}| D ]\  }}||z  }|r||z  }||fS r)   r@   )parts_and_statsr   statsr   stats        r0   _combine_partszDFastParquetEngine._construct_collection_plan.<locals>._combine_parts  sC    #%r5E&5 * *
d *!TME %<'r2   zfinal-)r   r   rC   r;   r5   r`   r   rH   rQ   rP   ry   r   _collect_file_partsr   ranger|   r    compute)!r   r  r   r   rO   r   r   r   r   r   r   r   r   r   r   r   r   r   r   filter_columnsr   _index_colsr7   ro   common_kwargsdataset_info_kwargspf_or_pathsr  gather_parts_dskfinalize_listtask_ifile_ir  s!                                    r0   _construct_collection_planz,FastParquetEngine._construct_collection_plan  s    $W%W%y)$'(:; -	()<= (	()<=!,/
!,/
h'&'89()<=)*>?h' 5-.
 
 PO2(N(N(NOOO%6X3z??a;O;OjjVX ,, 	+ 	+GAt{""dn&<&<)* &
 3 !!N2
 
 *7Z ""	
 

 
 &&$ '5$'' ' E

 ' 58S))	 ' FEuEEE   0!2 0!2" "(0i!2
 
  (	T!Q&&!CJJ.. !2<""uK22;@STTLE55
 r5E T#% )HU<O,P,PP "&/!SZZ);<<' '  NFF "(($888/fv0B'BBC+;$]2%677( ( ( 6D]4S D1&x$8HIIQQSSue]**r2   c                j   |d         }|d         }|d         }|d         }|d         }|d         }|d         }	|d         }
|                     d	d           }|                     d
d           }|                     dd           }|d         }t          |t          j        j                  s*t	          ||j        |          }|pi |_        |r||_        n|}|                     |||||||||	|

  
        \  }}}}}t          |||
||||| j
        |||t          |j                  d	  	        \  }}||fS )Nr   r   r   r   r   r   r   r   r   r  r  r   r   )r   r   r   rS   )make_part_kwargs)rt   ry   rv   rw   r
   r   rQ   rP   r   r   r   rC   )r   pf_or_filesr  r   r   r   r   r   r   r   r   r   r  r  r   r   r   r   r   r   r  s                        r0   r  z%FastParquetEngine._collect_file_parts%  s    !&./AB/0CD./AB%i0$X.'4	/0CD'++K>>	'++K>>	.223EtLL/0CD +{'BCC 	'  B  o2BG 2!1 B $$
 
	
 ' , 'N&"27mm	 
 
 
u" e|r2   r   Fc                   |t          d          |t          d          |                     |||||||	|
|||||          }|                     |          }|                     |          \  }}}|d         }|du r!d |j        v r|                    d gd           t          |          r-||d         d<   |d	         |d         d	<   |d
         |d         d
<   t          |          rRt          |d         d                   dk    r3|d         }d |_        d |j        _        d |_	        ||d         d         d<   ||||fS )Nz@`use_nullable_dtypes` is not supported by the fastparquet enginez:`dtype_backend` is not supported by the fastparquet enginer   FT)r`   inplacer   r  r   r   r   r:   r   parquet_file)
rL   r   r  r%  r`   dropr;   rq   fmd_statistics)r   r   rO   r   r   use_nullable_dtypesdtype_backendr   r   r   r   r   r   r   r   r   r  r
  r   r  r  r   s                         r0   read_metadatazFastParquetEngine.read_metadataq  s   & *R   $L  
 00 "
 
" ""<00 '*&D&D\&R&R#um W%E>>ddl22IItfdI333
 u:: 	L(5E!H_%,89L,ME!H()+78J+KE!H'(u:: 	;#eAhw/00A55 d#B BM $BF!BN8:E!H_%n5eUE**r2   c                    | t           k    S r)   )r]   )r   s    r0   multi_supportzFastParquetEngine.multi_support  s    '''r2   r@   c
           	        d}|sdn|	}	t          |t                    r|d gk    rg }d}||z  }|
                    dd           t          |t                    s|g}|d         }t          |t                    rTt          |d         t                    rJ g }dt          d |D             f|j        |	pdd|
                    di           |D ]}t          |          dk    rn0t          |d         f|j        |	pdd|
                    di           }t          |j	                  }|d         pt          t          |                    }|fd	|D             z  }|z  t          |          t          j	                  k     }nQr@g }|D ]8}|d         }t          |t                    rt          j        |          }||z  }9d}nt          d
          |rt          5  |D ]C}|j        D ]9}|j        }|r.t          |t                    r|                                |_        :D|j        _	        j        }                                 |_        d d d            n# 1 swxY w Y   |rdj        v rdg}||z  }|pi _        |r|_        fd_         | j        f||||d|
                    di           S t          dt3          |                     )NFTr+  r   c                    g | ]
}|d          S r?   r@   r   s     r0   r=   z4FastParquetEngine.read_partition.<locals>.<listcomp>  s    ***aQqT***r2   r   r   r:   c                0    g | ]}j         |z            S r@   )rq   )r+   r   r+  	rg_offsets     r0   r=   z4FastParquetEngine.read_partition.<locals>.<listcomp>   s5     # # # %/Y?# # #r2   z&Neither path nor ParquetFile detected!__index_level_0__c                     j         S r)   )r   )argsr+  s    r0   rb   z2FastParquetEngine.read_partition.<locals>.<lambda>/  s
    l1 r2   )r   r`   r   r   readzExpected tuple, got )ry   rC   r   r   r   r
   r   rt   r;   rq   r  rm   pickleloadsrL   _FP_FILE_LOCKr`   ra   rz   r-  rQ   
_set_attrsrP   r   pf_to_pandastype)r   r   piecesr`   r   r0  r   r  r  r   r   null_index_namesamplerq   r   _pfn_local_row_groupslocal_rg_indicesupdate_parquet_filergsr   chunkr/   	save_catsr+  r7  s                           @@r0   read_partitionz FastParquetEngine.read_partition  s:     !*9EE		eT"" 	 "&uG zz.$77 &$'' 	XFfe$$ X	D&)S)) ,K#+++
	***6***  g"+e    jjB//	    $ 4 4E v;;!++ %(!!H&(g!*!3e  %jjB77	   *-S^)<)<&',Qx'R4>P8Q8Q3R3R$ # # # # #"2# # # J !33II&)*ooL<S8T8T&T## K
# & &E  (C!#u-- 0$l3//#%JJ&*## !!IJJJ" 2" 2 2( = =%'Z = =E %A  =Z5%9%9 =23((**=
 3=L$/ , 1I ++---(1L%2 2 2 2 2 2 2 2 2 2 2 2 2 2 2  %&,*>>>01Eu$G !*RL <+;( 2111  
 $3#%  **VR((   BDLLBBCCCs   .A5I//I36I3c                   d d          n|j         t          |j                  z   |rfd|D             z  |j        }t	          d |D                       }	|                    |	||          \  }
}t          t          j                  t          d          k    r3t          r,|
j         j
        r t          j        g t                    |
_         dt          t                    }|D ]2}|                    |          }||                             |           3t#          |fi t%          |          rdddnU|t          t'                                        |j                             d	 |                                D             d
dd\  }}t-                      5 }t/          |                                t3          t          |                                          f|||d|          D ]W\  }}||         D ]I}|j        fd|                                D             } |j        |||f||j        |d| z  JX	 d d d            n# 1 swxY w Y   |
S )Nc                    g | ]}|v|	S r@   r@   )r+   r7   r`   s     r0   r=   z2FastParquetEngine.pf_to_pandas.<locals>.<listcomp>V  s#    ===aAW,<,<,<,<,<r2   c              3  $   K   | ]}|j         V  d S r)   )r}   r   s     r0   r1   z1FastParquetEngine.pf_to_pandas.<locals>.<genexpr>Z  s$      --22;------r2   z	2023.02.0)dtyper   F	readahead)allow_precachedefault_cachec                    g | ]}|S r@   r@   )r+   rI  s     r0   r=   z2FastParquetEngine.pf_to_pandas.<locals>.<listcomp>v  s    "E"E"E33"E"E"Er2   rv   )metadatar`   rq   default_enginerS  )r   context_stackprecache_optionsc                `    i | ]*\  }}||                     d           r|n|z            +S )z-catdefr   )r+   ro   r.   startthislens      r0   r   z2FastParquetEngine.pf_to_pandas.<locals>.<dictcomp>  sX        &T1 #}}Y77<AA!"557?#:!;	  r2   )assignpartition_metainfile)r`   rC   rQ   rq   sumpre_allocateparse_versionrv   __version__r	   emptyr   Indexrl   r   row_group_filenamer|   r   r   rH   r   rK   r   zipr   r   r}   rJ   read_row_group_filer]  )r   r   r   r`   r   r   open_file_optionsr   rI  sizedfviews	fn_rg_mapr   r{   rX  stackr^  r   rZ  r[  s      `               @@r0   r@  zFastParquetEngine.pf_to_pandas@  sV   " aaajGGj4==0G 	>====5====G m-------OOD':uEE	E+122mK6P6PPP Q
  Q "F333BJ  %%	 	% 	%B&&r**BbM  $$$$ /I/
 /
  ##	&+%0   !##CLL$=$=bj$I$IJJ"E"E)2B2B2D2D"E"E"E&3%0 /
 /
++$ [[ !	%E!  !))**"'%5	 
 ( 	 	  %  %
F $B- % %B kG     */  E +B*"		
  %')'8%	 	 !	 	 	 W$EE-% %!	% !	% !	% !	% !	% !	% !	% !	% !	% !	% !	% !	% !	% !	% !	%D 	s   )B)II#&I#r   utf8c           	     <   |
g }
|r|d}|                     |d           |	dk    s+t          |	t                    r%d|	                                v rt	          d          d}|rp	 t
          j                            ||j                  }|	                    |j
                            |dg                    }n# t          t          f$ r d}Y nw xY w|rVdd	lm} |j        d
vrt	          d          t!          |j                  t!          |j                  t!          |          z
  k    s%t!          |          t!          |j                  k    r:t	          d                    |j        t)          |j                                      t+          j        |j                  j        |j                  |||j                 j                  j        k                                    rmt	          d                    t!          |j                                                  t!          |j                                                  z                      ||j        |z            }|j        }t
          j                            |j                  }|s+t!          |
                               |d         g          sd}|sjt
          j        !                    |          }|
d         |v r||
d                  d         d         nd }|d         }||d         |k    rt	          d          n"t          j        j"        |j        f|	|
|d|}d}|A|j#        pg }|$                    d |                                D                        ||_#        d|i}||||fS )NTexist_okr   zM"infer" not allowed as object encoding, because this required data in memory.Fr   r   r   )to_object_string)r'   rc  r$   z?Requested file scheme is hive, but existing file scheme is not.z5Appended columns not the same.
Previous: {} | New: {}zAppended dtypes differ.
{}ro   rj   r4   	divisionszThe divisions of the appended dataframe overlap with previously written divisions. If this is desired, set ``ignore_divisions=True`` to append anyway.
- End of last written partition: {old_end}
- Start of first new partition: {divisions[0]})object_encodingr   ignore_columnsc                V    g | ]&\  }}t           j                            ||           'S )rD   valuerv   parquet_thriftKeyValuer+   rD   ry  s      r0   r=   z6FastParquetEngine.initialize_write.<locals>.<listcomp>  B       "U  .77Cu7MM  r2   r-  )%mkdirsry   r   rK   rL   rv   rw   r
   r   existsr   r   OSErrordask.dataframe._pyarrowrs  rP   rH   r`   rQ   r   rC   r   Seriesr   loc_metaanyrJ   r-  writerfind_max_partrq   r   sorted_partitioned_columnsmake_metadatar   r   )r   rj  r   r,   r|   partition_onignore_divisionsdivision_infoschemaru  r   custom_metadatar   metadata_file_existsr   rs  r-  i_offsetminmaxold_endrt  kvmextra_write_kwargss                          r0   initialize_writez"FastParquetEngine.initialize_write  s     J 	$m+#
		$	&&&g%%-- &29_=S=S=U=U2U2U8  
  % 	 !_000II')yydK=P1Q1Q'R'R$$Z(     9	@@@@@@~%>>> U   bj//S__s<7H7H%HHHL!!S\\11 --3VBJRZ@P@P-Q-Q  
 	")$$(4##BrzN$899@Acee
3 !188BIOO--..RY__5F5F1G1GG    
\12&C")77GGH# ,:33]65J4KLL ,'+$# $CCBGG
 "!}.. :a=)%044 
 *+6	&9Q<7+B+B$I   $2 /%+	 
  C H&(.BCJJ &5&;&;&=&=     &)C"#S\24FFFs   'AB= =CCc
                   t          j         |          }|j        D ]1}	 |j                                        |_        "# t          $ r Y .w xY w|	r-|+|j        d |	                                D             z   |_        t          |          sg }n|rmfd}t          t          j
                  t          d          k    rt          ||||||j        |          }nt          |||||j        |j        |	  	        }n                    j                            ||g          d          5 }t          |          |_        t!          |||j        ||          }d d d            n# 1 swxY w Y   |j        D ]	}||_        
|g}|r|S g S )Nc                V    g | ]&\  }}t           j                            ||           'S rx  rz  r}  s      r0   r=   z5FastParquetEngine.write_partition.<locals>.<listcomp>'  r~  r2   c                2                         | d          S )NTrp  )r  )rA   r   s    r0   rb   z3FastParquetEngine.write_partition.<locals>.<lambda>1  s    ryyTy:: r2   z0.1.4wb)compressionr-  )r   r  ro   rz   AttributeErrorr   rJ   r;   ra  rv   rb  r   r   r   r   r}   r   r`   ra   )r   rj  r,   r   r   r  return_metadatar-  r  r  r   r/   rI  r  filr   rJ  s      `             r0   write_partitionz!FastParquetEngine.write_partition  s    inn 	 	A!    	s%(%; &5&;&;&=&=  &C" 2ww 	CC 	::::F[455w9O9OOO*dHc;QW  + FG
 
 dH%566== "2ww#SZ[c                
  + +"*$C 	JIs!   ?
AA/.E))E-0E-c                   t          j         |          }|j        }|r|D ]I}	|	Et          |	t                    r|	D ]}
|                    |
           4|                    |	           J||_        |j                            |dg          }t          j        	                    |||j
        d           |j                            |dg          }t          j        	                    |||j
                   d S )Nr   F)r   no_row_groupsr   rr  )r   rq   ry   rC   r|   r   r   rv   r  write_common_metadatar   )r   r   r
  r   r,   r|   r   r  rI  r   rr{   s               r0   write_metadataz FastParquetEngine.write_metadataP  s   	$o 	 ' '>!"d++ '!# * *AJJqMMMM* 

2"EdK011B44ERWE 5   
 V[[$ 234400Ubg0NNNNNr2   )NNNN)NNNNNNr   NNFNN)Nr@   NNN)NNNNN)FNFNr   rn  NNrp   )F)__name__
__module____qualname__classmethodr   r   r   r   r  r%  r  r1  r3  rL  r@  r  r  r  r@   r2   r0   r]   r]   w   sb       @
 @
 [@
D " " ["H 
    [4 M
 M
 [M
^ Q Q [Qf C+ C+ [C+J I I [IV 
  #"#H+ H+ H+ [H+T ( ( [(  }D }D }D [}D~  ^ ^ ^ [^@  kG kG kG [kGZ  > > > [>@ O O O [O O Or2   r]   )?
__future__r   r   r<  	threadingrM   collectionsr   r   
contextlibr   numpyr   pandasr   tlzrE   packaging.versionr   ra  	dask.corer   dask.dataframe._compatr	   rv   r
   fastparquet.utilr   r   r   r   fastparquet.writerr   r   ImportError	dask.baser   dask.dataframe.io.parquet.utilsr   r   r   r   r   r   r   r   r   r   dask.dataframe.io.utilsr   r   r   dask.dataframe.utilsr   dask.delayedr    
dask.utilsr!   RLockr>  r[   r   r]   r@   r2   r0   <module>r     s   " " " " " "        0 0 0 0 0 0 0 0                         4 4 4 4 4 4       0 0 0 0 0 0	''''''XXXXXXXXXXXXGGGGGGGGG 	 	 	D	      
                        W V V V V V V V V V 3 3 3 3 3 3             ' ' ' ' ' '  	!!; ; ;~  
mO mO mO mO mO mO mO mO mO mOs   A# #A+*A+