
    HR-eL                     x    d Z ddlZddlZddlZddlmZ ddlmZ ddl	m
Z
 dZg Zd Z	 dd	Zdd
Zd Zd Zd ZdS )z
This package contains functions for reading and writing Parquet
tables that are not meant to be used directly, but instead are
available as readers/writers in `astropy.table`.  See
:ref:`astropy:table_io` for more details.
    N)
minversion)AstropyUserWarning)NOT_OVERWRITING_MSGs   PAR1c                     |\	 |                                 }n# t          $ r Y dS w xY w|                    d          }|                    |           |t          k    S ||                    d          S dS )a  Checks if input is in the Parquet format.

    Parameters
    ----------
    origin : Any
    filepath : str or None
    fileobj : `~pyarrow.NativeFile` or None
    *args, **kwargs

    Returns
    -------
    is_parquet : bool
        True if 'fileobj' is not None and is a pyarrow file, or if
        'filepath' is a string ending with '.parquet' or '.parq'.
        False otherwise.
    NF   )z.parquetz.parq)tellAttributeErrorreadseekPARQUET_SIGNATUREendswith)originfilepathfileobjargskwargspos	signatures          7lib/python3.11/site-packages/astropy/io/misc/parquet.pyparquet_identifyr      s    " 	,,..CC 	 	 	55	 LLOO	S---		  !6777us    
''Fc           	        %&' t                      \  }}}t          | t          t          j        f          st          | d          st          d          |                    |           }|j        $d |j        	                                D             }	ni }	ddl
m}
m}m}m} i }d|	v rJ|	                    d                              d          }|                    |          }d	|v r|d	         }nd}d
 |j        D             }d}d|v r,d}|d         }|D ]}t'          ||                   D ]&||&<    t)          |                                          '|'                    |           |'                    |           t1          t2                              'fd|                                D                                 'g }'D ]7&&fd|	                                D             }|                    |           8|st9          d          |rIt1          |d                                                   D ]!}|'vr|d                             |           "|s |                    | ||          }|j        }nd}g }|D ]&|                     &          j!        }d}t          ||j"                  r|j#        }|j$        f}nt          ||j%                  r|j#        }n|}||&                                |'                                fvrT|(|(                    |)                                           n)|(                    |)                                |f           d& }||	v rtU          |	|                   }nc|r!d}tW          j,        d| dtZ                     n@t]          d |&         D                       }tW          j,        d| d| dtZ                     ||&                                k    rd| nd| }||(                    |           |(                    ||f           |r@t_          j0        dt1          tc          ||                              }  || |          }!n ||          }!tc          ||          D ]\  &%|&         2                                }"|                     &          j!        }||&                                |'                                fv r|"3                    %          }"nt          ||j"                  rti          |"          dk    r_t_          j5        |"          }"|j#        |&                                |'                                fv r|"3                    %d                   }"nt_          j0        d%          }"npt          ||j%                  r[|j#        |&                                |'                                fv r,t_          j6        %fd|"D             t^          j7                  }"|!8                     |
&|"                     |dd  |d!         D             }#|!j9                                        D ]8}"d"D ]3}$|$|#|"j:                 v r"tw          |"|$|#|"j:                 |$                    49|<                    |!          }!|!S )#a	  
    Read a Table object from a Parquet file.

    This requires `pyarrow <https://arrow.apache.org/docs/python/>`_
    to be installed.

    The ``filters`` parameter consists of predicates that are expressed
    in disjunctive normal form (DNF), like ``[[('x', '=', 0), ...], ...]``.
    DNF allows arbitrary boolean logical combinations of single column
    predicates. The innermost tuples each describe a single column predicate.
    The list of inner predicates is interpreted as a conjunction (AND),
    forming a more selective and multiple column predicate. Finally, the most
    outer list combines these filters as a disjunction (OR).

    Predicates may also be passed as List[Tuple]. This form is interpreted
    as a single conjunction. To express OR in predicates, one must
    use the (preferred) List[List[Tuple]] notation.

    Each tuple has format: (``key``, ``op``, ``value``) and compares the
    ``key`` with the ``value``.
    The supported ``op`` are:  ``=`` or ``==``, ``!=``, ``<``, ``>``, ``<=``,
    ``>=``, ``in`` and ``not in``. If the ``op`` is ``in`` or ``not in``, the
    ``value`` must be a collection such as a ``list``, a ``set`` or a
    ``tuple``.

    For example:

    .. code-block:: python

        ('x', '=', 0)
        ('y', 'in', ['a', 'b', 'c'])
        ('z', 'not in', {'a','b'})

    Parameters
    ----------
    input : str or path-like or file-like object
        If a string or path-like object, the filename to read the table from.
        If a file-like object, the stream to read data.
    include_names : list [str], optional
        List of names to include in output. If not supplied, then
        include all columns.
    exclude_names : list [str], optional
        List of names to exclude from output (applied after ``include_names``).
        If not supplied then no columns are excluded.
    schema_only : bool, optional
        Only read the schema/metadata with table information.
    filters : list [tuple] or list [list [tuple] ] or None, optional
        Rows which do not match the filter predicate will be removed from
        scanned data.  See `pyarrow.parquet.read_table()` for details.

    Returns
    -------
    table : `~astropy.table.Table`
        Table will have zero rows and only metadata information
        if schema_only is True.
    r
   z5pyarrow can only open path-like or file-like objects.Nc                 f    i | ].\  }}|                     d           |                     d           /S zUTF-8)decode.0kvs      r   
<dictcomp>z&read_table_parquet.<locals>.<dictcomp>   s6    WWWtq!ahhw'!2!2WWW    r   )ColumnTablemeta	serializetable_meta_yaml
r#   c                     i | ]}||S  r(   )r   names     r   r   z&read_table_parquet.<locals>.<dictcomp>   s    >>>$>>>r    F__serialized_columns__Tc                     g | ]}|v |	S r(   r(   )r   x	use_namess     r   
<listcomp>z&read_table_parquet.<locals>.<listcomp>   s    PPPQiqr    c                 &    g | ]\  }}|k    |S r(   r(   )r   ncolr)   s      r   r.   z&read_table_parquet.<locals>.<listcomp>   s"    KKKvq#ts{{{{{r    z3No include_names specified were found in the table.)columnsfilterstable::len::
   zNo z1 found in metadata. Guessing {strlen} for schema.c              3   X   K   | ]%}t          |                                          V  &d S )N)lenas_py)r   rows     r   	<genexpr>z%read_table_parquet.<locals>.<genexpr>   s2      DDcSYY[[))DDDDDDr    z* found in metadata. Using longest string (z characters).Uz|S)dtype)datar#   )r#   c                 :    g | ]}|                               S r(   astype)r   r9   dts     r   r.   z&read_table_parquet.<locals>.<listcomp>  s#    #B#B#BsCJJrNN#B#B#Br    )r)   r=   c                      i | ]}|d          |S )r)   r(   )r   r,   s     r   r   z&read_table_parquet.<locals>.<dictcomp>&  s    BBBqy!BBBr    datatype)descriptionformatunitr#   )=get_pyarrow
isinstancestrosPathLikehasattr	TypeErrorread_schemametadataitemsastropy.tabler!   r"   r#   r$   popsplitget_header_from_yamlnames
_get_namessetvaluesintersection_updatedifference_updatelistdictfromkeysextend
ValueErrorkeys
read_tablenum_rowsfieldtypeFixedSizeListType
value_type	list_sizeListTypestringbinaryappendto_pandas_dtypeintwarningswarnr   maxnpzeroszipto_numpyr@   r7   stackarrayobject_
add_columnr2   r)   setattr_construct_mixins_from_columns)(inputinclude_namesexclude_namesschema_onlyr3   paparquet_schemamdr!   r"   r#   r$   	meta_dict	meta_yamlmeta_hdrfull_table_columnshas_serialized_columnsserialized_columnsscolnames_to_readrU   pa_tablerb   r<   tshaperf   md_namestrlenstrnamer=   tabler1   header_colsattrrA   r)   r-   s(                                        @@@r   read_table_parquetr   ;   s   v !]]NBec2;/00 U uf%% 	USTTT  ''F "WWv?T?T?V?VWWW<<<<<<<<<<<< IBFF,--33D99	,,Y77X (I ?>>>>"9,,!%&'?@& 	0 	0D"#5d#;<< 0 0+/"4((0 &--//00I %%m444 ##M222 PPPP"4";";"="=PPPQQ I M $ $KKKK!3!9!9!;!;KKKU#### PNOOO  >#;<AACCDD 	> 	>D9$$2377===  %%e]G%TT$ E 4+ 4+LL#a-.. 		J[NEE2;'' 	JJ Jbiikk299;;777 }Z7799::::j88::EBCCC (''b==G%%FF 	 FMRgRRR"   
 DDXd^DDDDDFM+g + ++ + +"  
 #-		";";,f,,,f =LL!!!!LL'5)**** (:xc-&?&?!@!@AAA4i000
 9%%%M511 	: 	:HD"4.))++CT""'ARYY[["))++...jjnnAr344 V s88a<<(3--C|		RYY[['AAA
 "jjA// (1B///CCAr{++ V <BIIKK#===(#B#B#B#Bc#B#B#B"*UUUCVVC8889999 CBXj-ABBB='')) 	D 	DCA D D;sx000C{38'<T'BCCCD 44U;;ELr    c           
      |
   ddl m}m} ddlm} t                      \  }}}t          |t          t          j	        f          st          d|            |d          5  |                    |           }	ddd           n# 1 swxY w Y   |                    |	          }
d                    |
          }g }|	j        j        D ]Q}|	j        |         }|j        t"          j        k    rt'          |	          dk    rk|	|         d         j        }|	|         D ] }|j        |k    rt)          d| d	          !|                    |                    |j                  
          }nt)          d| d	          t'          |j                  dk    rR|                    |                    |j        d         j                  t#          j        |j                            }n|                    |j                  }|                    ||f           Si }|	j                                        D ]\  }}|j        j        }|j        j        }|t"          j        u r_|	|         d         j        j        }|t"          j        k    s|t"          j        k    r'd}|	|         D ]}tA          ||j        j                  }|t"          j        u rt          |dz            |d| <   n#|t"          j        u rt          |          |d| <   ||d<   d |                                D             }|!                    ||          }t          j"        #                    |          r8|rt          j$        |           n!tK          tM          j'        |                    |(                    |||          5 }g }|	j        j        D ]1}|	j        |         }|)                    d          |j        t"          j        k    rfd|	|         D             }nt'          |j                  dk    rkt'          |	          dk    rUt#          j*        |	|         +                                ,                    j-        d          t'          |	                    }n g }n|	|         ,                    d          }|                    |.                    ||/                    |          j                             3|j0        1                    ||          }|2                    |           ddd           dS # 1 swxY w Y   dS )a  
    Write a Table object to a Parquet file.

    The parquet writer supports tables with regular columns, fixed-size array
    columns, and variable-length array columns (provided all arrays have the
    same type).

    This requires `pyarrow <https://arrow.apache.org/docs/python/>`_
    to be installed.

    Parameters
    ----------
    table : `~astropy.table.Table`
        Data table that is to be written to file.
    output : str or path-like
        The filename to write the table to.
    overwrite : bool, optional
        Whether to overwrite any existing file without warning. Default `False`.

    Notes
    -----
    Tables written with array columns (fixed-size or variable-length) cannot
    be read with pandas.

    Raises
    ------
    ValueError
        If one of the columns has a mixed-type variable-length array, or
        if it is a zero-length table and any of the columns are variable-length
        arrays.
    r   )r#   r$   )serialize_context_asz.`output` should be a string or path-like, not r   Nr&   z$Cannot serialize mixed-type column (z) with parquet.)rf   z7Cannot serialize zero-length table with object column ()rf   rg   r   r4   r%   c                 f    i | ].\  }}|                     d           |                     d           /S r   )encoder   s      r   r   z'write_table_parquet.<locals>.<dictcomp>  sC       15A188G,,  r    )rO   )version<c                 >    g | ]}|                     d           S )Fcopyr?   )r   r9   dt_news     r   r.   z'write_table_parquet.<locals>.<listcomp>  s)    TTT#szz&uz55TTTr    Fr   )rd   )r   )3rQ   r#   r$   astropy.utils.data_infor   rG   rH   rI   rJ   rK   rM   represent_mixins_as_columnsget_yaml_from_tablejoinr<   rU   rd   rq   rw   r7   r_   list_from_numpy_dtyper   subdtypeprodrk   r2   rP   itemsizestr_bytes_rp   r   pathexistsremoveOSErrorr   rE   ParquetWriternewbyteorderrS   ravelr@   baserv   rc   r"   from_arrayswrite_table)r   output	overwriter#   r$   r   r   r   writer_versionencode_tabler   meta_yaml_str	type_listr)   rA   	obj_dtyper9   
arrow_typerO   r1   r   r   metadata_encoder   writerarraysvalr   r   s                               @r   write_table_parquetr   3  s    @ .-------<<<<<<"---BfsBK011 SQQQRRR 
	i	(	( D D <<UCCD D D D D D D D D D D D D D D ((66IIIi((M I"( '- '-%7bj   <  1$$(.q17	 (-  CyI--(X4XXX   .  XX!229>BB &  

 !A+/A A A   ]]Q ..r{1~/BCC'"(++ "  JJ ,,RW55J$
+,,,,H!)//11 4 4	c
 IN9%
??T"1%+0ABG||qBI~~'- A AC"8SY-?@@HH<<.1(a-.@.@H*D**++")^^.1(mmH*D**+&3"## 9A9I9I  O YYy?Y;;F	w~~f > 	>If-4V<<=== 
		vv~		F	F %& &, 	G 	GD#D)B __S))Fw"*$$TTTTdASTTTRX""|$$q((($T*002299&+E9RRL)) CC
 CC"4(//U/CCMM"((3V\\$-?-?-D(EEFFFF 8''v'>>8$$$9% % % % % % % % % % % % % % % % % %s%    BB	B1E3T11T58T5c                     g }|                                  D ]X\  }}t          |t                    r#|                    t	          |                     =|dk    r|                    |           Y|S )a"  Recursively find the names in a serialized column dictionary.

    Parameters
    ----------
    _dict : `dict`
        Dictionary from astropy __serialized_columns__

    Returns
    -------
    all_names : `list` [`str`]
        All the column names mentioned in _dict and sub-dicts.
    r)   )rP   rH   r\   r^   rV   rk   )_dict	all_namesr   r   s       r   rV   rV     sw     I    1a 	 Z]]++++&[[Qr    c                      ddl m}  ddlm} |                     d|t
                     |                     d|t                     |                     d|t                     dS )z,
    Register Parquet with Unified I/O.
    r   )registry)r"   r   N)

astropy.ior   rQ   r"   register_readerr   register_writerr   register_identifierr   )io_registryr"   s     r   register_parquetr     sy     322222######	52DEEE	52EFFF##Iu6FGGGGGr    c                      	 dd l } ddl m} n# t          $ r t          d          w xY wt	          | d          rd}nd}| ||fS )Nr   )r   z3pyarrow is required to read and write parquet filesz6.0.0z2.4z2.0)pyarrowr   ImportError	Exceptionr   )r   r   r   s      r   rG   rG     s    O####### O O OMNNNO "g w&&s   
 ')NNFN)F)__doc__rJ   rn   numpyrq   astropy.utilsr   astropy.utils.exceptionsr   astropy.utils.miscr   r   __all__r   r   r   rV   r   rG   r(   r    r   <module>r      s     
			      $ $ $ $ $ $ 8 7 7 7 7 7 2 2 2 2 2 2 
  D OSu u u up_% _% _% _%D  ,	H 	H 	H' ' ' ' 'r    