
    o[we                         d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlZd dlmZ d d	lmZmZ d d
lmZ erd dlmZ  G d de          ZdS )    )annotations)TYPE_CHECKING)using_pyarrow_string_dtype)lib)import_optional_dependency)
is_integerN)	DataFrame)_arrow_dtype_mappingarrow_string_types_mapper)
ParserBase)
ReadBufferc                  B     e Zd ZdZd fdZd ZddZddZddZ xZ	S )ArrowParserWrapperz7
    Wrapper for the pyarrow engine for read_csv()
    srcReadBuffer[bytes]returnNonec                    t                                          |           || _        || _        |                                  d S )N)super__init__kwdsr   _parse_kwds)selfr   r   	__class__s      Flib/python3.11/site-packages/pandas/io/parsers/arrow_parser_wrapper.pyr   zArrowParserWrapper.__init__   sA    	    c                    | j                             d          }|dn|| _        | j         d         }t          |t                    rt          d          t          | j         d                   | _        dS )z?
        Validates keywords before passing to pyarrow.
        encodingNzutf-8	na_valuesz?The pyarrow engine doesn't support passing a dict for na_values)r   getr   
isinstancedict
ValueErrorlistr   )r   r   r   s      r   r   zArrowParserWrapper._parse_kwds%   sv      $y}}Z88#+#3Ik*	i&& 	Q   di455r   c                Z   dddddd}|                                 D ]J\  }}|| j        v r<| j                            |          "| j                            |          | j        |<   K| j        }t          |t                    r|g}nd}|| j        d<   d	 | j                                         D             | _        d
 | j                                         D             | _        d| j        d         v | j        d<   | j	        du | j	        | j	        n| j        d         | j
        d| _        dS )z:
        Rename some arguments to pass to pyarrow
        include_columnsnull_valuesescape_charignore_empty_linesdecimal_point)usecolsr   
escapecharskip_blank_linesdecimalNtimestamp_parsersc                &    i | ]\  }}||dv ||S )N)	delimiter
quote_charr(   r)    .0option_nameoption_values      r   
<dictcomp>z;ArrowParserWrapper._get_pyarrow_options.<locals>.<dictcomp>Q   sI     
 
 
)\'OP P P P Pr   c                &    i | ]\  }}||dv ||S )N)r&   r'   true_valuesfalse_valuesr*   r/   r3   r4   s      r   r8   z;ArrowParserWrapper._get_pyarrow_options.<locals>.<dictcomp>X   sF      
  
  
)\'    r    strings_can_be_nullskiprows)autogenerate_column_names	skip_rowsr   )itemsr   r    popdate_formatr!   strparse_optionsconvert_optionsheaderr   read_options)r   mappingpandas_namepyarrow_namerC   s        r   _get_pyarrow_optionsz'ArrowParserWrapper._get_pyarrow_options3   sh   
 )&' 4&
 
 *1 	E 	E%Kdi''DIMM+,F,F,R*.)--*D*D	,'
 &k3'' 	&-KK K)4	%&
 
-1Y__->->
 
 
 
  
-1Y__->-> 
  
  
 79DIm<T6T23)-)<{& :&
 
r   framer	   c                L   t          j                  }d}| j        | j        | j        t	          |          | _        t          | j                  |k    r@t          t	          |t          | j                  z
                      | j        z   | _        d}| j        _        |                     j                  \  }| j        Q| j                                        }t          | j                  D ]\  }}t          |          rj        |         ||<   n|j        vrt          d| d          | j        | j                            |          || j                            |          fn1j        |         | j                            j        |                   f\  }}	|	&|                             |	          |<   | j        |= ܉                    |dd           | j        )|s'dgt          j        j                  z  j        _        | j        t#          | j        t$                    r*fd| j                                        D             | _        	                     | j                  n!# t(          $ r}
t          |
          d}
~
ww xY wS )z
        Processes data read in based on kwargs.

        Parameters
        ----------
        frame: DataFrame
            The DataFrame to process.

        Returns
        -------
        DataFrame
            The processed DataFrame.
        TNFzIndex z invalid)dropinplacec                .    i | ]\  }}|j         v ||S r3   )columns)r5   kvrM   s      r   r8   z>ArrowParserWrapper._finalize_pandas_output.<locals>.<dictcomp>   s+    XXXtq!Q%-EWEWaEWEWEWr   )lenrR   rG   namesranger$   _do_date_conversions	index_colcopy	enumerater   r#   dtyper    astype	set_indexindexr!   r"   rA   	TypeError)r   rM   num_colsmulti_index_named_index_to_setiitemkey	new_dtypees    `         r   _finalize_pandas_outputz*ArrowParserWrapper._finalize_pandas_outputo   s    u}%% ;z!;&!&xDJ4:(**
 "%3tz??(B"C"CDDtzQ
$)! JEM,,U]EBB5>%>..00L$T^44 , ,4d## >&+mD&9LOO..$%<d%<%<%<=== :)  :>>$//; tz~~d3344#mD14:>>%-PTBU3V3VW #C
 !,%*3Z%6%6y%A%Ac
 JsOOOLtTOBBB{"+<"%)FS1B-C-C$C!:! $*d++ YXXXXtz/?/?/A/AXXX
$TZ00 $ $ $ mm#$ s   (J 
J!JJ!c           
     4   t          d          }t          d          }|                                  |                    | j         |j        di | j         |j        di | j         |j        di | j	                  }| j
        d         }|t          j        u r|j        }|                                }t          |j        j                  D ][\  }}|j                            |          r<|                    ||                    |                              |                    }\|                    |          }|dk    r!|                    t.          j                  }	n|dk    rRt3                      }
t/          j                    |
|                                <   |                    |
j                  }	nEt;                      r#|                    t=                                }	n|                                }	|                     |	          S )z
        Reads the contents of a CSV file into a DataFrame and
        processes it according to the kwargs passed in the
        constructor.

        Returns
        -------
        DataFrame
            The DataFrame created from the CSV file.
        pyarrowzpyarrow.csv)rH   rE   rF   dtype_backend)types_mappernumpy_nullabler3   ) r   rL   read_csvr   ReadOptionsrH   ParseOptionsrE   ConvertOptionsrF   r   r   
no_defaultschemafloat64r[   typesis_nullsetfield	with_typecast	to_pandaspd
ArrowDtyper
   
Int64Dtypenullr    r   r   rj   )r   papyarrow_csvtablerm   
new_schemanew_typere   
arrow_typerM   dtype_mappings              r   readzArrowParserWrapper.read   s    (	220??!!###$$H00EE43DEE2+2HHT5GHH6K6NN9MNN	 % 
 
 	/2 CN**Jzz||H!*5<+=!>!>  :8##J// !+:++A..88BB" "J JJz**EI%%OOO??EE... 122M')}M"''))$OO1BOCCEE')) 	&OO1J1L1LOMMEEOO%%E++E222r   )r   r   r   r   )r   r   )rM   r	   r   r	   )r   r	   )
__name__
__module____qualname____doc__r   r   rL   rj   r   __classcell__)r   s   @r   r   r      s              6 6 6:
 :
 :
 :
xA A A AF13 13 13 13 13 13 13 13r   r   )
__future__r   typingr   pandas._configr   pandas._libsr   pandas.compat._optionalr   pandas.core.dtypes.inferencer   pandasr~   r	   pandas.io._utilr
   r   pandas.io.parsers.base_parserr   pandas._typingr   r   r3   r   r   <module>r      s#   " " " " " "             5 5 5 5 5 5       > > > > > > 3 3 3 3 3 3                  5 4 4 4 4 4 *))))))J3 J3 J3 J3 J3 J3 J3 J3 J3 J3r   