
    3 d7                         d dl Z d dlZd dlZd dlmZ d dlmZmZ d dlm	Z	 d dl
mZmZ  ej        e          Zd ZddZ G d	 d
          ZddZddZdS )    N)StringIO)ResponseTextResponse)Selector)
re_rsearch
to_unicodec              #     K   t          j        |          }t          j        dt           j                  }t          j        d| dt           j                  }t          j        dt           j                  }t          j        dt           j                  }t	          |           }t          j        ||          }|r&|                                                                nd}t          ||          }	|	r"||	d         d                                         nd}
i }|
rt          t          j
        ||
                    D ]z}t          j        d	| d
|d|	d                  t           j                  }|rD|                    d t          j
        ||                                          D                        {t          j        d| d| dt           j                  }|                    |          D ]n}||                                                    || dd                    |                                           d          z   |
z   }t#          |d          V  odS )zReturn a iterator of Selector's over all nodes of a XML document,
       given the name of the node to iterate. Useful for parsing XML feeds.

    obj can be:
    - a Response object
    - a unicode string
    - a string encoded as utf-8
    z<\?xml[^>]+>\s*z<\s*/z\s*>z<\s*/([^\s>]+)\s*>z((xmlns[:A-Za-z]*)=[^>\s]+)    Nz<\s*z.*?xmlns[:=][^>]*>c              3   4   K   | ]}t          |          V  d S N)reversed).0xs     6lib/python3.11/site-packages/scrapy/utils/iterators.py	<genexpr>zxmliter.<locals>.<genexpr>)   s9       " "$%HQKK" " " " " "    <z
[\s>].*?</> xmltexttype)reescapecompileS_body_or_strsearchgroupstripr   r   findallupdateDOTALLfinditerreplacejoinvaluesr   )objnodenamenodename_pattDOCUMENT_HEADER_REHEADER_END_RE
END_TAG_RENAMESPACE_REr   document_headerheader_end_idx
header_end
namespacestagnametagrmatchnodetexts                    r   xmliterr:      sx      Ih''M$6==J;;;;RTBBM12488J:<bdCCLDi 2D99O9HPo++--33555bOt44N6DLnQ'))*00222"JJ 
:z B BCC 	 	G)3333T:MN1<M:M5NPRPT C  !! " ")+L#))++)N)N" " "    	
AAAAAA29MMAD!! 2 2kkmm##XGG1B1B1D1D(E(EGG  	 	 H511111112 2r   r   c              #     K   ddl m} t          |           }|rd| d| n|}|                    |||j                  }d|r| d| n|z   }|D ]v\  }	}
|                    |
d	          }|
                                 t          |d
          }|r|                    ||           |	                    |          d         V  wd S )Nr   )etree{})r6   encodingz//:unicode)r?   r   r   )
lxmlr<   _StreamReader	iterparser?   tostringclearr   register_namespacexpath)r*   r+   	namespaceprefixr<   readerr6   iterableselxpath_noder9   xss                r   xmliter_lxmlrQ   9   s     3F*3
A
&y
&
&H
&
&
&Cv3IIHH&--8---IH $ $4>>$>;;

8%000 	5!!&)444hhx  #####$ $r   c                   ,    e Zd Zd ZddZddZddZdS )rC   c                     d| _         t          |t                    r|j        |j        c| _        | _        n|dc| _        | _        t          | j        t                    | _        d S )Nr   utf-8)_ptr
isinstancer   bodyr?   _textstr_is_unicode)selfr*   s     r   __init__z_StreamReader.__init__J   s[    	c8$$ 	5(+#,%DJ(+W%DJ%dj#66r     c                     | j         r| j        n| j        | _        |                     |                                          S r   )rZ   _read_unicode_read_stringreadlstrip)r[   ns     r   ra   z_StreamReader.readR   s8    *.*:QD&&@Q	yy||""$$$r   c                 P    | j         | j         |z   }}|| _         | j        ||         S r   )rU   rX   r[   rc   ses       r   r`   z_StreamReader._read_stringV   s*    y$)a-1	z!A#r   c                 v    | j         | j         |z   }}|| _         | j        ||                             d          S )NrT   )rU   rX   encodere   s       r   r_   z_StreamReader._read_unicode[   s8    y$)a-1	z!A#%%g...r   N)r]   )__name__
__module____qualname__r\   ra   r`   r_    r   r   rC   rC   I   s_        7 7 7% % % %   
/ / / / / /r   rC   c           	   #   P  K   t          | t                    r| j        npdfd}t          t	          | d                    }i }|r||d<   |r||d<   t          j        |fi |}|s-	 t          |          }	n# t          $ r Y dS w xY w ||	          }|D ]}	 ||	          }	t          |	          t          |          k    r?t                              d|j        t          |	          t          |          d	           lt          t          ||	                    V  dS )
a  Returns an iterator of dictionaries from the given csv object

    obj can be:
    - a Response object
    - a unicode string
    - a string encoded as utf-8

    delimiter is the character used to separate fields on the given obj.

    headers is an iterable that when provided offers the keys
    for the returned dictionaries, if not the first row is used.

    quotechar is the character used to enclosure fields on the given obj.
    rT   c                      fd| D             S )Nc                 0    g | ]}t          |          S rm   )r   )r   fieldr?   s     r   
<listcomp>z3csviter.<locals>.row_to_unicode.<locals>.<listcomp>t   s#    >>>
5(++>>>r   rm   )row_r?   s    r   row_to_unicodezcsviter.<locals>.row_to_unicodes   s    >>>>>>>>r   T)rA   	delimiter	quotecharNzGignoring row %(csvlnum)d (length: %(csvrow)d, should be: %(csvheader)d))csvlnumcsvrow	csvheader)rV   r   r?   r   r   csvrK   nextStopIterationlenloggerwarningline_numdictzip)
r*   ru   headersr?   rv   rt   lineskwargscsv_rrows
      `      r   csviterr   a   s        *#|<<Us||(BUgH? ? ? ? ? \#t44455EF ('{ ('{Ju''''E &	u++CC 	 	 	FF	 .%% & &nS!!s88s7||# 
	NN,  %~!#hh!$W    3w$$%%%%%%& &s   ,A< <
B
	B
Tc           
         t           t          t          f}t          | |          sId                    d |D                       }t          d| d| dt          |           j                   t          | t                     r?|s| j        S t          | t                    r| j
        S | j                            d          S t          | t                    r|r| n|                     d          S |r|                     d          n| S )Nz or c              3   $   K   | ]}|j         V  d S r   )rj   )r   ts     r   r   z_body_or_str.<locals>.<genexpr>   s$      (L(L(L(L(L(L(L(Lr   zObject z	 must be z, not rT   )r   rY   bytesrV   r(   	TypeErrorr   rj   rW   r   r   decoderi   )r*   rA   expected_typesexpected_types_strs       r   r   r      s   U+Nc>** 
#[[(L(L^(L(L(LLLTcTT&8TTS		@RTT
 
 	
 #x   ( 	8Oc<(( 	8Oxw'''#s 76ss3::g#6#66")23::gs2r   )Nr   )NNNN)T)rz   loggingr   ior   scrapy.httpr   r   scrapy.selectorr   scrapy.utils.pythonr   r   	getLoggerrj   r~   r:   rQ   rC   r   r   rm   r   r   <module>r      s   



  				       . . . . . . . . $ $ $ $ $ $ 6 6 6 6 6 6 6 6		8	$	$)2 )2 )2X$ $ $ $ / / / / / / / /02& 2& 2& 2&j3 3 3 3 3 3r   