
    1egJ                     v    d Z ddlmZ ddlmZ ddlmZmZ ddlm	Z	 ddl
mZ ddlmZ d Z G d	 d
          ZdS )z;
Item Loader

See documentation in docs/topics/loaders.rst
    )suppress)ItemAdapter)extract_regexflatten)wrap_loader_context)Identity)arg_to_iterc                     t          t                    5  d| j        vr| j        cddd           S 	 ddd           n# 1 swxY w Y   | S )z
    Allow to use single-argument functions as input or output processors
    (no need to define an unused first 'self' argument)
    .N)r   AttributeError__qualname____func__)methods    4lib/python3.11/site-packages/itemloaders/__init__.pyunbound_methodr      s    
 
.	!	! # #f)))?# # # # # # # #)# # # # # # # # # # # # # # # Ms   ?AAc                   ^   e Zd ZdZeZ e            Z e            Zd"dZ	e
d             Ze
d             Zd Zd Zddd	Zddd
Zd Zd ZdddZd Zd Zd Zd Zd Zd#dZd Zd ZdddZdddZdddZd ZdddZ dddZ!dddZ"d Z#dddZ$dddZ%ddd Z&d! Z'dS )$
ItemLoadera
  
    Return a new Item Loader for populating the given item. If no item is
    given, one is instantiated automatically using the class in
    :attr:`default_item_class`.

    When instantiated with a :param ``selector`` parameter the :class:`ItemLoader` class
    provides convenient mechanisms for extracting data from web pages
    using parsel_ selectors.

    :param item: The item instance to populate using subsequent calls to
        :meth:`~ItemLoader.add_xpath`, :meth:`~ItemLoader.add_css`,
        :meth:`~ItemLoader.add_jmes` or :meth:`~ItemLoader.add_value`.
    :type item: :class:`dict` object

    :param selector: The selector to extract data from, when using the
        :meth:`add_xpath` (resp. :meth:`add_css`, :meth:`add_jmes`) or :meth:`replace_xpath`
        (resp. :meth:`replace_css`, :meth:`replace_jmes`) method.
    :type selector: :class:`~parsel.selector.Selector` object

    The item, selector and the remaining keyword arguments are
    assigned to the Loader context (accessible through the :attr:`context` attribute).

    .. attribute:: item

        The item object being parsed by this Item Loader.
        This is mostly used as a property so when attempting to override this
        value, you may want to check out :attr:`default_item_class` first.

    .. attribute:: context

        The currently active :ref:`Context <loaders-context>` of this Item Loader.
        Refer to <loaders-context> for more information about the Loader Context.

    .. attribute:: default_item_class

        An Item class (or factory), used to instantiate items when not given in
        the ``__init__`` method.

        .. warning:: Currently, this factory/class needs to be
            callable/instantiated without any arguments.
            If you are using ``dataclasses``, please consider the following
            alternative::

                from dataclasses import dataclass, field
                from typing import Optional

                @dataclass
                class Product:
                    name: Optional[str] = field(default=None)
                    price: Optional[float] = field(default=None)

    .. attribute:: default_input_processor

        The default input processor to use for those fields which don't specify
        one.

    .. attribute:: default_output_processor

        The default output processor to use for those fields which don't specify
        one.

    .. attribute:: selector

        The :class:`~parsel.selector.Selector` object to extract data from.
        It's the selector given in the ``__init__`` method.
        This attribute is meant to be read-only.

    .. _parsel: https://parsel.readthedocs.io/en/latest/
    Nc                 v   || _         |                    |           ||                                 }|| _        ||d<   || _        || _        i | _        t          |                                          D ]B\  }}| j	        
                    |g            | j	        |xx         t          |          z  cc<   Cd S )Nselectoritem)r   updatedefault_item_class_local_itemcontextparent_local_valuesr   items_values
setdefaultr	   )selfr   r   r   r   
field_namevalues          r   __init__zItemLoader.__init__f   s     )))<**,,D!,T!2!2!8!8!:!: 	; 	;JL##J333L$$$E(:(::$$$$	; 	;    c                 6    | j         | j         j        S | j        S N)r   r   r   r!   s    r   r   zItemLoader._valuesu   s    ;";&&%%r%   c                 6    | j         | j         j        S | j        S r'   )r   r   r   r(   s    r   r   zItemLoader.item|   s    ;";####r%   c                     | j                             |          }|                    |            | j        d| j        | d|}|S )am  
        Create a nested loader with an xpath selector.
        The supplied selector is applied relative to selector associated
        with this :class:`ItemLoader`. The nested loader shares the item
        with the parent :class:`ItemLoader` so calls to :meth:`add_xpath`,
        :meth:`add_value`, :meth:`replace_value`, etc. will behave as expected.
        r   r   r    )r   xpathr   	__class__r   )r!   r-   r   r   	subloaders        r   nested_xpathzItemLoader.nested_xpath   sS     =&&u--)))"DNJ	$JJ'JJ	r%   c                     | j                             |          }|                    |            | j        d| j        | d|}|S )aj  
        Create a nested loader with a css selector.
        The supplied selector is applied relative to selector associated
        with this :class:`ItemLoader`. The nested loader shares the item
        with the parent :class:`ItemLoader` so calls to :meth:`add_xpath`,
        :meth:`add_value`, :meth:`replace_value`, etc. will behave as expected.
        r   r+   r,   )r   cssr   r.   r   )r!   r2   r   r   r/   s        r   
nested_csszItemLoader.nested_css   sS     =$$S)))))"DNJ	$JJ'JJ	r%   )rec                     | j         |g|R d|i|}|dS |s2|                                D ]\  }}|                     ||           dS |                     ||           dS )a  
        Process and then add the given ``value`` for the given field.

        The value is first passed through :meth:`get_value` by giving the
        ``processors`` and ``kwargs``, and then passed through the
        :ref:`field input processor <processors>` and its result
        appended to the data collected for that field. If the field already
        contains collected data, the new data is added.

        The given ``field_name`` can be ``None``, in which case values for
        multiple fields may be added. And the processed value should be a dict
        with field_name mapped to values.

        Examples::

            loader.add_value('name', 'Color TV')
            loader.add_value('colours', ['white', 'blue'])
            loader.add_value('length', '100')
            loader.add_value('name', 'name: foo', TakeFirst(), re='name: (.+)')
            loader.add_value(None, {'name': 'foo', 'sex': 'male'})
        r4   N)	get_valuer   
_add_valuer!   r"   r#   r4   
processorskwkvs           r   	add_valuezItemLoader.add_value   s    , u?z???b?B??=F 	/ & &11%%%%& & OOJ.....r%   c                     | j         |g|R d|i|}|dS |s2|                                D ]\  }}|                     ||           dS |                     ||           dS )z
        Similar to :meth:`add_value` but replaces the collected data with the
        new value instead of adding it.
        r4   N)r6   r   _replace_valuer8   s           r   replace_valuezItemLoader.replace_value   s    
 u?z???b?B??=F 	3 * *1##Aq))))* * 
E22222r%   c                     t          |          }|                     ||          }|r?| j                            |g            | j        |xx         t          |          z  cc<   d S d S r'   )r	   _process_input_valuer   r    )r!   r"   r#   processed_values       r   r7   zItemLoader._add_value   s}    E""33JFF 	EL##J333L$$$O(D(DD$$$$$	E 	Er%   c                 h    | j                             |d            |                     ||           d S r'   )r   popr7   )r!   r"   r#   s      r   r?   zItemLoader._replace_value   s4    T***
E*****r%   c                h   r*t          |          }t          fd|D                       }|D ]}| n||}t          || j                  }	  ||          }*# t          $ rK}t          d|j        j        d|dt          |          j        dt          |          d	          |d}~ww xY w|S )ar  
        Process the given ``value`` by the given ``processors`` and keyword
        arguments.

        Available keyword arguments:

        :param re: a regular expression to use for extracting data from the
            given value using :func:`~parsel.utils.extract_regex` method,
            applied before processors
        :type re: str or typing.Pattern

        Examples:

        >>> from itemloaders import ItemLoader
        >>> from itemloaders.processors import TakeFirst
        >>> loader = ItemLoader()
        >>> loader.get_value('name: foo', TakeFirst(), str.upper, re='name: (.+)')
        'FOO'
        c              3   8   K   | ]}t          |          V  d S r'   )r   ).0xr4   s     r   	<genexpr>z'ItemLoader.get_value.<locals>.<genexpr>   s-      @@QM"a00@@@@@@r%   NzError with processor  value= error=': ')
r	   r   r   r   	Exception
ValueErrorr.   __name__typestr)r!   r#   r4   r9   r:   proc_proces     `     r   r6   zItemLoader.get_value   s    (  	A&&E@@@@%@@@@@E 	 	D}E&tT\::DU    j///Q8H8H8H#a&&&&R  
 s   A
B/$AB**B/c                     t          | j                  }t          | j                  D ]}|                     |          }||||<   |j        S )z
        Populate the item with the data collected so far, and return it. The
        data collected is first passed through the :ref:`output processors
        <processors>` to get the final value to assign to each item field.
        )r   r   tupler   get_output_value)r!   adapterr"   r#   s       r   	load_itemzItemLoader.load_item   sX     di((-- 	, 	,J))*55E &+
#|r%   c                 @   |                      |          }t          || j                  }| j                            |g           }	  ||          S # t
          $ rA}t          d|d|dt          |          j        dt          |          d	          |d}~ww xY w)z
        Return the collected values parsed using the output processor, for the
        given field. This method doesn't populate or modify the item at all.
        z#Error with output processor: field=rK   rL   rM   rN   N)
get_output_processorr   r   r   getrO   rP   rR   rQ   rS   )r!   r"   rT   r#   rV   s        r   rY   zItemLoader.get_output_value	  s    
 ((44"466  R00	4;; 	 	 	*::uuud1gg&6&6&6A@  	s   
A 
B<BBc                 8    | j                             |g           S )z0Return the collected values for the given field.)r   r^   )r!   r"   s     r   get_collected_valueszItemLoader.get_collected_values  s    |
B///r%   c                     t          | d|z  d           }|s|                     |d| j                  }t          |          S )Nz%s_ininput_processor)getattr_get_item_field_attrdefault_input_processorr   r!   r"   rT   s      r   get_input_processorzItemLoader.get_input_processor  sP    tWz1488 	,,-t/K D d###r%   c                     t          | d|z  d           }|s|                     |d| j                  }t          |          S )Nz%s_outoutput_processor)rc   rd   default_output_processorr   rf   s      r   r]   zItemLoader.get_output_processor%  sP    tX
2D99 	,,.0M D d###r%   c                 |    t          | j                                      |          }|                    ||          S r'   )r   r   get_field_metar^   )r!   r"   keydefault
field_metas        r   rd   zItemLoader._get_item_field_attr-  s3     ++:::FF
~~c7+++r%   c                 (   |                      |          }|}t          || j                  }	  ||          S # t          $ rN}t	          d|j        j        d|d|dt          |          j        dt          |          d          |d }~ww xY w)NzError with input processor z: field=rK   rL   rM   rN   )	rg   r   r   rO   rP   r.   rQ   rR   rS   )r!   r"   r#   rT   rU   rV   s         r   rB   zItemLoader._process_input_value1  s    ''
33"466	4;; 	 	 	* O,,,JJEEGG$$$FFFF
 
 
	s   
9 
BA	BBc                 L    | j         t          d| j        j        z            d S )NzFTo use XPath or CSS selectors, %s must be instantiated with a selector)r   RuntimeErrorr.   rQ   r(   s    r   _check_selector_methodz!ItemLoader._check_selector_methodD  s7    = 79=9PQ   ! r%   c                J     | j         |fi |} | j        ||g|R d|i| dS )a  
        Similar to :meth:`ItemLoader.add_value` but receives an XPath instead of a
        value, which is used to extract a list of strings from the
        selector associated with this :class:`ItemLoader`.

        See :meth:`get_xpath` for ``kwargs``.

        :param xpath: the XPath to extract data from
        :type xpath: str

        Examples::

            # HTML snippet: <p class="product-name">Color TV</p>
            loader.add_xpath('name', '//p[@class="product-name"]')
            # HTML snippet: <p id="price">the price is $1200</p>
            loader.add_xpath('price', '//p[@id="price"]', re='the price is (.*)')

        r4   N)_get_xpathvaluesr=   r!   r"   r-   r4   r9   r:   valuess          r   	add_xpathzItemLoader.add_xpathK  sM    & '&u3333z6DJDDD2DDDDDDr%   c                J     | j         |fi |} | j        ||g|R d|i| dS )z`
        Similar to :meth:`add_xpath` but replaces collected data instead of adding it.
        r4   N)ru   r@   rv   s          r   replace_xpathzItemLoader.replace_xpatha  sN     '&u3333:vH
HHHrHRHHHHHr%   c                D     | j         |fi |} | j        |g|R d|i|S )a  
        Similar to :meth:`ItemLoader.get_value` but receives an XPath instead of a
        value, which is used to extract a list of unicode strings from the
        selector associated with this :class:`ItemLoader`.

        :param xpath: the XPath to extract data from
        :type xpath: str

        :param re: a regular expression to use for extracting data from the
            selected XPath region
        :type re: str or typing.Pattern

        Examples::

            # HTML snippet: <p class="product-name">Color TV</p>
            loader.get_xpath('//p[@class="product-name"]')
            # HTML snippet: <p id="price">the price is $1200</p>
            loader.get_xpath('//p[@id="price"]', TakeFirst(), re='the price is (.*)')

        r4   )ru   r6   )r!   r-   r4   r9   r:   rw   s         r   	get_xpathzItemLoader.get_xpathh  sE    * '&u3333t~f?z???b?B???r%   c                                                         t          |          }t           fd|D                       S )Nc              3   b   K   | ])} j         j        |fi                                 V  *d S r'   )r   r-   getall)rH   r-   r:   r!   s     r   rJ   z.ItemLoader._get_xpathvalues.<locals>.<genexpr>  sG      UUU*t}*577B77>>@@UUUUUUr%   rs   r	   r   )r!   xpathsr:   s   ` `r   ru   zItemLoader._get_xpathvalues  sI    ##%%%V$$UUUUUfUUUUUUr%   c                X    |                      |          } | j        ||g|R d|i| dS )az  
        Similar to :meth:`ItemLoader.add_value` but receives a CSS selector
        instead of a value, which is used to extract a list of unicode strings
        from the selector associated with this :class:`ItemLoader`.

        See :meth:`get_css` for ``kwargs``.

        :param css: the CSS selector to extract data from
        :type css: str

        Examples::

            # HTML snippet: <p class="product-name">Color TV</p>
            loader.add_css('name', 'p.product-name')
            # HTML snippet: <p id="price">the price is $1200</p>
            loader.add_css('price', 'p#price', re='the price is (.*)')
        r4   N)_get_cssvaluesr=   r!   r"   r2   r4   r9   r:   rw   s          r   add_csszItemLoader.add_css  sF    $ $$S))z6DJDDD2DDDDDDr%   c                X    |                      |          } | j        ||g|R d|i| dS )z^
        Similar to :meth:`add_css` but replaces collected data instead of adding it.
        r4   N)r   r@   r   s          r   replace_csszItemLoader.replace_css  sG     $$S)):vH
HHHrHRHHHHHr%   c                R    |                      |          } | j        |g|R d|i|S )a  
        Similar to :meth:`ItemLoader.get_value` but receives a CSS selector
        instead of a value, which is used to extract a list of unicode strings
        from the selector associated with this :class:`ItemLoader`.

        :param css: the CSS selector to extract data from
        :type css: str

        :param re: a regular expression to use for extracting data from the
            selected CSS region
        :type re: str or typing.Pattern

        Examples::

            # HTML snippet: <p class="product-name">Color TV</p>
            loader.get_css('p.product-name')
            # HTML snippet: <p id="price">the price is $1200</p>
            loader.get_css('p#price', TakeFirst(), re='the price is (.*)')
        r4   )r   r6   )r!   r2   r4   r9   r:   rw   s         r   get_csszItemLoader.get_css  s>    ( $$S))t~f?z???b?B???r%   c                                                         t          |          }t           fd|D                       S )Nc              3   p   K   | ]0}j                             |                                          V  1d S r'   )r   r2   r   )rH   r2   r!   s     r   rJ   z,ItemLoader._get_cssvalues.<locals>.<genexpr>  s?      GG3t}((--4466GGGGGGr%   r   )r!   cssss   ` r   r   zItemLoader._get_cssvalues  sE    ##%%%4  GGGG$GGGGGGr%   c                X    |                      |          } | j        ||g|R d|i| dS )ac  
        Similar to :meth:`ItemLoader.add_value` but receives a JMESPath selector
        instead of a value, which is used to extract a list of unicode strings
        from the selector associated with this :class:`ItemLoader`.

        See :meth:`get_jmes` for ``kwargs``.

        :param jmes: the JMESPath selector to extract data from
        :type jmes: str

        Examples::

            # HTML snippet: {"name": "Color TV"}
            loader.add_jmes('name')
            # HTML snippet: {"price": the price is $1200"}
            loader.add_jmes('price', TakeFirst(), re='the price is (.*)')
        r4   N)_get_jmesvaluesr=   r!   r"   jmesr4   r9   r:   rw   s          r   add_jmeszItemLoader.add_jmes  sF    $ %%d++z6DJDDD2DDDDDDr%   c                X    |                      |          } | j        ||g|R d|i| dS )z_
        Similar to :meth:`add_jmes` but replaces collected data instead of adding it.
        r4   N)r   r@   r   s          r   replace_jmeszItemLoader.replace_jmes  sG     %%d++:vH
HHHrHRHHHHHr%   c                R    |                      |          } | j        |g|R d|i|S )a  
        Similar to :meth:`ItemLoader.get_value` but receives a JMESPath selector
        instead of a value, which is used to extract a list of unicode strings
        from the selector associated with this :class:`ItemLoader`.

        :param jmes: the JMESPath selector to extract data from
        :type jmes: str

        :param re: a regular expression to use for extracting data from the
            selected JMESPath
        :type re: str or typing.Pattern

        Examples::

            # HTML snippet: {"name": "Color TV"}
            loader.get_jmes('name')
            # HTML snippet: {"price": the price is $1200"}
            loader.get_jmes('price', TakeFirst(), re='the price is (.*)')
        r4   )r   r6   )r!   r   r4   r9   r:   rw   s         r   get_jmeszItemLoader.get_jmes  s>    ( %%d++t~f?z???b?B???r%   c                                                         t          |          }t           j        d          st	          d          t           fd|D                       S )Njmespathz6Please install parsel >= 1.8.1 to get jmespath supportc              3   p   K   | ]0}j                             |                                          V  1d S r'   )r   r   r   )rH   r   r!   s     r   rJ   z-ItemLoader._get_jmesvalues.<locals>.<genexpr>  s?      OOt}--d33::<<OOOOOOr%   )rs   r	   hasattrr   r   r   )r!   jmesss   ` r   r   zItemLoader._get_jmesvalues  sp    ##%%%E""t}j11 	 H   OOOOOOOOOOr%   )NNNr'   )(rQ   
__module__r   __doc__dictr   r   re   rj   r$   propertyr   r   r0   r3   r=   r@   r7   r?   r6   r[   rY   r`   rg   r]   rd   rB   rs   rx   rz   r|   ru   r   r   r   r   r   r   r   r   r,   r%   r   r   r      s       D DL &hjj'xzz; ; ; ; & & X& $ $ X$     <@ / / / / /> @D 3 3 3 3 3E E E+ + + 04 $ $ $ $ $L     0 0 0$ $ $$ $ $, , , ,  &   <@ E E E E E, @D I I I I I 04 @ @ @ @ @0V V V
 8< E E E E E* <@ I I I I I ,0 @ @ @ @ @.H H H
 :> E E E E E* >B I I I I I .2 @ @ @ @ @.P P P P Pr%   r   N)r   
contextlibr   itemadapterr   parsel.utilsr   r   itemloaders.commonr   itemloaders.processorsr   itemloaders.utilsr	   r   r   r,   r%   r   <module>r      s    
        # # # # # # / / / / / / / / 2 2 2 2 2 2 + + + + + + ) ) ) ) ) )  \P \P \P \P \P \P \P \P \P \Pr%   