
    3 d5              	          U d Z ddlZddlZddlZddlmZmZmZmZm	Z	m
Z
 ddlmZ ddlmZ ddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ ded<    e            Z d Z!	 	 d'dedeee
e"e#f                           de$de#fdZ%d Z&ded<    e            Z'ddddedeee
e"e#f                           de$de"fdZ( G d d          Z)dede#de#ddfdZ*dede"fd Z+dedee#         fd!Z,dd"d#e-d$ee         defd%Z.d& Z/dS )(zY
This module provides some useful functions for working with
scrapy.http.Request objects
    N)DictIterableListOptionalTupleUnion)
urlunparse)WeakKeyDictionary)basic_auth_header)canonicalize_url)RequestSpider)ScrapyDeprecationWarning)urlparse_cached)load_object)to_bytes
to_unicodezOWeakKeyDictionary[Request, Dict[Tuple[Optional[Tuple[bytes, ...]], bool], str]]_deprecated_fingerprint_cachec              #   p   K   | D ]0}||j         v r%|V  |j                             |          D ]}|V  1d S N)headersgetlist)r   requestheadervalues       4lib/python3.11/site-packages/scrapy/utils/request.py_serialize_headersr      sb        W_$ 	LLL 0088  	     Fr   include_headerskeep_fragmentsreturnc                    |s|rd}nd}t          j        |t          d           d}|r&t          d t	          |          D                       }t
                              | i           }||f}||vrt          j                    }|	                    t          | j                             |	                    t          t          | j        |                               |	                    | j        pd           |r(t          ||           D ]}|	                    |           |                                ||<   ||         S )	a/  
    Return the request fingerprint as an hexadecimal string.

    The request fingerprint is a hash that uniquely identifies the resource the
    request points to. For example, take the following two urls:

    http://www.example.com/query?id=111&cat=222
    http://www.example.com/query?cat=222&id=111

    Even though those are two different URLs both point to the same resource
    and are equivalent (i.e. they should return the same response).

    Another example are cookies used to store session ids. Suppose the
    following page is only accessible to authenticated users:

    http://www.example.com/members/offers.html

    Lots of sites use a cookie to store the session id, which adds a random
    component to the HTTP Request and thus should be ignored when calculating
    the fingerprint.

    For this reason, request headers are ignored by default when calculating
    the fingerprint. If you want to include specific headers use the
    include_headers argument, which is a list of Request headers to include.

    Also, servers usually ignore fragments in urls when handling requests,
    so they are also ignored by default when calculating the fingerprint.
    If you want to include them, set the keep_fragments argument to True
    (for instance when handling requests with a headless browser).
    a  Call to deprecated function scrapy.utils.request.request_fingerprint().

If you are using this function in a Scrapy component because you need a non-default fingerprinting algorithm, and you are OK with that non-default fingerprinting algorithm being used by all Scrapy components and not just the one calling this function, use crawler.request_fingerprinter.fingerprint() instead in your Scrapy component (you can get the crawler object from the 'from_crawler' class method), and use the 'REQUEST_FINGERPRINTER_CLASS' setting to configure your non-default fingerprinting algorithm.

Otherwise, consider using the scrapy.utils.request.fingerprint() function instead.

If you switch to 'fingerprint()', or assign the 'REQUEST_FINGERPRINTER_CLASS' setting a class that uses 'fingerprint()', the generated fingerprints will not only be bytes instead of a string, but they will also be different from those generated by 'request_fingerprint()'. Before you switch, make sure that you understand the consequences of this (e.g. cache invalidation) and are OK with them; otherwise, consider implementing your own function which returns the same fingerprints as the deprecated 'request_fingerprint()' function.ax  Call to deprecated function scrapy.utils.request.request_fingerprint().

If you are using this function in a Scrapy component, and you are OK with users of your component changing the fingerprinting algorithm through settings, use crawler.request_fingerprinter.fingerprint() instead in your Scrapy component (you can get the crawler object from the 'from_crawler' class method).

Otherwise, consider using the scrapy.utils.request.fingerprint() function instead.

Either way, the resulting fingerprints will be returned as bytes, not as a string, and they will also be different from those generated by 'request_fingerprint()'. Before you switch, make sure that you understand the consequences of this (e.g. cache invalidation) and are OK with them; otherwise, consider implementing your own function which returns the same fingerprints as the deprecated 'request_fingerprint()' function.   category
stacklevelNc              3   X   K   | ]%}t          |                                          V  &d S r   r   lower.0hs     r   	<genexpr>z&request_fingerprint.<locals>.<genexpr>{   C       *
 *
$%HQWWYY*
 *
 *
 *
 *
 *
r   r    r   )warningswarnr   tuplesortedr   
setdefaulthashlibsha1updater   methodr   urlbodyr   	hexdigest)	r   r   r    messageprocessed_include_headerscache	cache_keyfpparts	            r   request_fingerprintrB   "   sy   F  2
. 2
O 	:O 	, M'$<KKKK=A 
$) *
 *
)/)@)@*
 *
 *
 %
 %
! *44WbAAE*N;I 
*\^^
		(7>**+++
		%gk.QQQRR	
 	
 	
 			',%#&&&$ 	 *+DgNN    		$<<>>ir   c                      t          j                    5  t          j        d           t                              t          | i |          cd d d            S # 1 swxY w Y   d S )Nignore)r0   catch_warningssimplefilterbytesfromhexrB   )argskwargss     r   _request_fingerprint_as_bytesrK      s    		 	"	" C Ch'''}}0$A&AABBC C C C C C C C C C C C C C C C C Cs   8AA AzQWeakKeyDictionary[Request, Dict[Tuple[Optional[Tuple[bytes, ...]], bool], bytes]]_fingerprint_cache)r   r    c                p   d}|r&t          d t          |          D                       }t                              | i           }||f}||vri }|rG|D ]D}|| j        v r9d | j                            |          D             ||                                <   Et          | j                  t          | j
        |          | j        pd                                |d}t          j        |d          }	t          j        |	                                                                          ||<   ||         S )	a  
    Return the request fingerprint.

    The request fingerprint is a hash that uniquely identifies the resource the
    request points to. For example, take the following two urls:

    http://www.example.com/query?id=111&cat=222
    http://www.example.com/query?cat=222&id=111

    Even though those are two different URLs both point to the same resource
    and are equivalent (i.e. they should return the same response).

    Another example are cookies used to store session ids. Suppose the
    following page is only accessible to authenticated users:

    http://www.example.com/members/offers.html

    Lots of sites use a cookie to store the session id, which adds a random
    component to the HTTP Request and thus should be ignored when calculating
    the fingerprint.

    For this reason, request headers are ignored by default when calculating
    the fingerprint. If you want to include specific headers use the
    include_headers argument, which is a list of Request headers to include.

    Also, servers usually ignore fragments in urls when handling requests,
    so they are also ignored by default when calculating the fingerprint.
    If you want to include them, set the keep_fragments argument to True
    (for instance when handling requests with a headless browser).
    Nc              3   X   K   | ]%}t          |                                          V  &d S r   r(   r*   s     r   r-   zfingerprint.<locals>.<genexpr>   r.   r   c                 6    g | ]}|                                 S  )hex)r+   header_values     r   
<listcomp>zfingerprint.<locals>.<listcomp>   s4     - - -( %((**- - -r   r/   r   )r8   r9   r:   r   T)	sort_keys)r2   r3   rL   r4   r   r   rQ   r   r8   r   r9   r:   jsondumpsr5   r6   encodedigest)
r   r   r    r=   r>   r?   r   r   fingerprint_datafingerprint_jsons
             r   fingerprintr[      sw   H >B 
$) *
 *
)/)@)@*
 *
 *
 %
 %
! ))'266E*N;I L )+$ 	3  W_, - -,3O,C,CF,K,K- - -GFJJLL)
 !00#GKOOO\(S--//	
 
  :&6$GGG"<(8(?(?(A(ABBIIKKir   c                   <    e Zd ZdZed             ZddZdefdZdS )RequestFingerprintera  Default fingerprinter.

    It takes into account a canonical version
    (:func:`w3lib.url.canonicalize_url`) of :attr:`request.url
    <scrapy.http.Request.url>` and the values of :attr:`request.method
    <scrapy.http.Request.method>` and :attr:`request.body
    <scrapy.http.Request.body>`. It then generates an `SHA1
    <https://en.wikipedia.org/wiki/SHA-1>`_ hash.

    .. seealso:: :setting:`REQUEST_FINGERPRINTER_IMPLEMENTATION`.
    c                      | |          S r   rP   )clscrawlers     r   from_crawlerz!RequestFingerprinter.from_crawler   s    s7||r   Nc                     |r|j                             d          }nd}|dk    r,d}t          j        |t          d           t
          | _        d S |dk    rt          | _        d S t          d|d          )	N$REQUEST_FINGERPRINTER_IMPLEMENTATIONz2.6a  '2.6' is a deprecated value for the 'REQUEST_FINGERPRINTER_IMPLEMENTATION' setting.

It is also the default value. In other words, it is normal to get this warning if you have not defined a value for the 'REQUEST_FINGERPRINTER_IMPLEMENTATION' setting. This is so for backward compatibility reasons, but it will change in a future version of Scrapy.

See the documentation of the 'REQUEST_FINGERPRINTER_IMPLEMENTATION' setting for information on how to handle this deprecation.r#   r$   z2.7zHGot an invalid value on setting 'REQUEST_FINGERPRINTER_IMPLEMENTATION': z0. Valid values are '2.6' (deprecated) and '2.7'.)	settingsgetr0   r1   r   rK   _fingerprintr[   
ValueError)selfr`   implementationr<   s       r   __init__zRequestFingerprinter.__init__   s     	#$-116 NN #NU" 	A  M',DQRSSSS =Du$ 	 +D!    r   r   c                 ,    |                      |          S r   )rf   )rh   r   s     r   r[   z RequestFingerprinter.fingerprint  s      )))r   r   )	__name__
__module____qualname____doc__classmethodra   rj   r   r[   rP   r   r   r]   r]      si        
 
   [       D*7 * * * * * *r   r]   usernamepasswordc                 6    t          ||          | j        d<   dS )zAuthenticate the given request (in place) using the HTTP basic access
    authentication mechanism (RFC 2617) and the given username and password
    AuthorizationN)r   r   )r   rq   rr   s      r   request_authenticateru     s      (98'L'LGOO$$$r   c                 l   t          |           }t          dd|j        pd|j        |j        df          }t          | j                  dz   t          |          z   dz   }|dt          |j        pd          z   dz   z  }| j        r|| j        	                                dz   z  }|dz  }|| j
        z  }|S )zReturn the raw HTTP representation (as bytes) of the given request.
    This is provided only for reference since it's not the actual stream of
    bytes that will be send when performing the request (that's controlled
    by Twisted).
     /    s    HTTP/1.1
s   Host: r   s   
)r   r	   pathparamsqueryr   r8   hostnamer   	to_stringr:   )r   parsedrz   ss       r   request_httpreprr     s     W%%Fr2v{1c6=&,PRSTTD  4'(4..8;KKAXfo455	5	??A 3	W_&&((722LAAHr   c                 `    | j                             d          }||S t          |d          S )z0Return Referer HTTP header suitable for logging.RefererNreplace)errors)r   re   r   )r   referrers     r   referer_strr   ,  s7    ""9--H hy1111r   )spiderdr   c                T   d| v rt          | d                   nt          fd|                                 D             }|                     d          r|rt	          || d                   |d<   |                     d          r|rt	          || d                   |d<    di |S )zCreate a :class:`~scrapy.Request` object from a dict.

    If a spider is given, it will try to resolve the callbacks looking at the
    spider for methods with the same name.
    _classc                 .    i | ]\  }}|j         v ||S rP   )
attributes)r+   keyr   request_clss      r   
<dictcomp>z%request_from_dict.<locals>.<dictcomp>;  s,    VVVZS%{?U8UVc5VVVr   callbackerrbackrP   )r   r   itemsre   _get_method)r   r   rJ   r   s      @r   request_from_dictr   4  s     /7!mH+ak***KVVVV17799VVVFuuZ @V @(:??zuuY >F >')==y;     r   c                     t          |          }	 t          | |          S # t          $ r t          d|d|            w xY w)z%Helper function for request_from_dictzMethod z not found in: )strgetattrAttributeErrorrg   )objnames     r   r   r   C  sa    t99DAsD!!! A A A?4??#??@@@As	   !  A)NF)0ro   r5   rU   r0   typingr   r   r   r   r   r   urllib.parser	   weakrefr
   
w3lib.httpr   	w3lib.urlr   scrapyr   r   scrapy.exceptionsr   scrapy.utils.httpobjr   scrapy.utils.miscr   scrapy.utils.pythonr   r   __annotations__r   r   rG   r   boolrB   rK   rL   r[   r]   ru   r   r   dictr   r   rP   r   r   <module>r      s:    
    ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? # # # # # # % % % % % % ( ( ( ( ( ( & & & & & & " " " " " " " " 6 6 6 6 6 6 0 0 0 0 0 0 ) ) ) ) ) ) 4 4 4 4 4 4 4 4p p p p 1 1 3 3    >B i iihuUCZ'89:i i 		i i i iXC C C h g g g&&((  >B 	> > >> huUCZ'89:> 	>
 > > > >B4* 4* 4* 4* 4* 4* 4* 4*nMMM M 
	M M M Mg %    "2 2Xc] 2 2 2 2 >B ! ! ! !(6*: !g ! ! ! !A A A A Ar   