
    3 dE6              
          d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZmZ dd	lmZ dd
lmZ ddlmZ dZdZdZdZdZdZdZdZdZdZ G d d          Z G d de          Z  G d de          Z! G d de          Z" G d de          Z# G d  d!e          Z$ G d" d#e          Z% G d$ d%e          Z& G d& d'e          Z' G d( d)e!          Z(d* e e!e"e#e$e%e&e'e(f	D             Z)e!e)d+<   d0d-Z* G d. d/          Z+dS )1z`
RefererMiddleware: populates Request referer field, based on the Response which
originated it.
    N)Tuple)urlparse)safe_url_string)signals)NotConfigured)RequestResponse)load_object)
to_unicode	strip_url)aboutblobdata
filesystemzno-referrerzno-referrer-when-downgradezsame-originoriginzstrict-originzorigin-when-cross-originzstrict-origin-when-cross-originz
unsafe-urlzscrapy-defaultc                   d    e Zd ZU eZeedf         ed<   eed<   d Zd Z	d Z
ddZd	 Zd
 Zd ZdS )ReferrerPolicy.NOREFERRER_SCHEMESnamec                     t                      N)NotImplementedErrorselfresponse_urlrequest_urls      @lib/python3.11/site-packages/scrapy/spidermiddlewares/referer.pyreferrerzReferrerPolicy.referrer)   s    !###    c                 f    t          |          j        | j        vr|                     |          S d S r   )r   schemer   r   r   urls     r   stripped_referrerz ReferrerPolicy.stripped_referrer,   s7    C==t'>> 	'>>#&&&	' 	'r    c                 f    t          |          j        | j        vr|                     |          S d S r   )r   r"   r   r   r#   s     r   origin_referrerzReferrerPolicy.origin_referrer0   s7    C==t'>> 	$;;s###	$ 	$r    Fc                 2    |sdS t          |ddd|          S )a  
        https://www.w3.org/TR/referrer-policy/#strip-url

        If url is null, return no referrer.
        If url's scheme is a local scheme, then return no referrer.
        Set url's username to the empty string.
        Set url's password to null.
        Set url's fragment to null.
        If the origin-only flag is true, then:
            Set url's path to null.
            Set url's query to null.
        Return url.
        NT)strip_credentialsstrip_fragmentstrip_default_portorigin_onlyr   )r   r$   r,   s      r   r   zReferrerPolicy.strip_url4   s7      	4"##
 
 
 	
r    c                 0    |                      |d          S )zLReturn serialized origin (scheme, host, path) for a request or response URL.T)r,   r   r#   s     r   r   zReferrerPolicy.originL   s    ~~ct~444r    c                 `    t          |          }|j        dv rdS |                     |          S )N)r   F)r   r"   tls_protected)r   r$   
parsed_urls      r   potentially_trustworthyz&ReferrerPolicy.potentially_trustworthyP   s6    c]]
	) 	5!!#&&&r    c                 .    t          |          j        dv S )N)httpsftps)r   r"   r#   s     r   r/   zReferrerPolicy.tls_protectedW   s    }}#'888r    NF)__name__
__module____qualname__LOCAL_SCHEMESr   r   str__annotations__r   r%   r'   r   r   r1   r/    r    r   r   r   $   s         *7c3h777
III$ $ $' ' '$ $ $
 
 
 
05 5 5' ' '9 9 9 9 9r    r   c                   (    e Zd ZU dZeZeed<   d ZdS )NoReferrerPolicya(  
    https://www.w3.org/TR/referrer-policy/#referrer-policy-no-referrer

    The simplest policy is "no-referrer", which specifies that no referrer information
    is to be sent along with requests made from a particular request client to any origin.
    The header will be omitted entirely.
    r   c                     d S r   r<   r   s      r   r   zNoReferrerPolicy.referrerf   s    tr    N)	r6   r7   r8   __doc__POLICY_NO_REFERRERr   r:   r;   r   r<   r    r   r>   r>   [   sA           #D#"""    r    r>   c                   (    e Zd ZU dZeZeed<   d ZdS )NoReferrerWhenDowngradePolicyaW  
    https://www.w3.org/TR/referrer-policy/#referrer-policy-no-referrer-when-downgrade

    The "no-referrer-when-downgrade" policy sends a full URL along with requests
    from a TLS-protected environment settings object to a potentially trustworthy URL,
    and requests from clients which are not TLS-protected to any origin.

    Requests from TLS-protected clients to non-potentially trustworthy URLs,
    on the other hand, will contain no referrer information.
    A Referer HTTP header will not be sent.

    This is a user agent's default behavior, if no policy is otherwise specified.
    r   c                     |                      |          r|                      |          r|                     |          S d S r   )r/   r%   r   s      r   r   z&NoReferrerWhenDowngradePolicy.referrer{   sK    !!,// 	843E3Ek3R3R 	8)),777	8 	8r    N)	r6   r7   r8   r@   !POLICY_NO_REFERRER_WHEN_DOWNGRADEr   r:   r;   r   r<   r    r   rC   rC   j   sA           2D#1118 8 8 8 8r    rC   c                   (    e Zd ZU dZeZeed<   d ZdS )SameOriginPolicya  
    https://www.w3.org/TR/referrer-policy/#referrer-policy-same-origin

    The "same-origin" policy specifies that a full URL, stripped for use as a referrer,
    is sent as referrer information when making same-origin requests from a particular request client.

    Cross-origin requests, on the other hand, will contain no referrer information.
    A Referer HTTP header will not be sent.
    r   c                     |                      |          |                      |          k    r|                     |          S d S r   r   r%   r   s      r   r   zSameOriginPolicy.referrer   sE    ;;|$$K(@(@@ 	8)),777	8 	8r    N)	r6   r7   r8   r@   POLICY_SAME_ORIGINr   r:   r;   r   r<   r    r   rG   rG      sA           #D#"""8 8 8 8 8r    rG   c                   (    e Zd ZU dZeZeed<   d ZdS )OriginPolicya>  
    https://www.w3.org/TR/referrer-policy/#referrer-policy-origin

    The "origin" policy specifies that only the ASCII serialization
    of the origin of the request client is sent as referrer information
    when making both same-origin requests and cross-origin requests
    from a particular request client.
    r   c                 ,    |                      |          S r   )r'   r   s      r   r   zOriginPolicy.referrer   s    ##L111r    N)	r6   r7   r8   r@   POLICY_ORIGINr   r:   r;   r   r<   r    r   rL   rL      sA           D#2 2 2 2 2r    rL   c                   (    e Zd ZU dZeZeed<   d ZdS )StrictOriginPolicya0  
    https://www.w3.org/TR/referrer-policy/#referrer-policy-strict-origin

    The "strict-origin" policy sends the ASCII serialization
    of the origin of the request client when making requests:
    - from a TLS-protected environment settings object to a potentially trustworthy URL, and
    - from non-TLS-protected environment settings objects to any origin.

    Requests from TLS-protected request clients to non- potentially trustworthy URLs,
    on the other hand, will contain no referrer information.
    A Referer HTTP header will not be sent.
    r   c                     |                      |          r|                     |          s|                      |          s|                     |          S d S r   )r/   r1   r'   r   s      r   r   zStrictOriginPolicy.referrer   sf    |,,	6,,[99	6 %%l33	6
 ''555	6 	6r    N)	r6   r7   r8   r@   POLICY_STRICT_ORIGINr   r:   r;   r   r<   r    r   rP   rP      sA           %D#$$$6 6 6 6 6r    rP   c                   (    e Zd ZU dZeZeed<   d ZdS )OriginWhenCrossOriginPolicya  
    https://www.w3.org/TR/referrer-policy/#referrer-policy-origin-when-cross-origin

    The "origin-when-cross-origin" policy specifies that a full URL,
    stripped for use as a referrer, is sent as referrer information
    when making same-origin requests from a particular request client,
    and only the ASCII serialization of the origin of the request client
    is sent as referrer information when making cross-origin requests
    from a particular request client.
    r   c                     |                      |          }||                      |          k    r|                     |          S |S r   rI   r   r   r   r   s       r   r   z$OriginWhenCrossOriginPolicy.referrer   sE    \**T[[--- 	8)),777r    N)	r6   r7   r8   r@   POLICY_ORIGIN_WHEN_CROSS_ORIGINr   r:   r;   r   r<   r    r   rT   rT      sA         	 	 0D#///    r    rT   c                   (    e Zd ZU dZeZeed<   d ZdS )!StrictOriginWhenCrossOriginPolicya  
    https://www.w3.org/TR/referrer-policy/#referrer-policy-strict-origin-when-cross-origin

    The "strict-origin-when-cross-origin" policy specifies that a full URL,
    stripped for use as a referrer, is sent as referrer information
    when making same-origin requests from a particular request client,
    and only the ASCII serialization of the origin of the request client
    when making cross-origin requests:

    - from a TLS-protected environment settings object to a potentially trustworthy URL, and
    - from non-TLS-protected environment settings objects to any origin.

    Requests from TLS-protected clients to non- potentially trustworthy URLs,
    on the other hand, will contain no referrer information.
    A Referer HTTP header will not be sent.
    r   c                 4   |                      |          }||                      |          k    r|                     |          S |                     |          r|                     |          s|                     |          s|                     |          S d S r   )r   r%   r/   r1   r'   rV   s       r   r   z*StrictOriginWhenCrossOriginPolicy.referrer   s    \**T[[--- 	8)),777|,,	6,,[99	6 %%l33	6
 ''555	6 	6r    N)	r6   r7   r8   r@   &POLICY_STRICT_ORIGIN_WHEN_CROSS_ORIGINr   r:   r;   r   r<   r    r   rY   rY      sA          " 7D#666	6 	6 	6 	6 	6r    rY   c                   (    e Zd ZU dZeZeed<   d ZdS )UnsafeUrlPolicya  
    https://www.w3.org/TR/referrer-policy/#referrer-policy-unsafe-url

    The "unsafe-url" policy specifies that a full URL, stripped for use as a referrer,
    is sent along with both cross-origin requests
    and same-origin requests made from a particular request client.

    Note: The policy's name doesn't lie; it is unsafe.
    This policy will leak origins and paths from TLS-protected resources
    to insecure origins.
    Carefully consider the impact of setting such a policy for potentially sensitive documents.
    r   c                 ,    |                      |          S r   )r%   r   s      r   r   zUnsafeUrlPolicy.referrer   s    %%l333r    N)	r6   r7   r8   r@   POLICY_UNSAFE_URLr   r:   r;   r   r<   r    r   r]   r]      sA           "D#!!!4 4 4 4 4r    r]   c                   F    e Zd ZU dZedz   Zeedf         ed<   e	Z
eed<   dS )DefaultReferrerPolicyz
    A variant of "no-referrer-when-downgrade",
    with the addition that "Referer" is not sent if the parent request was
    using ``file://`` or ``s3://`` scheme.
    )files3.r   r   N)r6   r7   r8   r@   r9   r   r   r:   r;   POLICY_SCRAPY_DEFAULTr   r<   r    r   ra   ra     sL           +8.*Hc3hHHH%D#%%%%%r    ra   c                     i | ]
}|j         |S r<   )r   ).0ps     r   
<dictcomp>rh     s,       	 FA  r     Fc                    	 t          |           S # t          $ re 	 t          |                                          cY S # t          $ r5 d| }|st          |          t          j        |t                     Y Y dS w xY ww xY w)z
    Expect a string for the path to the policy class,
    otherwise try to interpret the string as a standard value
    from https://www.w3.org/TR/referrer-policy/#referrer-policies
    zCould not load referrer policy N)	r
   
ValueError_policy_classeslowerKeyErrorRuntimeErrorwarningswarnRuntimeWarning)policywarning_onlymsgs      r   _load_policy_classrv   "  s    6""" 	 	 		"6<<>>2222 	 	 	>F>>C "3'''c>222ttt		s)    
B =B :A<7B ;A<<B c                   J    e Zd Zd	dZed             Zd Zd Zd Zd Z	d Z
dS )
RefererMiddlewareNc                 t    t           | _        |)t          |                    d                    | _        d S d S )NREFERRER_POLICY)ra   default_policyrv   get)r   settingss     r   __init__zRefererMiddleware.__init__7  sB    3 	V"4X\\BS5T5T"U"UD	V 	Vr    c                     |j                             d          st           | |j                   }|j                            |j        t          j                   |S )NREFERER_ENABLED)signal)r}   getboolr   r   connectrequest_scheduled)clscrawlermws      r   from_crawlerzRefererMiddleware.from_crawler<  s\    ''(9:: 	 S!"" 	 4W=VWWW	r    c                 n   |j                             d          }|St          |t                    r>|j                            d          }|"t          |                    d                    }||                                 S t          |d          }|r
 |            n|                                 S )a  
        Determine Referrer-Policy to use from a parent Response (or URL),
        and a Request to be sent.

        - if a valid policy is set in Request meta, it is used.
        - if the policy is set in meta but is wrong (e.g. a typo error),
          the policy from settings is used
        - if the policy is not set in Request meta,
          but there is a Referrer-policy header in the parent response,
          it is used if valid
        - otherwise, the policy from settings is used.
        referrer_policyNzReferrer-Policylatin1T)rt   )	metar|   
isinstancer	   headersr   decoder{   rv   )r   resp_or_urlrequestpolicy_namepolicy_headerr   s         r   rs   zRefererMiddleware.policyG  s     l&&'899 	M+x00 M + 3 7 78I J J  M",]-A-A(-K-K"L"LK 	)&&((( 4@@@6ssuuu!4!4!6!66r    c                 (      fd|pdD             S )Nc              3   D   K   | ]}                     |          V  d S r   _set_referer)rf   rresponser   s     r   	<genexpr>z:RefererMiddleware.process_spider_output.<locals>.<genexpr>a  s3      EE1!!!X..EEEEEEr    r<   r<   )r   r   resultspiders   ``  r   process_spider_outputz'RefererMiddleware.process_spider_output`  s%    EEEEE"EEEEr    c                V   K   |pd2 3 d {V }|                      ||          W V  !6 d S )Nr<   r   )r   r   r   r   r   s        r   process_spider_output_asyncz-RefererMiddleware.process_spider_output_asyncc  s]      | 	1 	1 	1 	1 	1 	1 	1!##Ax0000000 $||s   (c                     t          |t                    rQ|                     ||                              |j        |j                  }||j                            d|           |S )NReferer)r   r   rs   r   r$   r   
setdefault)r   r   r   r   s       r   r   zRefererMiddleware._set_refererg  s`    a!! 	:{{8Q//88quMMH :	$$Y999r    c                 \   |j                             dg           }|r|j                            d          }|pt          |          }|                     ||                              ||j                  }||k    r.||j                            d           d S ||j        d<   d S d S d S d S )Nredirect_urlsr   )r   r|   r   r   rs   r   r$   pop)r   r   r   redirected_urlsrequest_referrer
parent_urlpolicy_referrers          r   r   z#RefererMiddleware.request_scheduledn  s    !,**?B?? 	E&229== E --=>>
"&++j'"B"B"K"K# # #&66 E& E++I666665D	222#	E 	EE EE Er    r   )r6   r7   r8   r~   classmethodr   rs   r   r   r   r   r<   r    r   rx   rx   6  s        V V V V
   [7 7 72F F F1 1 1  E E E E Er    rx   r5   ),r@   rp   typingr   urllib.parser   	w3lib.urlr   scrapyr   scrapy.exceptionsr   scrapy.httpr   r	   scrapy.utils.miscr
   scrapy.utils.pythonr   scrapy.utils.urlr   r9   rA   rE   rJ   rN   rR   rW   r[   r_   rd   r   r>   rC   rG   rL   rP   rT   rY   r]   ra   rl   rv   rx   r<   r    r   <module>r      sH           ! ! ! ! ! ! % % % % % %       + + + + + + ) ) ) ) ) ) ) ) ) ) ) ) ) ) * * * * * * & & & & & & # $@ !" & "< )J &  ( 49 49 49 49 49 49 49 49n    ~   8 8 8 8 8N 8 8 8,8 8 8 8 8~ 8 8 8$2 2 2 2 2> 2 2 2 6 6 6 6 6 6 6 62    .   *6 6 6 6 6 6 6 6@4 4 4 4 4n 4 4 4(& & & & &9 & & &  	%#)
    4    (LE LE LE LE LE LE LE LE LE LEr    