
    3 d9                     ^   d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
m
Z
 d dlmZ d dlmZmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZmZ  ej         e!          Z" G d d          Z# G d d          Z$ G d d          Z% G d d          Z&d Z'd Z(dS )    N)	mktime_tzparsedate_tz)import_module)Path)time)WeakKeyDictionary)headers_dict_to_rawheaders_raw_to_dict)HeadersResponse)Request)responsetypes)Spider)urlparse_cached)	data_path)to_bytes
to_unicodec                   ,    e Zd Zd Zd Zd Zd Zd ZdS )DummyPolicyc                     |                     d          | _        d |                     d          D             | _        d S )NHTTPCACHE_IGNORE_SCHEMESc                 ,    g | ]}t          |          S  )int).0xs     ;lib/python3.11/site-packages/scrapy/extensions/httpcache.py
<listcomp>z(DummyPolicy.__init__.<locals>.<listcomp>   s+     "
 "
 "
CFF"
 "
 "
    HTTPCACHE_IGNORE_HTTP_CODES)getlistignore_schemesignore_http_codesselfsettingss     r   __init__zDummyPolicy.__init__   sO    &../IJJ"
 "
$,,-JKK"
 "
 "
r   c                 8    t          |          j        | j        vS N)r   schemer"   )r%   requests     r   should_cache_requestz DummyPolicy.should_cache_request   s    w''.d6IIIr   c                     |j         | j        vS r)   )statusr#   )r%   responser+   s      r   should_cache_responsez!DummyPolicy.should_cache_response!   s    d&<<<r   c                     dS NTr   )r%   cachedresponser+   s      r   is_cached_response_freshz$DummyPolicy.is_cached_response_fresh$       tr   c                     dS r2   r   )r%   r3   r/   r+   s       r   is_cached_response_validz$DummyPolicy.is_cached_response_valid'   r5   r   N)__name__
__module____qualname__r'   r,   r0   r4   r7   r   r   r   r   r      sb        
 
 
J J J= = =      r   r   c                   N    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd ZdS )RFC2616Policyi3c                     |                     d          | _        |                    d          | _        t	                      | _        d |                    d          D             | _        d S )NHTTPCACHE_ALWAYS_STOREr   c                 ,    g | ]}t          |          S r   )r   )r   ccs     r   r   z*RFC2616Policy.__init__.<locals>.<listcomp>3   s.     /
 /
 /
 RLL/
 /
 /
r   (HTTPCACHE_IGNORE_RESPONSE_CACHE_CONTROLS)getboolalways_storer!   r"   r   
_cc_parsedignore_response_cache_controlsr$   s     r   r'   zRFC2616Policy.__init__/   ss    $,,-EFF&../IJJ+--/
 /
&&'QRR/
 /
 /
+++r   c                     || j         vri|j                            dd          }t          |          }t	          |t
                    r | j        D ]}|                    |d            || j         |<   | j         |         S )Ns   Cache-Controlr   )rD   headersgetparse_cachecontrol
isinstancer   rE   pop)r%   rcchparsedkeys        r   _parse_cachecontrolz!RFC2616Policy._parse_cachecontrol8   s    DO# 	()-- 0#66C',,F!X&& *> * *CJJsD))))!'DOAq!!r   c                 v    t          |          j        | j        v rdS |                     |          }d|v rdS dS )NF   no-storeT)r   r*   r"   rP   )r%   r+   r@   s      r   r,   z"RFC2616Policy.should_cache_requestB   sL    7##*d.AA 	5%%g.." 	5tr   c                     |                      |          }d|v rdS |j        dk    rdS | j        rdS d|v s	d|j        v rdS |j        dv rdS |j        dv rd	|j        v pd
|j        v S dS )NrR   F0  T   max-age   Expiresi,  i-  i4  )      i     Last-Modified   ETag)rP   r.   rC   rG   )r%   r/   r+   r@   s       r   r0   z#RFC2616Policy.should_cache_responseL   s     %%h//" 	5?c! 	5 	4 	zX-== 	4?o- 	4?o- 	W#x'77V7hFV;VV ur   c                    |                      |          }|                      |          }d|v sd|v rdS t                      }|                     |||          }|                     |||          }|                     |          }|t          ||          }||k     rdS d|v rHd|vrD|d         }	|	dS 	 ||t          dt          |	                    z   k     rdS n# t          $ r Y nw xY w| 	                    ||           dS )Ns   no-cacheFTs	   max-stale   must-revalidater   )
rP   r   _compute_freshness_lifetime_compute_current_age_get_max_ageminmaxr   
ValueError_set_conditional_validators)
r%   r3   r+   r@   ccreqnowfreshnesslifetime
currentage	reqmaxagestaleages
             r   r4   z&RFC2616Policy.is_cached_response_freshh   sg   %%n55((11" 	u 4 	5ff <<GS
 
 ..~wLL
%%e,,	 	B #$5y A A)) 	45  	%7r%A 	 \*H t 1C3x==4I4I II  4     	((.AAAus   5$C 
C)(C)c                 d    |j         dk    r|                     |          }d|vrdS |j         dk    S )Ni  r]   TrT   )r.   rP   )r%   r3   r/   r+   r@   s        r   r7   z&RFC2616Policy.is_cached_response_valid   sG     ?c! 	)).99B!+ t #%%r   c                     d|j         v r|j         d         |j         d<   d|j         v r|j         d         |j         d<   d S d S )NrZ   s   If-Modified-Sincer[   s   If-None-Match)rG   )r%   r+   r3   s      r   rd   z)RFC2616Policy._set_conditional_validators   sd    ~55 	4B4J 5GO01 n,, 	P0>0Fw0OGO,---	P 	Pr   c                 z    	 t          dt          |d                             S # t          t          f$ r Y d S w xY w)Nr   rU   )rb   r   KeyErrorrc   )r%   r@   s     r   r`   zRFC2616Policy._get_max_age   sJ    	q#bn--...*% 	 	 	44	s   "% ::c                    |                      |          }|                     |          }||S t          |j                            d                    p|}d|j        v r1t          |j        d                   }|rt          d||z
            ndS t          |j                            d                    }|r||k    r||z
  dz  S |j        dv r| j        S dS )N   DaterV   r   rZ   
   rW   )rP   r`   rfc1123_to_epochrG   rH   rb   r.   MAXAGE)	r%   r/   r+   rf   r@   maxagedateexpireslastmodifieds	            r   r^   z)RFC2616Policy._compute_freshness_lifetime   s    %%h//""2&& 	M   0 4 4W = =>>E# )) 	<&x'7
'CDDG .5;3q'D.)))!; ((8(<(<=M(N(NOO 	.LD0 	.<'2-- ?o- 	; qr   c                     d}t          |j                            d                    p|}||k    r||z
  }d|j        v r<	 t          |j        d                   }t	          ||          }n# t
          $ r Y nw xY w|S )Nr   rp   s   Age)rr   rG   rH   r   rb   rc   )r%   r/   r+   rf   rh   ru   ages          r   r_   z"RFC2616Policy._compute_current_age   s     
   0 4 4W = =>>E#: 	$tJX%% 	(*6233 S11

    s   *A, ,
A98A9N)r8   r9   r:   rs   r'   rP   r,   r0   r4   r7   rd   r`   r^   r_   r   r   r   r<   r<   +   s        F
 
 
" " "    8' ' 'R	& 	& 	&P P P    >    r   r<   c                   8    e Zd Zd ZdefdZd Zd Zd Zd Z	dS )	DbmCacheStoragec                     t          |d         d          | _        |                    d          | _        t	          |d                   | _        d | _        d S )NHTTPCACHE_DIRT)	createdirHTTPCACHE_EXPIRATION_SECSHTTPCACHE_DBM_MODULE)r   cachedirgetintexpiration_secsr   dbmoduledbr$   s     r   r'   zDbmCacheStorage.__init__   sP    !(?";tLLL'/JKK%h/E&FGGr   spiderc                     t          | j        |j         d          }| j                            t          |          d          | _        t                              dd|id|i           |j	        j
        | _        d S )Nz.dbcz(Using DBM cache storage in %(cachepath)s	cachepathr   extra)r   r   namer   openstrr   loggerdebugcrawlerrequest_fingerprinter_fingerprinter)r%   r   dbpaths      r   open_spiderzDbmCacheStorage.open_spider   s    dm%8%8%899-$$S[[#666&!V$ 	 	
 	
 	
 %nBr   c                 8    | j                                          d S r)   )r   closer%   r   s     r   close_spiderzDbmCacheStorage.close_spider   s    r   c                     |                      ||          }|d S |d         }|d         }t          |d                   }|d         }t          j        |||          } |||||          }	|	S )Nurlr.   rG   bodyrG   r   r   r   rG   r.   r   )
_read_datar   r   	from_args)
r%   r   r+   datar   r.   rG   r   respclsr/   s
             r   retrieve_responsez!DbmCacheStorage.retrieve_response   s    vw// 	F5kh$y/**F|)'sNNN7sGFNNNr   c                 <   | j                             |                                          }|j        |j        t          |j                  |j        d}t          j	        |d          | j
        | d<   t          t                                | j
        | d<   d S )N)r.   r   rG   r      protocol_data_time)r   fingerprinthexr.   r   dictrG   r   pickledumpsr   r   r   )r%   r   r+   r/   rO   r   s         r   store_responsezDbmCacheStorage.store_response   s    !--g66::<<o<H,--M	
 
 "(dQ!?!?!?3!$TVV3r   c                 .   | j                             |                                          }| j        }| d}||vrd S ||         }d| j        cxk     r"t                      t          |          z
  k     rn nd S t          j        || d                   S )Nr   r   r   )	r   r   r   r   r   r   floatr   loads)r%   r   r+   rO   r   tkeytss          r   r   zDbmCacheStorage._read_data  s    !--g66::<<W}}}r> 	FXt# 	 	 	 	dffuRyy&8 	 	 	 	 	F|B#}}}-...r   N)
r8   r9   r:   r'   r   r   r   r   r   r   r   r   r   r{   r{      s          
C& 
C 
C 
C 
C  
 
 
	- 	- 	-/ / / / /r   r{   c                   j    e Zd Zd ZdefdZd ZdedefdZdedefdZ	dedede
fd	Zdedefd
ZdS )FilesystemCacheStoragec                     t          |d                   | _        |                    d          | _        |                    d          | _        | j        rt          j        nt          | _        d S )Nr}   r   HTTPCACHE_GZIP)	r   r   r   r   rB   use_gzipgzipr   _openr$   s     r   r'   zFilesystemCacheStorage.__init__  sY    !(?";<<'/JKK (()9::"&-9TYYT


r   r   c                 t    t                               dd| j        id|i           |j        j        | _        d S )Nz.Using filesystem cache storage in %(cachedir)sr   r   r   )r   r   r   r   r   r   r   s     r   r   z"FilesystemCacheStorage.open_spider   sG    <'V$ 	 	
 	
 	
 %nBr   c                     d S r)   r   r   s     r   r   z#FilesystemCacheStorage.close_spider)  s    r   r+   c                 R   |                      ||          }|dS t          |                     ||                    }|                     |dz  d          5 }|                                }ddd           n# 1 swxY w Y   |                     |dz  d          5 }|                                }ddd           n# 1 swxY w Y   |                    d          }|d         }	t          t          |                    }
t          j	        |
||          } |||
|	|          }|S )	z7Return response if present in cache, or None otherwise.Nresponse_bodyrbresponse_headersresponse_urlr.   r   r   )

_read_metar   _get_request_pathr   readrH   r   r
   r   r   )r%   r   r+   metadatarpathfr   
rawheadersr   r.   rG   r   r/   s                r   r   z(FilesystemCacheStorage.retrieve_response,  s   ??6733 	FT++FG<<==ZZ/66 	!6688D	 	 	 	 	 	 	 	 	 	 	 	 	 	 	ZZ 22D99 	"QJ	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	"ll>**(#-j99::)'sNNN7sGFNNNs$   A88A<?A<B==CCc                    t          |                     ||                    }|                                s|                    d           |j        |j        |j        |j        t                      d}|                     |dz  d          5 }|	                    t          t          |                               ddd           n# 1 swxY w Y   |                     |dz  d          5 }t          j        ||d	           ddd           n# 1 swxY w Y   |                     |d
z  d          5 }|	                    t          |j                             ddd           n# 1 swxY w Y   |                     |dz  d          5 }|	                    |j                   ddd           n# 1 swxY w Y   |                     |dz  d          5 }|	                    t          |j                             ddd           n# 1 swxY w Y   |                     |dz  d          5 }|	                    |j                   ddd           dS # 1 swxY w Y   dS )z&Store the given response in the cache.T)parents)r   methodr.   r   	timestampmetawbNpickled_metar   r   r   r   request_headersrequest_body)r   r   existsmkdirr   r   r.   r   r   writer   reprr   dumpr	   rG   r   )r%   r   r+   r/   r   r   r   s          r   r   z%FilesystemCacheStorage.store_response=  s   T++FG<<==||~~ 	&KKK%%%;no$L
 
 ZZ-- 	.GGHT(^^,,---	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	.ZZ.55 	1K!a0000	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1ZZ 22D99 	;QGG'(899:::	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	;ZZ/66 	#!GGHM"""	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	#ZZ 11488 	:AGG'88999	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	: 	:ZZ.55 	"GGGL!!!	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	"sl   0CCC/DDD7(E++E/2E/F66F:=F:(HHH2II!Ireturnc           	          | j                             |                                          }t          t	          | j        |j        |dd         |                    S )Nr      )r   r   r   r   r   r   r   )r%   r   r+   rO   s       r   r   z(FilesystemCacheStorage._get_request_pathV  sL    !--g66::<<4v{C!HcBBCCCr   c                    t          |                     ||                    }|dz  }|                                sd S |                                j        }d| j        cxk     rt                      |z
  k     rn nd S |                     |d          5 }t          j	        |          cd d d            S # 1 swxY w Y   d S )Nr   r   r   )
r   r   r   statst_mtimer   r   r   r   load)r%   r   r+   r   metapathmtimer   s          r   r   z!FilesystemCacheStorage._read_metaZ  s   T++FG<<==>)   	F(t# 	 	 	 	dffun 	 	 	 	 	FZZ$'' 	"1;q>>	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	"s   B55B9<B9N)r8   r9   r:   r'   r   r   r   r   r   r   r   r   r   r   r   r   r   r     s        : : :C& C C C C       ""V "g " " " "2D D DS D D D D	" 	"' 	" 	" 	" 	" 	" 	"r   r   c                     i }|                      d          D ]J}|                                                    d          \  }}}|r|r|nd||                                <   K|S )a4  Parse Cache-Control header

    https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.9

    >>> parse_cachecontrol(b'public, max-age=3600') == {b'public': None,
    ...                                                 b'max-age': b'3600'}
    True
    >>> parse_cachecontrol(b'') == {}
    True

       ,   =N)splitstrip	partitionlower)header
directives	directiverO   sepvals         r   rI   rI   f  sq     J\\$'' ; ;	!))33D99S# 	;-0&:ccdJsyy{{#r   c                     	 t          | d          } t          t          |                     S # t          $ r Y d S w xY w)Nascii)encoding)r   r   r   	Exception)date_strs    r   rr   rr   z  sQ    h999h//000   tts   ,/ 
==))r   loggingr   email.utilsr   r   	importlibr   pathlibr   r   weakrefr   
w3lib.httpr	   r
   scrapy.httpr   r   scrapy.http.requestr   scrapy.responsetypesr   scrapy.spidersr   scrapy.utils.httpobjr   scrapy.utils.projectr   scrapy.utils.pythonr   r   	getLoggerr8   r   r   r<   r{   r   rI   rr   r   r   r   <module>r      s      / / / / / / / / # # # # # #             % % % % % % ? ? ? ? ? ? ? ? ) ) ) ) ) ) ) ) ' ' ' ' ' ' . . . . . . ! ! ! ! ! ! 0 0 0 0 0 0 * * * * * * 4 4 4 4 4 4 4 4		8	$	$       (p p p p p p p pf8/ 8/ 8/ 8/ 8/ 8/ 8/ 8/vJ" J" J" J" J" J" J" J"Z  (    r   