
    3 d5                       d Z ddlmZ ddlZddlmZ ddlmZmZm	Z	m
Z
mZmZmZmZmZmZmZ ddlmZ ddlmZmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZm Z m!Z! ddl"m#Z#m$Z$ ddl%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ ddl,m-Z-m.Z. ddl/m0Z0m1Z1 ddl2m3Z3 erddl4m5Z5 eee$ef         e#ef         Z6 ej7        e8          Z9 G d d          Z: G d d          Z;dS )zfThis module implements the Scraper component which parses responses and
extracts information from them    )annotationsN)deque)TYPE_CHECKINGAnyAsyncGeneratorAsyncIterableDeque	GeneratorIterableOptionalSetTupleUnion)is_item)DeferredinlineCallbacks)Failure)Spidersignals)SpiderMiddlewareManager)CloseSpiderDropItemIgnoreRequest)RequestResponse)aiter_errback
defer_faildefer_succeediter_errbackparallelparallel_async)failure_to_exc_infologformatter_adapter)load_object#warn_on_generator_with_return_value)iterate_spider_output)Crawlerc                  H    e Zd ZdZdZdddZddZddZddZddZ	ddZ
dS )Slotz%Scraper slot (one per running spider)i   @KL max_active_sizeintc                    || _         t                      | _        t                      | _        d| _        d| _        d | _        d S )Nr   )r+   r   queuesetactiveactive_sizeitemproc_sizeclosing)selfr+   s     3lib/python3.11/site-packages/scrapy/core/scraper.py__init__zSlot.__init__8   s;    .(-
$'EE !"#+/    resultUnion[Response, Failure]requestr   returnr   c                   t                      }| j                            |||f           t          |t                    r6| xj        t          t          |j                  | j	                  z  c_        n| xj        | j	        z  c_        |S N)
r   r.   append
isinstancer   r1   maxlenbodyMIN_RESPONSE_SIZE)r4   r8   r:   deferreds       r5   add_response_requestzSlot.add_response_request@   s     ::
67H5666fh'' 	7C$4$4d6L M MM 66r7   
QueueTuplec                z    | j                                         \  }}}| j                            |           |||fS r=   )r.   popleftr0   add)r4   responser:   rD   s       r5   next_response_request_deferredz#Slot.next_response_request_deferredK   s=    &*j&8&8&:&:#'8   (**r7   Nonec                    | j                             |           t          |t                    r7| xj        t          t          |j                  | j                  z  c_        d S | xj        | j        z  c_        d S r=   )	r0   remover?   r   r1   r@   rA   rB   rC   )r4   r8   r:   s      r5   finish_responsezSlot.finish_responseP   sz     	7###fh'' 	7C$4$4d6L M MM 66r7   boolc                     | j         p| j         S r=   )r.   r0   r4   s    r5   is_idlezSlot.is_idleY   s    J-$+..r7   c                "    | j         | j        k    S r=   )r1   r+   rR   s    r5   needs_backoutzSlot.needs_backout\   s    $"666r7   N)r*   )r+   r,   )r8   r9   r:   r   r;   r   )r;   rF   )r8   r9   r:   r   r;   rL   r;   rP   )__name__
__module____qualname____doc__rC   r6   rE   rK   rO   rS   rU    r7   r5   r)   r)   3   s        //0 0 0 0 0	 	 	 	+ + + +
7 7 7 7/ / / /7 7 7 7 7 7r7   r)   c                      e Zd Zd)dZed*d            Zd+d
Zd,dZd-dZd.dZ	d-dZ
d.dZd.dZd.dZd/dZd0dZd1d!Zd2d%Zd3d'Zd(S )4Scrapercrawlerr'   r;   rL   c                *   d | _         t          j        |          | _        t	          |j        d                   }|                    |          | _        |j                            d          | _        || _	        |j
        | _
        |j        | _        d S )NITEM_PROCESSORCONCURRENT_ITEMS)slotr   from_crawlerspidermwr$   settingsitemprocgetintconcurrent_itemsr^   r   logformatter)r4   r^   itemproc_clss      r5   r6   zScraper.__init__a   s    $(	/<WEE"7#34D#EFF$11':: ' 0 7 78J K K#0r7   spiderr   c              #     K   t          | j        j                            d                    | _        | j                            |          V  dS )z@Open the given spider for scraping and allocate resources for itSCRAPER_SLOT_MAX_ACTIVE_SIZEN)r)   r^   re   rg   rb   rf   open_spiderr4   rk   s     r5   rn   zScraper.open_spiderk   sM       .556TUUVV	m''///////r7   r   c                    | j         t          d          t                      | j         _        | j         j                            | j        j                   |                     |           | j         j        S )z6Close a spider being scraped and release its resourcesNScraper slot not assigned)rb   RuntimeErrorr   r3   addCallbackrf   close_spider_check_if_closingro   s     r5   rt   zScraper.close_spiderq   sf    9 	<:;;;$JJ		%%dm&@AAAv&&&y  r7   rP   c                    | j          S )z6Return True if there isn't any more spiders to process)rb   rR   s    r5   rS   zScraper.is_idlez   s    9}r7   c                    | j         J | j         j        r:| j                                         r#| j         j                            |           d S d S d S r=   )rb   r3   rS   callbackro   s     r5   ru   zScraper._check_if_closing~   se    y$$$9 	/!2!2!4!4 	/I&&v.....	/ 	/ 	/ 	/r7   r8   r9   r:   r   c                     j         t          d           j                                       } fd}|                    |           |                    fd                                           |S )Nrq   c                    j                                                                                                      | S r=   )rb   rO   ru   _scrape_next)_r:   r8   r4   rk   s    r5   finish_scrapingz/Scraper.enqueue_scrape.<locals>.finish_scraping   sH    I%%fg666""6***f%%%Hr7   c                b    t                               ddit          |           di          S )Nz"Scraper bug processing %(request)sr:   rk   exc_infoextra)loggererrorr"   )fr:   rk   s    r5   <lambda>z(Scraper.enqueue_scrape.<locals>.<lambda>   s8    fll4G$,Q//(	 #   r7   )rb   rr   rE   addBoth
addErrbackr{   )r4   r8   r:   rk   dfdr}   s   ````  r5   enqueue_scrapezScraper.enqueue_scrape   s     9 	<:;;;i,,VW==	 	 	 	 	 	 	 	 	O$$$    	
 	
 	
 	&!!!
r7   c                    | j         J | j         j        rU| j                                         \  }}}|                     |||                              |           | j         j        Sd S d S r=   )rb   r.   rK   _scrapechainDeferred)r4   rk   rJ   r:   rD   s        r5   r{   zScraper._scrape_next   s    y$$$io 	L*.)*R*R*T*T'HgxLL7F33AA(KKK io 	L 	L 	L 	L 	Lr7   c                $   t          |t          t          f          s"t          dt	          |           d|          |                     |||          }|                    | j        |||           |                    | j	        |||           |S )z_
        Handle the downloaded response or failure through the spider callback/errback
        z2Incorrect type: expected Response or Failure, got z: )
r?   r   r   	TypeErrortype_scrape2r   handle_spider_errorrs   handle_spider_outputr4   r8   r:   rk   r   s        r5   r   zScraper._scrape   s     &8W"566 	_T&\\__U[__   mmGV
 
 	t/&&III17FFKKK
r7   c                    t          |t                    r"| j                            | j        |||          S |                     |||          }|                    | j        |||          S )z]
        Handle the different cases of request's result been a Response or a Failure
        )r?   r   rd   scrape_responsecall_spiderr   _log_download_errorsr   s        r5   r   zScraper._scrape2   sp     fh'' 	=00 &'6   vw77~~d7&QQQr7   c                   t          |t                    rlt          |dd           ||_        |j        j        p|j        }t          ||           t          |          }|                    ||j        j	                   nE||_        t          ||j
                   t          |          }|                    |j
                   |                    t                    S )Nr:   )rx   callbackKeywords)r?   r   getattrr:   rx   _parser%   r   addCallbacks	cb_kwargserrbackr   r   rs   r&   )r4   r8   r:   rk   rx   r   s         r5   r   zScraper.call_spider   s     fh'' 	,vy$// )!(~.?&-H/AAA''C!FN4L      %FN/HHHV$$CNN7?+++4555r7   _failurer   rJ   r   c                   |j         }t          |t                    r7| j        j        J | j        j                            ||j        pd           d S | j                            ||||          }t          j
        t          |          t          |          d|id | j                            t          j        |||           | j        j                            d|j         j        j         |           d S )N	cancelledrk   r   )signalfailurerJ   rk   zspider_exceptions/)rk   )valuer?   r   r^   enginert   reasonri   spider_errorr   logr#   r"   r   send_catch_logstats	inc_value	__class__rW   )r4   r   r:   rJ   rk   exclogkwss          r5   r   zScraper.handle_spider_error   s    nc;'' 	<&222L,,VSZ5N;OOOF"//'8VTT
!&))(22V$	
 	
 	
 	

 	##'	 	$ 	
 	
 	
 	$$D!9!BDDV 	% 	
 	
 	
 	
 	
r7   Union[Iterable, AsyncIterable]c                ,   |st          d           S t          |t                    r7t          || j        |||          }t          || j        | j        |||          }n6t          || j        |||          }t          || j        | j        |||          }|S r=   )
r   r?   r   r   r   r!   rh   _process_spidermw_outputr   r    )r4   r8   r:   rJ   rk   itr   s          r5   r   zScraper.handle_spider_output   s      	' &&&fm,, 	0'8V B !%- CC 0'8V B %- C 
r7   outputr   Optional[Deferred]c                   | j         J t          |t                    r/| j        j        J | j        j                            |           nt          |          rO| j         xj        dz  c_        | j        	                    ||          }|
                    | j        |||           |S |n6t          |          j        }t                              d||dd|i           dS )ziProcess each Request/Item (given in the output parameter) returned
        from the given spider
        N)r:      zJSpider must return request, item, or None, got %(typename)r in %(request)s)r:   typenamerk   )r   )rb   r?   r   r^   r   crawlr   r2   rf   process_itemr   _itemproc_finishedr   rW   r   r   )r4   r   r:   rJ   rk   r   r   s          r5   r   z Scraper._process_spidermw_output  s     y$$$fg&& 	<&222L%%f%5555V__ 	I##q(##-,,VV<<CKK/6JJJJ 	F||,HLL\#::(    
 tr7   spider_failuredownload_failureUnion[Failure, None]c                   |                     t                    s|j        rK| j                            |||          }t          j        t          |          d|it          |          d nS|	                                }|r=| j                            ||||          }t          j        t          |          dd|ii ||ur|S dS )aM  Log and silence errors that come from the engine (typically download
        errors that got propagated thru here).

        spider_failure: the value passed into the errback of self.call_spider()
        download_failure: the value passed into _scrape2() from
        ExecutionEngine._handle_downloader_output() as "result"
        rk   r   r   r   N)
checkr   framesri   download_errorr   r   r#   r"   getErrorMessage)r4   r   r   r:   rk   r   errmsgs          r5   r   zScraper._log_download_errors*  s     %%m44 	& *99$gv  
)&11#V,01ABB     *99;; !.==('66 F J-f55'0  
 !11 	"!!tr7   itemc                .   | j         J | j         xj        dz  c_        t          |t                    r|j        }t          |t
                    rm| j                            ||||          }| t          j	        t          |          dd|ii | j                            t          j        ||||j                  S | j                            ||||          }t          j	        t          |          d|it          |          d | j                            t          j        ||||          S | j                            |||          }| t          j	        t          |          dd|ii | j                            t          j        |||          S )	zEItemProcessor finished for the given ``item`` and returned ``output``Nr   r   rk   )r   r   rJ   rk   	exceptionr   )r   r   rJ   rk   r   )r   r   rJ   rk   )rb   r2   r?   r   r   r   ri   droppedr   r   r#   r   send_catch_log_deferreditem_dropped
item_errorr"   scrapeditem_scraped)r4   r   r   rJ   rk   exr   s          r5   r   zScraper._itemproc_finishedQ  s    y$$$	1$fg&& 	B"h'' 
*224XvNN XJ 4V < <WXvDVWWW|;;"/%!$l <    &11$HfMMFJ%f--(,V44   
 <77)! 8    "**68VDD 	PJ,V44OXv<NOOO|33'fxPV 4 
 
 	
r7   N)r^   r'   r;   rL   )rk   r   )rk   r   r;   r   rV   )rk   r   r;   rL   )r8   r9   r:   r   rk   r   r;   r   )
r   r   r:   r   rJ   r   rk   r   r;   rL   )
r8   r   r:   r   rJ   r   rk   r   r;   r   )
r   r   r:   r   rJ   r   rk   r   r;   r   )
r   r   r   r   r:   r   rk   r   r;   r   )
r   r   r   r   rJ   r   rk   r   r;   rL   )rW   rX   rY   r6   r   rn   rt   rS   ru   r   r{   r   r   r   r   r   r   r   r   r[   r7   r5   r]   r]   `   sV       1 1 1 1 0 0 0 _0
! ! ! !   / / / /
   2L L L L   "R R R R6 6 6 6&
 
 
 
0" " " "H   4% % % %N%
 %
 %
 %
 %
 %
r7   r]   )<rZ   
__future__r   loggingcollectionsr   typingr   r   r   r   r	   r
   r   r   r   r   r   itemadapterr   twisted.internet.deferr   r   twisted.python.failurer   scrapyr   r   scrapy.core.spidermwr   scrapy.exceptionsr   r   r   scrapy.httpr   r   scrapy.utils.deferr   r   r   r   r    r!   scrapy.utils.logr"   r#   scrapy.utils.miscr$   r%   scrapy.utils.spiderr&   scrapy.crawlerr'   rF   	getLoggerrW   r   r)   r]   r[   r7   r5   <module>r      s  " " " " " " " "                                        < < < < < < < < * * * * * * " " " " " " " " 8 8 8 8 8 8 B B B B B B B B B B ) ) ) ) ) ) ) )                G F F F F F F F N N N N N N N N 5 5 5 5 5 5 '&&&&&& 57*+Wh>?
 
	8	$	$*7 *7 *7 *7 *7 *7 *7 *7ZV
 V
 V
 V
 V
 V
 V
 V
 V
 V
r7   