
    3 dF                     F   d Z ddlZddlZddlmZ ddlmZmZmZmZm	Z	m
Z
 ddlmZmZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZ ddlmZmZ ddlmZ ddl m!Z! ddl"m#Z#m$Z$ ddl%m&Z&m'Z' ddl(m)Z)  ej*        e+          Z, G d d          Z- G d d          Z.dS )z
This is the Scrapy engine which controls the Scheduler, Downloader and Spider.

For more information see docs/topics/architecture.rst

    N)time)CallableIterableIteratorOptionalSetUnion)DeferredinlineCallbackssucceed)LoopingCallFailure)signals)Scraper)CloseSpiderDontCloseSpiderScrapyDeprecationWarning)RequestResponse)BaseSettings)Spider)failure_to_exc_infologformatter_adapter)create_instanceload_object)CallLaterOncec                   Z    e Zd ZdedededdfdZdeddfdZdeddfd	Z	de
fd
ZddZdS )Slotstart_requestsclose_if_idlenextcallreturnNc                     d | _         t                      | _        t          |          | _        || _        || _        || _        t          |j	                  | _
        d S N)closingset
inprogressiterr    r!   r"   	schedulerr   schedule	heartbeat)selfr    r!   r"   r*   s        2lib/python3.11/site-packages/scrapy/core/engine.py__init__zSlot.__init__   sR     ,0(+26~2F2F* "$X%677    requestc                 :    | j                             |           d S r%   )r(   addr-   r1   s     r.   add_requestzSlot.add_request-   s    G$$$$$r0   c                 b    | j                             |           |                                  d S r%   )r(   remove_maybe_fire_closingr4   s     r.   remove_requestzSlot.remove_request0   s0    w'''  """""r0   c                 ^    t                      | _        |                                  | j        S r%   )r
   r&   r8   r-   s    r.   closez
Slot.close4   s'    zz  """|r0   c                     | j         h| j        sc| j        r>| j                                         | j        j        r| j                                         | j                             d            d S d S d S r%   )r&   r(   r"   cancelr,   runningstopcallbackr;   s    r.   r8   zSlot._maybe_fire_closing9   s    < 	(DO 	(} *$$&&&>) *N'')))L!!$'''''	( 	( 	( 	(r0   r#   N)__name__
__module____qualname__r   boolr   r/   r   r5   r9   r
   r<   r8    r0   r.   r   r      s        8 8 8  	8 
8 8 8 8%7 %t % % % %#g #$ # # # #x    
( ( ( ( ( (r0   r   c            
           e Zd ZdeddfdZdedefdZede	fd            Z
de	fdZde	fd	Zd%d
Zd%dZd%dZdefdZdee	         fdZdeeeef         dedee	         fdZd&dee         defdZd&dedee         ddfdZdededdfdZd&dedee         de	fdZdeeef         dededee	ef         fdZdedee         de	fdZe	 d'dede defd            Z!d%dZ"d(ded e#de	fd!Z$e%de&fd"            Z'defd#Z(dededdfd$Z)dS ))ExecutionEnginespider_closed_callbackr#   Nc                 \   || _         |j        | _        |j        | _        |j        | _        d | _        d | _        d| _        d| _        |                     |j                  | _	        t          | j        d                   } ||          | _        t          |          | _        || _        d S )NF
DOWNLOADER)crawlersettingsr   logformatterslotspiderr?   paused_get_scheduler_classscheduler_clsr   
downloaderr   scraper_spider_closed_callback)r-   rM   rJ   downloader_clss       r.   r/   zExecutionEngine.__init__C   s    (#0$(	(,!66w7GHH$T]<%@AA(.11w'''=$$$r0   rN   c                     ddl m} t          |d                   }t          ||          st	          d|d          d          |S )Nr   )BaseScheduler	SCHEDULERzThe provided scheduler class (z2) does not fully implement the scheduler interface)scrapy.core.schedulerrZ   r   
issubclass	TypeError)r-   rN   rZ   rT   s       r.   rS   z$ExecutionEngine._get_scheduler_classR   sq    777777#H[$9::-77 	D+1F D D D   r0   c              #      K   | j         rt          d          t                      | _        | j                            t          j                  V  d| _         t                      | _        | j        V  d S )NzEngine already runningsignalT)	r?   RuntimeErrorr   
start_timer   send_catch_log_deferredengine_startedr
   
_closewaitr;   s    r.   startzExecutionEngine.start]   sr      < 	97888&&l22':P2QQQQQ"**or0   c                      t           dt          f fd            } j        st          d          d _         j                              j        d          nt          d          }|                    |          S )z$Gracefully stop the execution enginer#   c              3      K   j                             t           j                  V  j                            d            d S )Nr`   )r   rd   engine_stoppedrf   rA   _r-   s    r.   _finish_stopping_enginez5ExecutionEngine.stop.<locals>._finish_stopping_enginej   sE      ,66g>T6UUUUUO$$T*****r0   zEngine not runningFNshutdownreason)r   r
   r?   rb   rQ   close_spiderr   addBoth)r-   rm   dfds   `  r.   r@   zExecutionEngine.stopg   s     
	+( 	+ 	+ 	+ 	+ 	+ 
	+ | 	53444 {Ddk*=== 	
 {{2333r0   c                     | j         r|                                 S | j        |                     | j        d          S t	          | j                                                  S )z
        Gracefully close the execution engine.
        If it has already been started, stop it. In all cases, close the spider and the downloader.
        Nrn   ro   )r?   r@   rQ   rq   r   rU   r<   r;   s    r.   r<   zExecutionEngine.closez   sh    
 < 	99;;; 	$$J %    t,,..///r0   c                     d| _         d S )NTrR   r;   s    r.   pausezExecutionEngine.pause   s    r0   c                     d| _         d S )NFrv   r;   s    r.   unpausezExecutionEngine.unpause   s    r0   c                    | j         d S | j        J | j        rd S |                                 s=|                                 )	 |                                 s|                                 )| j         j        |                                 s	 t          | j         j                  }|                     |           nW# t          $ r d | j         _        Y n?t          $ r3 d | j         _        t                              ddd| j        i           Y nw xY w|                                 r"| j         j        r|                                  d S d S d S )Nz$Error while obtaining start requestsTrQ   exc_infoextra)rP   rQ   rR   _needs_backout_next_request_from_schedulerr    nextcrawlStopIteration	Exceptionloggererrorspider_is_idler!   _spider_idler4   s     r.   _next_requestzExecutionEngine._next_request   s   9 	F{&&&; 	4 ##%%	1133	  ##%%	1133	 9# 	$8K8K8M8M 	$$ty788 

7#### ! 0 0 0+/	(((   +/	(:!#T[1          	 TY%< 	 	  	  	  	 s   B= =D9DDc                     | j          pB| j        j        p6| j                                        p| j        j                                        S r%   )r?   rP   r&   rU   needs_backoutrV   r;   s    r.   r~   zExecutionEngine._needs_backout   sM     1y 1,,..1 | ..00		
r0   c                      j         J  j        J  j         j                                        d S                       j                  }|                     j                   |                     fd           |                     fd           |                     fd            j         |                    fd           |                     fd           |S )Nc                 f    t                               dt          |           dj        i          S )Nz&Error while handling downloader outputrQ   r{   r   infor   rQ   fr-   s    r.   <lambda>z>ExecutionEngine._next_request_from_scheduler.<locals>.<lambda>   2    fkk8,Q//- "   r0   c                 8    j                                       S r%   )rP   r9   )rl   r1   r-   s    r.   r   z>ExecutionEngine._next_request_from_scheduler.<locals>.<lambda>   s    DI44W== r0   c                 f    t                               dt          |           dj        i          S )Nz&Error while removing request from slotrQ   r{   r   r   s    r.   r   z>ExecutionEngine._next_request_from_scheduler.<locals>.<lambda>   r   r0   c                 6    j                                         S r%   )r"   r+   )rl   rP   s    r.   r   z>ExecutionEngine._next_request_from_scheduler.<locals>.<lambda>   s    DM2244 r0   c                 f    t                               dt          |           dj        i          S )Nz"Error while scheduling new requestrQ   r{   r   r   s    r.   r   z>ExecutionEngine._next_request_from_scheduler.<locals>.<lambda>   s2    fkk4,Q//- "   r0   )rP   rQ   r*   next_request	_downloadrr   _handle_downloader_output
addErrback)r-   dr1   rP   s   ` @@r.   r   z,ExecutionEngine._next_request_from_scheduler   s;   y$$${&&&)%2244 	4NN7DK00			$0':::	   	
 	
 	
 	
		=====>>>	   	
 	
 	
 y			4444555	   	
 	
 	
 r0   resultr1   c                 l     j         J t          |t          t          t          f          s"t          dt          |           d|          t          |t                    r                     |           d S  j        	                    || j                   }|
                     fd           |S )Nz;Incorrect type: expected Request, Response or Failure, got : c                 f    t                               dt          |           dj        i          S )Nz'Error while enqueuing downloader outputrQ   r{   )r   r   r   rQ   r   s    r.   r   z;ExecutionEngine._handle_downloader_output.<locals>.<lambda>   s2    fll9,Q//- #   r0   )rQ   
isinstancer   r   r   r^   typer   rV   enqueue_scraper   )r-   r   r1   r   s   `   r.   r   z)ExecutionEngine._handle_downloader_output   s     {&&&&7Hg">?? 	hdSYllhh^dhh  
 fg&& 	JJv4L''EE	   	
 	
 	
 r0   rQ   c                 &   |t          j        dt          d           | j        t	          d          | j        j                                        sdS | j        j        rdS | j        j	        dS | j        j
                                        rdS dS )NzKPassing a 'spider' argument to ExecutionEngine.spider_is_idle is deprecated   category
stacklevelEngine slot not assignedFT)warningswarnr   rP   rb   rV   is_idlerU   activer    r*   has_pending_requests)r-   rQ   s     r.   r   zExecutionEngine.spider_is_idle   s     	M]1   
 9 	;9:::| ((** 	5?! 	59# 	593355 	5tr0   c                 (   |=t          j        dt          d           || j        urt	          d|j        d          | j        t	          d|           |                     || j                   | j        j        	                                 dS )z:Inject the request into the spider <-> downloader pipelineNzBPassing a 'spider' argument to ExecutionEngine.crawl is deprecatedr   r   zThe spider z does not match the open spiderNo open spider to crawl: )
r   r   r   rQ   rb   name_schedule_requestrP   r"   r+   r-   r1   rQ   s      r.   r   zExecutionEngine.crawl  s     		MT1   
 T[( "P&+PPP   ; 	FD7DDEEEw444	##%%%%%r0   c                     | j                             t           j        ||           | j        j                            |          s)| j                             t           j        ||           d S d S )N)r1   rQ   )r   send_catch_logrequest_scheduledrP   r*   enqueue_requestrequest_droppedr   s      r.   r   z!ExecutionEngine._schedule_request  s    ##%wv 	$ 	
 	
 	
 y"227;; 	L''' (     	 	r0   c                 "   |Et          j        dt          d           || j        ur t                              d|j                   | j        t          d|           |                     ||          	                    | j
        ||          S )z`Return a Deferred which fires with a Response as result, only downloader middlewares are appliedNzEPassing a 'spider' argument to ExecutionEngine.download is deprecatedr   r   z.The spider '%s' does not match the open spiderr   )r   r   r   rQ   r   warningr   rb   r   rr   _downloadedr   s      r.   downloadzExecutionEngine.download   s     		MW1   
 T[( Dfk   ; 	FD7DDEEE~~gv..66gv
 
 	
r0   c                     | j         J | j                             |           t          |t                    r|                     ||          n|S r%   )rP   r9   r   r   r   )r-   r   r1   rQ   s       r.   r   zExecutionEngine._downloaded2  sQ     y$$$	  )))0:670K0KWt}}VV,,,QWWr0   c                 l     j         J  j                                         j        dt          t          t
          f         dt          t          t
          f         f fd} fd} j                                      }|                    |           |	                    |           |S )Nr   r#   c                    t          | t          t          f          s"t          dt	          |            d|           t          | t                    r~| j        | _        j                            | j        |           }| t          j	        t          |          ddii j                            t          j        | | j                   | S )Nz2Incorrect type: expected Response or Request, got r   r}   rQ   )ra   responser1   rQ   )r   r   r   r^   r   r1   rO   crawledr   logr   r   r   response_received)r   logkwsr1   r-   rQ   s     r.   _on_successz.ExecutionEngine._download.<locals>._on_successA  s    fx&9:: cfccY_cc   &(++ > -%,FN*226>66RR XJ 4V < <WXvDVWWW++"4#"N!	 ,    Mr0   c                 D    j         j                                         | S r%   )rP   r"   r+   rk   s    r.   _on_completez/ExecutionEngine._download.<locals>._on_completeT  s    I'')))Hr0   )
rP   r5   rQ   r	   r   r   rU   fetchaddCallbacksrr   )r-   r1   rQ   r   r   dwlds   ```   r.   r   zExecutionEngine._download9  s    y$$$	g&&& 	![F	h&7 8 	U8WCT=U 	 	 	 	 	 	 	 	&	 	 	 	 	 $$Wf55+&&&\"""r0   rG   Tr    r!   c              #     K   | j         t          d|j                  t                              dd|i           t          | j                  }t          | j        d | j	                  }| j
        j                            ||          V }t          ||||          | _         || _        t          |d          r|                    |          V  | j
                            |          V  | j	        j                            |           | j                            t&          j        |          V  | j         j                                         | j         j                            d           d S )	Nz!No free spider slot when opening zSpider openedrQ   r}   )rN   rM   open)rQ      )rP   rb   r   r   r   r   r   r   rT   rM   rV   spidermwprocess_start_requestsr   rQ   hasattrr   open_spiderstatsr   rd   spider_openedr"   r+   r,   rg   )r-   rQ   r    r!   r"   r*   s         r.   r   zExecutionEngine.open_spider]  sz      9 	TR6;RRSSSOHf+=>>> !344#t|
 
 
	  $|4KKF 
  
 
 
 )LL	9f%% 	)..(((((l&&v.....&&v...l2273HQW2XXXXX	##%%%	!!!$$$$$r0   c                    | j         J t          t          f| j                            t          j        | j                   }fd|D             }t          |v rdS |                                 rc|                    t          t          d                    }t          |t                    sJ | 	                    | j         |j
                   dS dS )a  
        Called when a spider gets idle, i.e. when there are no remaining requests to download or schedule.
        It can be called multiple times. If a handler for the spider_idle signal raises a DontCloseSpider
        exception, the spider is not closed until the next loop and this function is guaranteed to be called
        (at least) once again. A handler can raise CloseSpider to provide a custom closing reason.
        N)rQ   dont_logc                     i | ]<\  }}D ]4}t          |t                    t          |j        |          ,||j        5=S rG   )r   r   value).0rl   xexexpected_exs       r.   
<dictcomp>z0ExecutionEngine._spider_idle.<locals>.<dictcomp>  sn     
 
 
1!
 
 !W%%	
 +5QWb*A*A	

 
 
 
r0   finishedro   )rQ   r   r   r   r   spider_idler   getr   rq   rp   )r-   resdetected_exr   r   s       @r.   r   zExecutionEngine._spider_idleu  s     {&&&&4l))k * 
 

 
 
 

 
 
 k) 	4   	=k.L.L.LMMBb+.....dk")<<<<<	= 	=r0   	cancelledrp   c                      j         t          d           j         j         j         j        S t                              ddidi            j                                         }dt          dt          ffd	}|                     fd
           |	                     |d                     |                     fd           |	                     |d                     t           j         j        d          r7|                     fd           |	                     |d                     |                     fd           |	                     |d                     |                     fd           |	                     |d                     |                    fd           |                     fd           |	                     |d                     |                     fd           |	                     |d                     |                     fd           |S )z<Close (cancel) spider and clear all its outstanding requestsNr   zClosing spider (%(reason)s)rp   rQ   r   msgr#   c                 *     dt           dd f fd}|S )Nfailurer#   c                 `    t                               t          |           di           d S )NrQ   r{   )r   r   r   )r   r   rQ   s    r.   errbackzBExecutionEngine.close_spider.<locals>.log_failure.<locals>.errback  s>    "5g">">xQWFX      r0   r   )r   r   rQ   s   ` r.   log_failurez1ExecutionEngine.close_spider.<locals>.log_failure  s<     T       
 Nr0   c                 6    j                                         S r%   )rU   r<   rk   s    r.   r   z.ExecutionEngine.close_spider.<locals>.<lambda>  s    do3355 r0   zDownloader close failurec                 8    j                                       S r%   )rV   rq   rl   r-   rQ   s    r.   r   z.ExecutionEngine.close_spider.<locals>.<lambda>  s    dl77?? r0   zScraper close failurer<   c                 B    j         j                                      S r%   )rP   r*   r<   )rl   rp   r-   s    r.   r   z.ExecutionEngine.close_spider.<locals>.<lambda>  s    $)"5";";F"C"C r0   zScheduler close failurec                 R    j                             t           j                  S )N)ra   rQ   rp   )r   rd   spider_closedrl   rp   r-   rQ   s    r.   r   z.ExecutionEngine.close_spider.<locals>.<lambda>  s*    dl::, ;   r0   z'Error while sending spider_close signalc                 F    j         j                                      S )Nro   )rM   r   rq   r   s    r.   r   z.ExecutionEngine.close_spider.<locals>.<lambda>  s    dl0==fV=TT r0   zStats close failurec                 F    t                               ddidi          S )NzSpider closed (%(reason)s)rp   rQ   r   )r   r   )rl   rp   rQ   s    r.   r   z.ExecutionEngine.close_spider.<locals>.<lambda>  s-    fkk,6"( "   r0   c                 &    t          dd           S )NrP   setattrrk   s    r.   r   z.ExecutionEngine.close_spider.<locals>.<lambda>  s    gdFD99 r0   zError while unassigning slotc                 &    t          dd           S )NrQ   r   rk   s    r.   r   z.ExecutionEngine.close_spider.<locals>.<lambda>  s    gdHd;; r0   zError while unassigning spiderc                 .                                   S r%   )rW   r   s    r.   r   z.ExecutionEngine.close_spider.<locals>.<lambda>  s    d::6BB r0   )rP   rb   r&   r   r   r<   strr   rr   r   r   r*   )r-   rQ   rp   rs   r   s   ```  r.   rq   zExecutionEngine.close_spider  s   9 	;9:::9 	%9$$)Hf+=hPVEW 	 	
 	
 	
 ioo	S 	X 	 	 	 	 	 	 	5555666{{#=>>????????@@@{{#:;;<<<49&00 	CKKCCCCCDDDNN;;'@AABBB     	
 	
 	
 	{{#LMMNNNTTTTTTUUU{{#899:::    	
 	
 	
 	9999:::{{#ABBCCC;;;;<<<{{#CDDEEEBBBBBCCC
r0   c                 \    t          j        dt          d           | j        | j        gng S )NzUExecutionEngine.open_spiders is deprecated, please use ExecutionEngine.spider insteadr   r   )r   r   r   rQ   r;   s    r.   open_spiderszExecutionEngine.open_spiders  s;    c-	
 	
 	
 	

 !%?}}R?r0   c                 d    t          j        dt          d           t          | j                   S )Nz*ExecutionEngine.has_capacity is deprecatedr   )r   )r   r   r   rF   rP   r;   s    r.   has_capacityzExecutionEngine.has_capacity  s7    8$	
 	
 	
 	

 	??""r0   c                     t          j        dt          d           | j        t	          d          |                     ||           d S )NzlExecutionEngine.schedule is deprecated, please use ExecutionEngine.crawl or ExecutionEngine.download insteadr   r   r   )r   r   r   rP   rb   r   r   s      r.   r+   zExecutionEngine.schedule  s^    H-		
 	
 	
 	
 9 	;9:::w/////r0   rB   r%   )rG   T)r   )*rC   rD   rE   r   r/   r   r   rS   r   r
   rg   r@   r<   rw   ry   r   rF   r~   r   r   r	   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rq   propertylistr   r   r+   rG   r0   r.   rI   rI   B   s       > >T > > > >	\ 	d 	 	 	 	 x    _4h 4 4 4 4&0x 0 0 0 0             B
 
 
 
 
"hx.@ " " " "HGXw67BI	(	   2 Xf%5     && &W &hv.> &$ & & & &" & T    
 
 
&1A 
X 
 
 
 
$XHg-.X9@XJPX	x!	"X X X X" "(62B "x " " " "H SW% %%.6%LP% % % _%.= = = =2< <6 <3 < < < < <| @d @ @ @ X@#d # # # #	0 	0 	0D 	0 	0 	0 	0 	0 	0r0   rI   )/__doc__loggingr   r   typingr   r   r   r   r   r	   twisted.internet.deferr
   r   r   twisted.internet.taskr   twisted.python.failurer   scrapyr   scrapy.core.scraperr   scrapy.exceptionsr   r   r   scrapy.httpr   r   scrapy.settingsr   scrapy.spidersr   scrapy.utils.logr   r   scrapy.utils.miscr   r   scrapy.utils.reactorr   	getLoggerrC   r   r   rI   rG   r0   r.   <module>r     s            E E E E E E E E E E E E E E E E E E E E E E E E E E - - - - - - * * * * * *       ' ' ' ' ' ' T T T T T T T T T T ) ) ) ) ) ) ) ) ( ( ( ( ( ( ! ! ! ! ! ! F F F F F F F F : : : : : : : : . . . . . .		8	$	$"( "( "( "( "( "( "( "(Jd0 d0 d0 d0 d0 d0 d0 d0 d0 d0r0   