
    3 de9                       d dl mZ d dlZd dlZd dlZd dlZd dlmZmZ d dl	m
Z
 d dlmZ 	 d dlmZ n# e$ r dZY nw xY wd dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZmZ d dlm Z  d dl!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z' d dl(m)Z)m*Z* d dl+m,Z,m-Z- d dl.m/Z/m0Z0m1Z1m2Z2 erd dl3m4Z4  ej5        e6          Z7 G d d          Z8 G d d          Z9 G d de9          Z:dS )    )annotationsN)TYPE_CHECKINGOptional)defer)DoesNotImplement)MultipleInvalid)verifyClass)SpidersignalsExecutionEngine)ScrapyDeprecationWarning)ExtensionManager)ISpiderLoader)Settingsoverridden_settings)SignalManager)LogCounterHandlerconfigure_loggingget_scrapy_root_handlerinstall_scrapy_root_handlerlog_reactor_infolog_scrapy_info)create_instanceload_object)install_shutdown_handlerssignal_names)install_reactoris_asyncio_reactor_installed#verify_installed_asyncio_event_loopverify_installed_reactor)RequestFingerprinterc                  d    e Zd Zd
ddZej        d             Zd Zd Zej        d	             Z	dS )CrawlerNFinit_reactorboolc                  	 t          |t                    rt          d          t          |t                    s|t	          |          }|| _        |                                | _        | j                            | j                   t          |           | _
         t          | j        d                   |           | _        t          | | j                            d                    	t          j                            	           t          t%          | j                            }t&                              ddt+          j        |          i           t/                      t1          | j                   	fd| _        | j
                            | j        t          j                   t          | j        d                   }|                    |           | _        t=          t          | j        d	                   | j        | 
          | _        | j        d         }| j        d         }|r'|rtA          ||           nddl!m"} tG                       |r.tI          |           tK                      r|rtM          |           tO          j        |           | _(        | j        )                                 d| _*        d | _+        d | _,        d S )Nz5The spidercls argument must be a class, not an objectSTATS_CLASS	LOG_LEVEL)levelz!Overridden settings:
%(settings)ssettingsc                 B    t           j                                       S N)loggingrootremoveHandler)handlers   .lib/python3.11/site-packages/scrapy/crawler.py<lambda>z"Crawler.__init__.<locals>.<lambda>P   s    (B(B7(K(K     LOG_FORMATTERREQUEST_FINGERPRINTER_CLASS)r+   crawlerTWISTED_REACTORASYNCIO_EVENT_LOOPr   reactorF)-
isinstancer
   
ValueErrordictr   	spiderclscopyr+   update_settingsr   r   r   statsr   getr.   r/   
addHandlerr   loggerinfopprintpformatr   r   _Crawler__remove_handlerconnectengine_stoppedfrom_crawlerlogformatterr   request_fingerprinterr   twisted.internetr;   r   r!   r   r    r   
extensionsfreezecrawlingspiderengine)
selfr?   r+   r%   dlf_clsreactor_class
event_loopr;   r1   s
            @r2   __init__zCrawler.__init__4   s   i(( 	VTUUUh%% 	* 	*))H" &&t}555$T**>[}!=>>tDD
#D0A0A+0N0NOOO((($T]33440:v~a?P?P2Q	
 	
 	
 #$$ 	7'666 !L K K KT2G4JKKKT]?;<<"//55;J&CDEE]<
 <
 <
" &78]#78
 	  5z::::444444 	@$]333+-- @* @3J???*7==15r4   c              /    K   | j         rt          d          d| _         	  | j        |i || _        |                                 | _        t          | j                                                  }| j                            | j        |          V  t          j
        | j        j                  V  d S # t          $ r+ d| _         | j        | j                                        V   w xY w)NzCrawling already taking placeTF)rR   RuntimeError_create_spiderrS   _create_enginerT   iterstart_requestsopen_spiderr   maybeDeferredstart	Exceptionclose)rU   argskwargsr`   s       r2   crawlzCrawler.crawlr   s      = 	@>???
	-$-t>v>>DK--//DK!$+"<"<">">??N+))$+~FFFFF%dk&78888888 	 	 	!DM{ *k'')))))		s   BB6 65C+c                .     | j         j        | g|R i |S r-   )r?   rL   )rU   rf   rg   s      r2   r]   zCrawler._create_spider   s'    *t~*4A$AAA&AAAr4   c                *     t            fd          S )Nc                ,                                     S r-   stop)_rU   s    r2   r3   z(Crawler._create_engine.<locals>.<lambda>   s    tyy{{ r4   r   rU   s   `r2   r^   zCrawler._create_engine   s    t%:%:%:%:;;;r4   c              #  j   K   | j         r)d| _         t          j        | j        j                  V  dS dS )zoStarts a graceful stop of the crawler and returns a deferred that is
        fired when the crawler is stopped.FN)rR   r   rb   rT   rm   ro   s    r2   rm   zCrawler.stop   sE       = 	8!DM%dk&67777777	8 	8r4   NF)r%   r&   )
__name__
__module____qualname__rZ   r   inlineCallbacksrh   r]   r^   rm    r4   r2   r$   r$   3   s        <6 <6 <6 <6 <6|   "B B B< < < 8 8 8 8 8r4   r$   c                      e Zd ZdZ ed d          Zed             ZddZed             Z	d	 Z
d
 Zd Zd Zd Zej        d             ZdS )CrawlerRunnera  
    This is a convenient helper class that keeps track of, manages and runs
    crawlers inside an already setup :mod:`~twisted.internet.reactor`.

    The CrawlerRunner object must be instantiated with a
    :class:`~scrapy.settings.Settings` object.

    This class shouldn't be needed (since Scrapy is responsible of using it
    accordingly) unless writing scripts that manually handle the crawling
    process. See :ref:`run-from-script` for an example.
    c                    | j         S r-   )	_crawlersro   s    r2   r3   zCrawlerRunner.<lambda>   s    T^ r4   zeSet of :class:`crawlers <scrapy.crawler.Crawler>` started by :meth:`crawl` and managed by this class.)docc                L   |                      d          }t          |          }t          rt          t          fnt          }	 t	          t
          |           n'# |$ r t          j        dt          d           Y nw xY w|	                    | 
                                          S )z'Get SpiderLoader instance from settingsSPIDER_LOADER_CLASSzSPIDER_LOADER_CLASS (previously named SPIDER_MANAGER_CLASS) does not fully implement scrapy.interfaces.ISpiderLoader interface. Please add all missing methods to avoid unexpected runtime errors.   category
stacklevel)rC   r   r   r   r	   r   warningswarnr   from_settings
frozencopy)r+   cls_path
loader_clsexcss       r2   _get_spider_loaderz CrawlerRunner._get_spider_loader   s     << 566 **
3BX//HX 			z2222 	 	 	MU 2     	 ''(;(;(=(=>>>s   A !A<;A<Nc                    t          |t                    s|t          |          }|| _        |                     |          | _        t                      | _        t                      | _        d| _	        d S rq   )
r<   r>   r   r+   r   spider_loadersetrz   _activebootstrap_failed)rU   r+   s     r2   rZ   zCrawlerRunner.__init__   si    h%% 	* 	*))H !44X>>uu %r4   c                H    t          j        dt          d           | j        S )NzJCrawlerRunner.spiders attribute is renamed to CrawlerRunner.spider_loader.r~   r   )r   r   r   r   ro   s    r2   spiderszCrawlerRunner.spiders   s1    +-		
 	
 	
 	
 !!r4   c                    t          |t                    rt          d          |                     |          } | j        |g|R i |S )ae  
        Run a crawler with the provided arguments.

        It will call the given Crawler's :meth:`~Crawler.crawl` method, while
        keeping track of it so it can be stopped later.

        If ``crawler_or_spidercls`` isn't a :class:`~scrapy.crawler.Crawler`
        instance, this method will try to create one using this parameter as
        the spider class given to it.

        Returns a deferred that is fired when the crawling is finished.

        :param crawler_or_spidercls: already created crawler, or a spider class
            or spider's name inside the project to create it
        :type crawler_or_spidercls: :class:`~scrapy.crawler.Crawler` instance,
            :class:`~scrapy.spiders.Spider` subclass or string

        :param args: arguments to initialize the spider

        :param kwargs: keyword arguments to initialize the spider
        lThe crawler_or_spidercls argument cannot be a spider object, it must be a spider class (or a Crawler object))r<   r
   r=   create_crawler_crawl)rU   crawler_or_spiderclsrf   rg   r7   s        r2   rh   zCrawlerRunner.crawl   sg    , *F33 	B   %%&:;;t{74T444V444r4   c                      j                                         j        |i | j                                        fd}                    |          S )Nc                    j                                        j                                       xj        t	          dd            z  c_        | S )NrS   )crawlersdiscardr   r   getattr)resultr7   rV   rU   s    r2   _donez#CrawlerRunner._crawl.<locals>._done   sW    M!!'***L  ###!!(D)I)I%II!!Mr4   )r   addrh   r   addBoth)rU   r7   rf   rg   r   rV   s   ``   @r2   r   zCrawlerRunner._crawl   s~    '"""GM4*6**	 	 	 	 	 	 	 yyr4   c                    t          |t                    rt          d          t          |t                    r|S |                     |          S )a  
        Return a :class:`~scrapy.crawler.Crawler` object.

        * If ``crawler_or_spidercls`` is a Crawler, it is returned as-is.
        * If ``crawler_or_spidercls`` is a Spider subclass, a new Crawler
          is constructed for it.
        * If ``crawler_or_spidercls`` is a string, this function finds
          a spider with this name in a Scrapy project (using spider loader),
          then creates a Crawler instance for it.
        r   )r<   r
   r=   r$   _create_crawler)rU   r   s     r2   r   zCrawlerRunner.create_crawler   s_     *F33 	B   *G44 	(''##$8999r4   c                    t          |t                    r| j                            |          }t	          || j                  S r-   )r<   strr   loadr$   r+   )rU   r?   s     r2   r   zCrawlerRunner._create_crawler  s<    i%% 	;*//	::Iy$-000r4   c                b    t          j        d t          | j                  D                       S )z
        Stops simultaneously all the crawling jobs taking place.

        Returns a deferred that is fired when they all have ended.
        c                6    g | ]}|                                 S rv   rl   ).0cs     r2   
<listcomp>z&CrawlerRunner.stop.<locals>.<listcomp>  s     "I"I"I16688"I"I"Ir4   )r   DeferredListlistr   ro   s    r2   rm   zCrawlerRunner.stop  s.     !"I"IT$-5H5H"I"I"IJJJr4   c              #  `   K   | j         r$t          j        | j                   V  | j         "dS dS )z
        join()

        Returns a deferred that is fired when all managed :attr:`crawlers` have
        completed their executions.
        N)r   r   r   ro   s    r2   joinzCrawlerRunner.join  sK       l 	3$T\22222 l 	3 	3 	3 	3 	3r4   r-   )rr   rs   rt   __doc__propertyr   staticmethodr   rZ   r   rh   r   r   r   rm   r   ru   r   rv   r4   r2   rx   rx      s        
 
 x##3  H ? ? \?&& & & & " " X"5 5 5<     : : :(1 1 1
K K K 3 3 3 3 3r4   rx   c                  L     e Zd ZdZd fd	Zd Zd Zd ZddZd	 Z	dd
Z
 xZS )CrawlerProcessa  
    A class to run multiple scrapy crawlers in a process simultaneously.

    This class extends :class:`~scrapy.crawler.CrawlerRunner` by adding support
    for starting a :mod:`~twisted.internet.reactor` and handling shutdown
    signals, like the keyboard interrupt command Ctrl-C. It also configures
    top-level logging.

    This utility should be a better fit than
    :class:`~scrapy.crawler.CrawlerRunner` if you aren't running another
    :mod:`~twisted.internet.reactor` within your application.

    The CrawlerProcess object must be instantiated with a
    :class:`~scrapy.settings.Settings` object.

    :param install_root_handler: whether to install root logging handler
        (default: True)

    This class shouldn't be needed (since Scrapy is responsible of using it
    accordingly) unless writing scripts that manually handle the crawling
    process. See :ref:`run-from-script` for an example.
    NTc                    t                                          |           t          | j        |           t	          | j                   d| _        d S rq   )superrZ   r   r+   r   _initialized_reactor)rU   r+   install_root_handler	__class__s      r2   rZ   zCrawlerProcess.__init__=  sL    """$-)=>>>&&&$)!!!r4   c                    ddl m} t          | j                   t          |         }t
                              dd|i           |                    | j                   d S )Nr   r:   zDReceived %(signame)s, shutting down gracefully. Send again to force signame)	rO   r;   r   _signal_killr   rE   rF   callFromThread_graceful_stop_reactorrU   signumrn   r;   r   s        r2   _signal_shutdownzCrawlerProcess._signal_shutdownC  so    ,,,,,,!$"3444v&R 	
 	
 	
 	t:;;;;;r4   c                    ddl m} t          t          j                   t
          |         }t                              dd|i           |                    | j	                   d S )Nr   r:   z4Received %(signame)s twice, forcing unclean shutdownr   )
rO   r;   r   signalSIG_IGNr   rE   rF   r   _stop_reactorr   s        r2   r   zCrawlerProcess._signal_killN  sn    ,,,,,,!&.111v&BYPWDX	
 	
 	
 	t122222r4   c                    t          |t                    r| j                            |          }| j         }d| _        t          || j        |          S )NT)r%   )r<   r   r   r   r   r$   r+   )rU   r?   r%   s      r2   r   zCrawlerProcess._create_crawlerX  sT    i%% 	;*//	::I44$(!y$-lKKKKr4   c                $   ddl m} |r7|                                 }|j        rdS |                    | j                   |rt          | j                   t          | j	        d                   }t          || j	        | |          }|                                 |                                }|                    | j	                            d                     |                    dd| j                   |                    d	
           dS )aV  
        This method starts a :mod:`~twisted.internet.reactor`, adjusts its pool
        size to :setting:`REACTOR_THREADPOOL_MAXSIZE`, and installs a DNS cache
        based on :setting:`DNSCACHE_ENABLED` and :setting:`DNSCACHE_SIZE`.

        If ``stop_after_crawl`` is True, the reactor will be stopped after all
        crawlers have finished, using :meth:`join`.

        :param bool stop_after_crawl: stop or not the reactor when all
            crawlers have finished

        :param bool install_signal_handlers: whether to install the shutdown
            handlers (default: True)
        r   r:   NDNS_RESOLVERREACTOR_THREADPOOL_MAXSIZE)
maxthreadsbeforeshutdownF)installSignalHandlers)rO   r;   r   calledr   r   r   r   r   r+   r   install_on_reactorgetThreadPooladjustPoolsizegetintaddSystemEventTriggerrm   run)rU   stop_after_crawlinstall_signal_handlersr;   rV   resolver_classresolvertps           r2   rc   zCrawlerProcess.start_  s    	-,,,,, 	*		Ax IId()))" 	=%d&;<<<$T]>%BCC">4=$PWXXX##%%%""$$
T]%9%9:V%W%WXXX%%h
DIFFF%00000r4   c                b    |                                  }|                    | j                   |S r-   )rm   r   r   )rU   rV   s     r2   r   z%CrawlerProcess._graceful_stop_reactor  s)    IIKK			$$%%%r4   c                ^    ddl m} 	 |                                 d S # t          $ r Y d S w xY w)Nr   r:   )rO   r;   rm   r\   )rU   rn   r;   s      r2   r   zCrawlerProcess._stop_reactor  sL    ,,,,,,	LLNNNNN 	 	 	DD	s    
,,)NT)TTr-   )rr   rs   rt   r   rZ   r   r   r   rc   r   r   __classcell__)r   s   @r2   r   r   %  s         .* * * * * *	< 	< 	<3 3 3L L L 1  1  1  1D  
       r4   r   );
__future__r   r.   rG   r   r   typingr   r   rO   r   zope.interface.exceptionsr   r   ImportErrorzope.interface.verifyr	   scrapyr
   r   scrapy.core.enginer   scrapy.exceptionsr   scrapy.extensionr   scrapy.interfacesr   scrapy.settingsr   r   scrapy.signalmanagerr   scrapy.utils.logr   r   r   r   r   r   scrapy.utils.miscr   r   scrapy.utils.ossignalr   r   scrapy.utils.reactorr   r   r    r!   scrapy.utils.requestr"   	getLoggerrr   rE   r$   rx   r   rv   r4   r2   <module>r      s   " " " " " "     * * * * * * * * " " " " " " 6 6 6 6 6 69999999   OOO . - - - - - " " " " " " " " . . . . . . 6 6 6 6 6 6 - - - - - - + + + + + + 9 9 9 9 9 9 9 9 . . . . . .                ; : : : : : : : I I I I I I I I             :999999 
	8	$	$]8 ]8 ]8 ]8 ]8 ]8 ]8 ]8@O3 O3 O3 O3 O3 O3 O3 O3dg g g g g] g g g g gs   3 ==