
    3 d                        d Z ddlmZ ddlZddlmZmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ erdd	lmZ  G d
 de          ZddlmZmZ ddlmZmZ ddlmZ dS )zM
Base class for Scrapy spiders

See documentation in docs/topics/spiders.rst
    )annotationsN)TYPE_CHECKINGOptional)signals)Request)
object_ref)url_is_from_spider)Crawlerc                      e Zd ZU dZded<   dZded<   ddZed             Ze	j
        fd	Zed
             ZddZd Zd Zd Zed             Zed             Zed             Zd ZdS )SpiderzQBase class for scrapy spiders. All spiders must inherit from this
    class.
    strnameNzOptional[dict]custom_settingsc                    ||| _         n5t          | dd           s$t          t          |           j         d          | j                            |           t          | d          s	g | _        d S d S )Nr   z must have a name
start_urls)	r   getattr
ValueErrortype__name____dict__updatehasattrr   )selfr   kwargss      7lib/python3.11/site-packages/scrapy/spiders/__init__.py__init__zSpider.__init__   s     	HDIIvt,, 	HT

 3FFFGGGV$$$t\** 	! DOOO	! 	!    c                b    t          j        | j                  }t          j        |d| i          S )Nspider)logging	getLoggerr   LoggerAdapter)r   loggers     r   r#   zSpider.logger%   s,    "49--$Vh-=>>>r   c                .     | j         j        ||fi | dS )zLog the given message at the given log level

        This helper wraps a log call to the logger within the spider, but you
        can use it directly (e.g. Spider.logger.info('msg')) or use any other
        Python logger too.
        N)r#   log)r   messagelevelkws       r   r%   z
Spider.log*   s(     	w--"-----r   c                @     | |i |}|                     |           |S N)_set_crawler)clscrawlerargsr   r   s        r   from_crawlerzSpider.from_crawler3   s/    d%f%%G$$$r   r-   r
   c                    || _         |j        | _        |j                            | j        t          j                   d S r*   )r-   settingsr   connectclosespider_closed)r   r-   s     r   r+   zSpider._set_crawler9   s6    (
G,ABBBBBr   c              #     K   | j         st          | d          rt          d          | j         D ]}t          |d          V  d S )N	start_urlzyCrawling could not start: 'start_urls' not found or empty (but found 'start_url' attribute instead, did you miss an 's'?)T)dont_filter)r   r   AttributeErrorr   )r   urls     r   start_requestszSpider.start_requests>   sq       	74#=#= 	 (  
 ? 	1 	1C#40000000	1 	1r   c                     | j         |fi |S r*   )parser   responser   s      r   _parsezSpider._parseH   s    tz(--f---r   c                :    t          | j        j         d          )Nz.parse callback is not defined)NotImplementedError	__class__r   r=   s      r   r<   zSpider.parseK   s%    !~&FFF
 
 	
r   c                B    |                     | j        pi d           d S )Nr   )priority)setdictr   )r,   r1   s     r   update_settingszSpider.update_settingsP   s)    ,2XFFFFFr   c                ,    t          |j        |           S r*   )r	   r9   )r,   requests     r   handles_requestzSpider.handles_requestT   s    !'+s333r   c                \    t          | dd           }t          |          r ||          S d S )Nclosed)r   callable)r   reasonrK   s      r   r3   zSpider.closeX   s;    400F 	"6&>>!	" 	"r   c                d    dt          |           j         d| j        dt          |           ddS )N< z at 0x0x>)r   r   r   id)r   s    r   __repr__zSpider.__repr__^   s7    J4::&JJJJBtHHJJJJJr   r*   )r-   r
   )r   
__module____qualname____doc____annotations__r   r   propertyr#   r    DEBUGr%   classmethodr/   r+   r:   r?   r<   rF   rI   staticmethodr3   rT    r   r   r   r      sN          III&*O****! ! ! ! ? ? X? ") . . . .   [
C C C C
1 1 1. . .
 
 

 G G [G 4 4 [4 " " \"
K K K K Kr   r   )CrawlSpiderRule)CSVFeedSpiderXMLFeedSpider)SitemapSpider)rW   
__future__r   r    typingr   r   scrapyr   scrapy.httpr   scrapy.utils.trackrefr   scrapy.utils.urlr	   scrapy.crawlerr
   r   scrapy.spiders.crawlr^   r_   scrapy.spiders.feedr`   ra   scrapy.spiders.sitemaprb   r]   r   r   <module>rm      s+   
 # " " " " "  * * * * * * * *             , , , , , , / / / / / / '&&&&&&KK KK KK KK KKZ KK KK KK^ 3 2 2 2 2 2 2 2 < < < < < < < < 0 0 0 0 0 0 0 0r   