
    3 d                         d dl Z d dlZd dlmZmZ d dlmZ  e j        e          Z	ddZ
 G d de          Z G d	 d
e          Z G d de          Z G d de          Z G d de          ZdS )    N)ABCMetaabstractmethod)
to_unicodeFc                     	 |rt          |           } n|                     d          } nB# t          $ r5 t                              dt          j                    d|i           d} Y nw xY w| S )Nzutf-8zFailure while parsing robots.txt. File either contains garbage or is in an encoding other than UTF-8, treating it as an empty file.spider)exc_infoextra )r   decodeUnicodeDecodeErrorloggerwarningsysr   )robotstxt_bodyr   to_native_str_types      0lib/python3.11/site-packages/scrapy/robotstxt.pydecode_robotstxtr   
   s     	<'77NN+227;;N 	 	 	 	P\^^V$	 	 	
 	
 	
 	 s   '* <A)(A)c                   J    e Zd Zeed                         Zed             ZdS )RobotParserc                     dS )ap  Parse the content of a robots.txt_ file as bytes. This must be a class method.
        It must return a new instance of the parser backend.

        :param crawler: crawler which made the request
        :type crawler: :class:`~scrapy.crawler.Crawler` instance

        :param robotstxt_body: content of a robots.txt_ file.
        :type robotstxt_body: bytes
        N )clscrawlerr   s      r   from_crawlerzRobotParser.from_crawler   s	     	    c                     dS )zReturn ``True`` if  ``user_agent`` is allowed to crawl ``url``, otherwise return ``False``.

        :param url: Absolute URL
        :type url: str

        :param user_agent: User agent
        :type user_agent: str
        Nr   selfurl
user_agents      r   allowedzRobotParser.allowed,   s	     	r   N)__name__
__module____qualname__classmethodr   r   r!   r   r   r   r   r      sR        
 
 ^ [
 	 	 ^	 	 	r   r   )	metaclassc                   0    e Zd Zd Zed             Zd ZdS )PythonRobotParserc                     ddl m} || _        t          ||d          } |            | _        | j                            |                                           d S )Nr   )RobotFileParserT)r   )urllib.robotparserr*   r   r   rpparse
splitlines)r   r   r   r*   s       r   __init__zPythonRobotParser.__init__:   sm    666666)Ft
 
 
 "/##n//1122222r   c                 4    |sd n|j         } | ||          }|S Nr   r   r   r   r   os        r   r   zPythonRobotParser.from_crawlerD   (    $8'.C''r   c                 t    t          |          }t          |          }| j                            ||          S r1   r   r,   	can_fetchr   s      r   r!   zPythonRobotParser.allowedJ   s2    
++
oow  S111r   Nr"   r#   r$   r/   r%   r   r!   r   r   r   r(   r(   9   sM        3 3 3   [
2 2 2 2 2r   r(   c                   0    e Zd Zd Zed             Zd ZdS )ReppyRobotParserc                 V    ddl m} || _        |                    d|          | _        d S )Nr   )Robotsr
   )reppy.robotsr=   r   r-   r,   )r   r   r   r=   s       r   r/   zReppyRobotParser.__init__Q   s4    '''''',,r>22r   c                 4    |sd n|j         } | ||          }|S r1   r2   r3   s        r   r   zReppyRobotParser.from_crawlerW   r5   r   c                 8    | j                             ||          S r1   )r,   r!   r   s      r   r!   zReppyRobotParser.allowed]   s    wsJ///r   Nr9   r   r   r   r;   r;   P   sM        3 3 3   [
0 0 0 0 0r   r;   c                   0    e Zd Zd Zed             Zd ZdS )RerpRobotParserc                     ddl m} || _         |            | _        t	          ||          }| j                            |           d S )Nr   )RobotExclusionRulesParser)robotexclusionrulesparserrD   r   r,   r   r-   )r   r   r   rD   s       r   r/   zRerpRobotParser.__init__b   sT    GGGGGG++--).&AAn%%%%%r   c                 4    |sd n|j         } | ||          }|S r1   r2   r3   s        r   r   zRerpRobotParser.from_crawlerj   r5   r   c                 t    t          |          }t          |          }| j                            ||          S r1   )r   r,   
is_allowedr   s      r   r!   zRerpRobotParser.allowedp   s2    
++
oow!!*c222r   Nr9   r   r   r   rB   rB   a   sM        & & &   [
3 3 3 3 3r   rB   c                   0    e Zd Zd Zed             Zd ZdS )ProtegoRobotParserc                 t    ddl m} || _        t          ||          }|                    |          | _        d S )Nr   )Protego)protegorL   r   r   r-   r,   )r   r   r   rL   s       r   r/   zProtegoRobotParser.__init__w   sA    ######).&AA--//r   c                 4    |sd n|j         } | ||          }|S r1   r2   r3   s        r   r   zProtegoRobotParser.from_crawler~   r5   r   c                 t    t          |          }t          |          }| j                            ||          S r1   r7   r   s      r   r!   zProtegoRobotParser.allowed   s2    
++
oow  j111r   Nr9   r   r   r   rJ   rJ   v   sM        0 0 0   [
2 2 2 2 2r   rJ   )F)loggingr   abcr   r   scrapy.utils.pythonr   	getLoggerr"   r   r   r   r(   r;   rB   rJ   r   r   r   <module>rT      sH    



 ' ' ' ' ' ' ' ' * * * * * *		8	$	$   &    G    82 2 2 2 2 2 2 2.0 0 0 0 0{ 0 0 0"3 3 3 3 3k 3 3 3*2 2 2 2 2 2 2 2 2 2r   