
    3 d-                         d Z ddlZddlmZmZ ddlmZmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ  ej        e          Z G d
 d          ZdS )z
This is a middleware to respect robots.txt policies. To activate it you must
enable this middleware and enable the ROBOTSTXT_OBEY setting.

    N)DeferredmaybeDeferred)IgnoreRequestNotConfigured)Request)NO_CALLBACK)urlparse_cached)failure_to_exc_info)load_objectc                   R    e Zd ZdZd Zed             Zd Zd Zd Z	d Z
d Zd	 Zd
S )RobotsTxtMiddlewarei  c                 |   |j                             d          st          |j                             dd          | _        |j                             dd           | _        || _        i | _        t          |j                             d                    | _	        | j	        
                    | j        d           d S )NROBOTSTXT_OBEY
USER_AGENTScrapyROBOTSTXT_USER_AGENTROBOTSTXT_PARSER    )settingsgetboolr   get_default_useragent_robotstxt_useragentcrawler_parsersr   _parserimplfrom_crawler)selfr   s     Flib/python3.11/site-packages/scrapy/downloadermiddlewares/robotstxt.py__init__zRobotsTxtMiddleware.__init__   s    ''(899 	 ")"2"6"6|X"N"N$+$4$8$89OQU$V$V!&w'7';';<N'O'OPP 	%%dlC88888r   c                      | |          S N )clsr   s     r   r   z RobotsTxtMiddleware.from_crawler$   s    s7||r   c                    |j                             d          rd S |j                            d          s|j                            d          rd S t	          | j        ||          }|                    | j        ||           |S )Ndont_obey_robotstxtzdata:zfile:)metar   url
startswithr   robot_parseraddCallbackprocess_request_2)r   requestspiderds       r   process_requestz#RobotsTxtMiddleware.process_request(   s    <122 	F;!!'** 	gk.D.DW.M.M 	F$+Wf==	d,gv>>>r   c                 4   |d S | j         }|s |j                            d| j                  }|                    |j        |          sOt                              dd|id|i           | j        j	        
                    d           t          d          d S )Ns
   User-Agentz$Forbidden by robots.txt: %(request)sr-   r.   )extrazrobotstxt/forbiddenzForbidden by robots.txt)r   headersr   r   allowedr(   loggerdebugr   stats	inc_valuer   )r   rpr-   r.   	useragents        r   r,   z%RobotsTxtMiddleware.process_request_21   s     	F-	 	T++M4;RSSIzz'+y11 	;LL6G$(    
 L(()>??? 9:::	; 	;r   c                   	 t          |          }|j        }|| j        vrt                      | j        |<   |j         d|j         d}t          || j        ddit                    }| j        j	        
                    |          }|                    | j        ||           |                    | j        ||           |                    | j        |           | j        j                            d           t%          | j        |         t                    r5t                      		fd}| j        |                             |           	S | j        |         S )Nz://z/robots.txtr&   T)priorityr'   callbackzrobotstxt/request_countc                 2                         |            | S r"   )r=   )resultr/   s    r   cbz,RobotsTxtMiddleware.robot_parser.<locals>.cbW   s    

6"""r   )r	   netlocr   r   schemer   DOWNLOAD_PRIORITYr   r   enginedownloadr+   _parse_robots
addErrback	_logerror_robots_errorr7   r8   
isinstance)
r   r-   r.   r(   rA   	robotsurl	robotsreqdfdr@   r/   s
            @r   r*   z RobotsTxtMiddleware.robot_parserA   s_   g&&& 	D$,JJDM&!:AA#*AAAI/+T2$	  I ,%..y99COOD.???NN4>9f===NN4-v666L(()BCCCdmF+X66 	

A     M&!--b111H}V$$r   c                     |j         t          ur5t                              d||j        dt          |          d|i           |S )Nz.Error downloading %(request)s: %(f_exception)s)r-   f_exceptionr.   )exc_infor2   )typer   r5   errorvaluer
   )r   failurer-   r.   s       r   rH   zRobotsTxtMiddleware._logerror_   sV    <}, 	LL@#GMBB,W55(	     r   c                 4   | j         j                            d           | j         j                            d|j                    | j                            | j         |j                  }| j        |         }|| j        |<   |                    |           d S )Nzrobotstxt/response_countz robotstxt/response_status_count/)	r   r7   r8   statusr   r   bodyr   r=   )r   responserA   r.   r9   rp_dfds         r   rF   z!RobotsTxtMiddleware._parse_robotsi   s    $$%?@@@$$@x@@	
 	
 	
 **4<GGv& "fr   c                     |j         t          ur)d|j          }| j        j                            |           | j        |         }d | j        |<   |                    d            d S )Nzrobotstxt/exception_count/)rQ   r   r   r7   r8   r   r=   )r   rT   rA   keyrY   s        r   rI   z!RobotsTxtMiddleware._robots_errors   si    <}, 	.=w|==CL((---v& $fr   N)__name__
__module____qualname__rC   r    classmethodr   r0   r,   r*   rH   rF   rI   r#   r   r   r   r      s        
9 
9 
9   [  ; ; ; % % %<        r   r   )__doc__loggingtwisted.internet.deferr   r   scrapy.exceptionsr   r   scrapy.httpr   scrapy.http.requestr   scrapy.utils.httpobjr	   scrapy.utils.logr
   scrapy.utils.miscr   	getLoggerr\   r5   r   r#   r   r   <module>rj      s      : : : : : : : : : : : : : : : :       + + + + + + 0 0 0 0 0 0 0 0 0 0 0 0 ) ) ) ) ) )		8	$	$d d d d d d d d d dr   