o
    ǋ|c$                     @   s\   d Z ddlZddlZddlZdgZeddZG dd dZG dd dZ	G d	d
 d
Z
dS )a%   robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://www.robotstxt.org/norobots-rfc.txt
    NRobotFileParserRequestRatezrequests secondsc                   @   sr   e Zd ZdZdddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd ZdS )r   zs This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

     c                 C   s2   g | _ g | _d | _d| _d| _| | d| _d S )NFr   )entriessitemapsdefault_entrydisallow_all	allow_allset_urllast_checkedselfurl r   $  /home/conda/feedstock_root/build_artifacts/python-split_1669104742391/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehol/lib/python3.10/urllib/robotparser.py__init__   s   

zRobotFileParser.__init__c                 C   s   | j S )zReturns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        )r   r   r   r   r   mtime%   s   zRobotFileParser.mtimec                 C   s   ddl }|  | _dS )zYSets the time the robots.txt file was last fetched to the
        current time.

        r   N)timer   )r   r   r   r   r   modified.   s   zRobotFileParser.modifiedc                 C   s&   || _ tj|dd \| _| _dS )z,Sets the URL referring to a robots.txt file.      N)r   urllibparseurlparsehostpathr   r   r   r   r
   6   s    zRobotFileParser.set_urlc              
   C   s   z	t j| j}W n@ t jjyI } z2|jdv rd| _n|jdkr0|jdk r>d| _W Y d}~dS W Y d}~dS W Y d}~dS W Y d}~dS d}~ww |	 }| 
|d  dS )z4Reads the robots.txt URL and feeds it to the parser.)i  i  Ti  i  Nzutf-8)r   Zrequesturlopenr   error	HTTPErrorcoder   r	   readr   decode
splitlines)r   ferrrawr   r   r   r!   ;   s   
zRobotFileParser.readc                 C   s2   d|j v r| jd u r|| _d S d S | j| d S N*)
useragentsr   r   append)r   entryr   r   r   
_add_entryH   s
   


zRobotFileParser._add_entryc                 C   sJ  d}t  }|   |D ]
}|s(|dkrt  }d}n|dkr(| | t  }d}|d}|dkr7|d| }| }|s>q|dd}t|dkr|d   |d< tj	
|d  |d< |d dkr~|dkrs| | t  }|j|d  d}q|d dkr|dkr|jt|d d	 d}q|d d
kr|dkr|jt|d d d}q|d dkr|dkr|d   rt|d |_d}q|d dkr|dkr|d d}t|dkr|d   r|d   rtt|d t|d |_d}q|d dkr| j|d  q|dkr#| | dS dS )zParse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        r   r      #N:z
user-agentZdisallowFZallowTzcrawl-delayzrequest-rate/Zsitemap)Entryr   r,   findstripsplitlenlowerr   r   unquoter)   r*   	rulelinesRuleLineisdigitintdelayr   req_rater   )r   linesstater+   lineiZnumbersr   r   r   r   Q   sv   





 
zRobotFileParser.parsec                 C   s   | j rdS | jr
dS | jsdS tjtj|}tjdd|j|j	|j
|jf}tj|}|s3d}| jD ]}||rD||  S q6| jrN| j|S dS )z=using the parsed robots.txt decide if useragent can fetch urlFTr   r0   )r   r	   r   r   r   r   r7   
urlunparser   paramsqueryfragmentquoter   
applies_to	allowancer   )r   	useragentr   Z
parsed_urlr+   r   r   r   	can_fetch   s(   

zRobotFileParser.can_fetchc                 C   >   |   sd S | jD ]}||r|j  S q	| jr| jjS d S N)r   r   rG   r<   r   r   rI   r+   r   r   r   crawl_delay      


zRobotFileParser.crawl_delayc                 C   rK   rL   )r   r   rG   r=   r   rM   r   r   r   request_rate   rO   zRobotFileParser.request_ratec                 C   s   | j sd S | j S rL   )r   r   r   r   r   	site_maps   s   zRobotFileParser.site_mapsc                 C   s,   | j }| jd ur|| jg }dtt|S )Nz

)r   r   joinmapstr)r   r   r   r   r   __str__   s   
zRobotFileParser.__str__N)r   )__name__
__module____qualname____doc__r   r   r   r
   r!   r,   r   rJ   rN   rP   rQ   rU   r   r   r   r   r      s    
			I

c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	r9   zoA rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path.c                 C   s<   |dkr|sd}t jt j|}t j|| _|| _d S )Nr   T)r   r   rB   r   rF   r   rH   )r   r   rH   r   r   r   r      s
   
zRuleLine.__init__c                 C   s   | j dkp
|| j S r'   )r   
startswith)r   filenamer   r   r   rG      s   zRuleLine.applies_toc                 C   s   | j rdndd | j S )NZAllowZDisallowz: )rH   r   r   r   r   r   rU      s   zRuleLine.__str__N)rV   rW   rX   rY   r   rG   rU   r   r   r   r   r9      s
    r9   c                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )r1   z?An entry has one or more user-agents and zero or more rulelinesc                 C   s   g | _ g | _d | _d | _d S rL   )r)   r8   r<   r=   r   r   r   r   r      s   
zEntry.__init__c                 C   s   g }| j D ]
}|d|  q| jd ur|d| j  | jd ur3| j}|d|j d|j  |tt| j	 d
|S )NzUser-agent: zCrawl-delay: zRequest-rate: r0   
)r)   r*   r<   r=   ZrequestsZsecondsextendrS   rT   r8   rR   )r   retagentZrater   r   r   rU      s   



zEntry.__str__c                 C   sF   | dd  }| jD ]}|dkr dS | }||v r  dS qdS )z2check if this entry applies to the specified agentr0   r   r(   TF)r4   r6   r)   )r   rI   r_   r   r   r   rG      s   
zEntry.applies_toc                 C   s$   | j D ]}||r|j  S qdS )zZPreconditions:
        - our agent applies to this entry
        - filename is URL decodedT)r8   rG   rH   )r   r[   r@   r   r   r   rH   
  s
   


zEntry.allowanceN)rV   rW   rX   rY   r   rU   rG   rH   r   r   r   r   r1      s    r1   )rY   collectionsurllib.parser   Zurllib.request__all__
namedtupler   r   r9   r1   r   r   r   r   <module>   s     B