
    3 d                         d dl Z d dlZd dlmZ d dlmZ d dlmZ  e j        e	          Z
 G d d          Z ej        d          Zd ZdS )	    N)html)NotConfigured)HtmlResponsec                   :    e Zd ZdZd Zed             Zd Zd ZdS )AjaxCrawlMiddlewarez
    Handle 'AJAX crawlable' pages marked as crawlable via meta tag.
    For more info see https://developers.google.com/webmasters/ajax-crawling/docs/getting-started.
    c                 t    |                     d          st          |                    dd          | _        d S )NAJAXCRAWL_ENABLEDAJAXCRAWL_MAXSIZEi   )getboolr   getintlookup_bytes)selfsettingss     Flib/python3.11/site-packages/scrapy/downloadermiddlewares/ajaxcrawl.py__init__zAjaxCrawlMiddleware.__init__   s>     344 	  %OO,?GG    c                 "     | |j                   S )N)r   )clscrawlers     r   from_crawlerz AjaxCrawlMiddleware.from_crawler   s    s7#$$$r   c                 <   t          |t                    r|j        dk    r|S |j        dk    r|S d|j        v r|S |                     |          s|S |                    |j        dz             }t          	                    d||dd|i	           d
|j        d<   |S )N   GETajax_crawlablez#!)urlzHDownloading AJAX crawlable %(ajax_crawl_request)s instead of %(request)s)ajax_crawl_requestrequestspider)extraT)

isinstancer   statusmethodmeta_has_ajax_crawlable_variantreplacer   loggerdebug)r   r   responser   r   s        r   process_responsez$AjaxCrawlMiddleware.process_response    s    (L11 	X_5K 	O>U" 	Ow|+ 	O//99 	O %__t1C_DDV#5'JJV$ 	 	
 	
 	
 59 01!!r   c                 H    |j         d| j                 }t          |          S )z
        Return True if a page without hash fragment could be "AJAX crawlable"
        according to https://developers.google.com/webmasters/ajax-crawling/docs/getting-started.
        N)textr   _has_ajaxcrawlable_meta)r   r(   bodys      r   r$   z/AjaxCrawlMiddleware._has_ajax_crawlable_variant:   s'    
 }0t001&t,,,r   N)	__name__
__module____qualname____doc__r   classmethodr   r)   r$    r   r   r   r      si         
H H H % % [%" " "4- - - - -r   r   z8<meta\s+name=["\']fragment["\']\s+content=["\']!["\']/?>c                     d| vrdS d| vrdS t          j        | d          } t          j        |           } t          j        |           } t                              |           duS )a  
    >>> _has_ajaxcrawlable_meta('<html><head><meta name="fragment"  content="!"/></head><body></body></html>')
    True
    >>> _has_ajaxcrawlable_meta("<html><head><meta name='fragment' content='!'></head></html>")
    True
    >>> _has_ajaxcrawlable_meta('<html><head><!--<meta name="fragment"  content="!"/>--></head><body></body></html>')
    False
    >>> _has_ajaxcrawlable_meta('<html></html>')
    False
    fragmentFcontent)scriptnoscriptN)r   remove_tags_with_contentreplace_entitiesremove_comments_ajax_crawlable_research)r+   s    r   r,   r,   I   st      u u(/EFFD &&D%%D$$T**$66r   )loggingrew3libr   scrapy.exceptionsr   scrapy.httpr   	getLoggerr.   r&   r   compiler<   r,   r3   r   r   <module>rE      s     				       + + + + + + $ $ $ $ $ $		8	$	$4- 4- 4- 4- 4- 4- 4- 4-p  RZ?  
7 7 7 7 7r   