
    3 dC+                         d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ  ej        e          Z G d de          ZdS )    N)Dict)ItemAdapteris_item)maybeDeferred)is_url)BaseRunSpiderCommand)
UsageError)Request)display)iterate_spider_outputspidercls_for_requestc                       e Zd ZU dZdZi Zeeef         e	d<   i Z
eeef         e	d<   dZd Zd Zd Zed             Zd	 Zd
 ZddZddZd Zd ZddZd Zd Zd Zd Zd Zd Zd Zd Zd Z dS )CommandTNitemsrequestsc                     dS )Nz[options] <url> selfs    5lib/python3.11/site-packages/scrapy/commands/parse.pysyntaxzCommand.syntax   s          c                     dS )Nz2Parse URL (using its spider) and print the resultsr   r   s    r   
short_desczCommand.short_desc   s    CCr   c                 d   t          j        | |           |                    ddd d           |                    ddd           |                    d	d
dd           |                    dddd           |                    dddd           |                    ddddd           |                    dddd           |                    dddd           |                    d d!d"           |                    d#d$d%t          d&d'(           |                    d)d*d+dd,           d S )-Nz--spiderspiderz'use this spider without looking for one)destdefaulthelpz--pipelines
store_truezprocess items through pipelines)actionr   z	--nolinksnolinksz/don't show links to follow (extracted requests))r   r!   r   z	--noitemsnoitemszdon't show scraped itemsz
--nocolournocolourz+avoid using pygments to colorize the outputz-rz--rulesrulesz.use CrawlSpider rules to discover the callbackz-cz
--callbackcallbackz=use this callback for parsing, instead looking for a callback)r   r   z-mz--metametazFinject extra meta into the Request, it must be a valid raw json stringz
--cbkwargscbkwargszQinject extra callback kwargs into the Request, it must be a valid raw json stringz-dz--depthdepth   z9maximum depth for parsing requests [default: %(default)s])r   typer   r   z-vz	--verboseverbosez!print each depth level one by one)r   add_optionsadd_argumentint)r   parsers     r   r-   zCommand.add_options!   s   (v666:	 	 	
 	
 	
 	,5V 	 	
 	
 	
 	B	 	 	
 	
 	
 	+	 	 	
 	
 	
 	>	 	 	
 	
 	
 	A 	 	
 	
 	
 	P	 	 	
 	
 	
 	Y	 	 	
 	
 	
 	d 	 	
 	
 	

 	L 	 	
 	
 	
 	4 	 	
 	
 	
 	
 	
r   c                     d\  }}| j         rt          | j                   }| j        rt          | j                  }t          ||          S )N)r   r   )r   maxr   )r   	max_itemsmax_requestss      r   	max_levelzCommand.max_levelf   sL    "&	<: 	(DJI= 	.t}--L9l+++r   c                 V    | j                             |g           }||z   | j         |<   d S N)r   get)r   lvl	new_items	old_itemss       r   	add_itemszCommand.add_itemso   s+    JNN3++	#i/
3r   c                 V    | j                             |g           }||z   | j         |<   d S r7   )r   r8   )r   r9   new_reqsold_reqss       r   add_requestszCommand.add_requestss   s0    =$$S"--%0cr   c                     |$d | j                                         D             }n| j                             |g           }t          dd           t	          j        d |D             |           d S )Nc                     g | ]	}|D ]}|
S r   r   ).0lstitems      r   
<listcomp>z'Command.print_items.<locals>.<listcomp>y   s%    IIIcSIITTIIIIr   z# Scraped Items z<------------------------------------------------------------c                 P    g | ]#}t          |                                          $S r   )r   asdict)rC   xs     r   rF   z'Command.print_items.<locals>.<listcomp>~   s*    ???AA--//???r   colorize)r   valuesr8   printr   pprint)r   r9   colourr   s       r   print_itemszCommand.print_itemsw   s}     	,IITZ%6%6%8%8IIIEEJNN3++E (+++?????&QQQQQQr   c                     |*| j         r | j         t          | j                            }ng }n| j                             |g           }t          dd           t	          j        ||           d S )Nz# Requests zA-----------------------------------------------------------------rJ   )r   r2   r8   rM   r   rN   )r   r9   rO   r   s       r   print_requestszCommand.print_requests   su     	2} =T]););<}((b11HmX&&&x&111111r   c                    |j          }|j        rjt          d| j        dz             D ]O}t	          d| d           |j        s|                     ||           |j        s|                     ||           Pd S t	          d| j         d           |j        s|                     |           |j        s|                     |           d S d S )Nr*   z
>>> DEPTH LEVEL: z <<<z
>>> STATUS DEPTH LEVEL )rO   )	r$   r,   ranger5   rM   r#   rP   r"   rR   )r   optsrO   levels       r   print_resultszCommand.print_results   s   ]"< 	3q$.1"455 7 77E777888| 4$$UF333| 7''v6667 7 BdnBBBCCC< 0   ///< 3##6#222223 3r   c                     g g }}|D ]Q}t          |          r|                    |           't          |t                    r|                    |           R||||||fS r7   )r   append
isinstancer
   )	r   spider_outputrU   r)   r   r&   r   r   rI   s	            r   _get_items_and_requestszCommand._get_items_and_requests   sq    bx 	# 	#Aqzz #QAw'' #"""heVX==r   c                 F    |pi }t          t           ||fi |          }|S r7   )r   r   )r   responser&   	cb_kwargsds        r   run_callbackzCommand.run_callback   s2    O	/(1P1Pi1P1PQQr   c                     t          |dd           r6|j        D ],}|j                            |j                  r|j        pdc S -d S t                              dd|j        i           d S )Nr%   parsez]No CrawlSpider rules found in spider %(spider)r, please specify a callback to use for parsingr   )	getattrr%   link_extractormatchesurlr&   loggererrorname)r   r   r^   rules       r   get_callback_from_ruleszCommand.get_callback_from_rules   s    67D)) 		 4 4&..x|<< 4=3G33344 4 LL?6;'    r   c                      j         j        }j        rS	 |                    j                   _        nx# t
          $ r% t                              ddj        i           Y nJw xY wt          |t                               _         j        st                              ddi            fd} j        r| j        _
        d S d S )Nz!Unable to find spider: %(spider)sr   z"Unable to find spider for: %(url)srg   c              3   X   K                        | t                              V  d S r7   )prepare_requestr
   )r   rU   r   rg   s    r   _start_requestsz.Command.set_spidercls.<locals>._start_requests   s1      &&vws||TBBBBBBBr   )crawler_processspider_loaderr   load	spiderclsKeyErrorrh   ri   r   r
   start_requests)r   rg   rU   rr   rp   s   ```  r   set_spiderclszCommand.set_spidercls   s   ,:; 
	Q!.!3!3DK!@!@   7(DK9P    
 3='#,,OODN> QAE3<PPP	C 	C 	C 	C 	C 	C 	C > 	<,;DN)))	< 	<s   8 ,A'&A'c                     | j         j        | j        fi |j         t	          | j         j                  d         | _        | j                                          | j        st          
                    dd|i           d S d S )Nr   z#No response downloaded for: %(url)srg   )rq   crawlrt   spargslistcrawlerspcrawlerstartfirst_responserh   ri   )r   rg   rU   s      r   start_parsingzCommand.start_parsing   s    ""4>AAT[AAAT1:;;A>""$$$" 	NLL>MMMMM	N 	Nr   c                 f   |\  }}}}}}|j         r1| j        j        j        j        }|D ]}	|                    |	|           |                     ||           |                     ||           |j        r|ng }
||j	        k     r-|D ]%}|dz   |j
        d<   |j        |j
        d<   ||_        &|
|z  }
|
S )Nr*   _depth	_callback)	pipelinesr}   enginescraperitemprocprocess_itemr<   r@   outputr)   r'   r&   )r   argsr   r   rU   r)   r   r&   r   rE   scraped_datareqs               r   r   zCommand.scraped_data   s    9=6xufh> 	4}+3<H 4 4%%dF3333ue$$$%*** $3uu4: 	% ( (%*QY"(+%'H$Lr   c                       fdj         r|j                             j                    j        r|j                            j                   d|j         d<   |j        |j         d<   |_        |S )Nc                 t   j         s| _         | j        d         }|sfj        rj        }nWj        rNj         | k    rC                    	|           }|s*t
                              d| j        	j        d           d S nd}t          |          sHt          	|d           }t          |          r|}n%t
                              d|	j        d           d S | j        d         }                    | ||          }|                    j        |	           |                    j                   |S )Nr   z=Cannot find a rule that matches %(url)r in spider: %(spider)s)rg   r   rc   z7Cannot find callback %(callback)r in spider: %(spider)s)r&   r   r   )r   r'   r&   r%   rl   rh   ri   rg   rj   callablerd   ra   addCallbackr\   r   )
r^   r_   cb	cb_methodr)   r`   r&   rU   r   r   s
         r   r&   z)Command.prepare_request.<locals>.callback   so   & /&.# {+B != !BBZ 
!D$78$C 
!55fhGGB [$,LFKHH    !BB<< 	#FB55	I&& "BBLLQ%'6;??   F M(+E!!(B	::AMM$6eVXVVVMM$+,,,Hr   r*   r   r   )r'   updater(   r_   r&   )r   r   requestrU   r&   s   `` `@r   ro   zCommand.prepare_request   s    '	 '	 '	 '	 '	 '	 '	 '	T 9 	+L	*** = 	4$$T]333!"X$+$4[!#r   c                     t          j        | ||           |                     |           |                     |           d S r7   )r   process_optionsprocess_request_metaprocess_request_cb_kwargs)r   r   rU   s      r   r   zCommand.process_options  sE    ,T4>>>!!$'''&&t,,,,,r   c                     |j         r@	 t          j        |j                   |_         d S # t          $ r t	          dd          w xY wd S )NzdInvalid -m/--meta value, pass a valid json string to -m or --meta. Example: --meta='{"foo" : "bar"}'F
print_help)r'   jsonloads
ValueErrorr	   r   rU   s     r   r   zCommand.process_request_meta#  sl    9 	 Jty11			    :$   	 		   ) Ac                     |j         r@	 t          j        |j                   |_         d S # t          $ r t	          dd          w xY wd S )NzgInvalid --cbkwargs value, pass a valid json string to --cbkwargs. Example: --cbkwargs='{"foo" : "bar"}'Fr   )r(   r   r   r   r	   r   s     r   r   z!Command.process_request_cb_kwargs.  sl    = 	 $
4= 9 9    >$   	 	r   c                 0   t          |          dk    rt          |d                   st                      |d         }|                     ||           | j        r8|j        dk    r/|                     ||           |                     |           d S d S d S )Nr*   r   )lenr   r	   rw   rt   r)   r   rW   )r   r   rU   rg   s       r   runzCommand.run9  s    4yyA~ 	VDG__ 	,,q'C 	3%%%> 	%dj1n 	%sD)))t$$$$$	% 	% 	% 	%r   )NTr7   )!__name__
__module____qualname__requires_projectr   r   r   r/   r{   __annotations__r   r   r   r   r-   propertyr5   r<   r@   rP   rR   rW   r\   ra   rl   rw   r   r   ro   r   r   r   r   r   r   r   r   r      s        FE4T	? "Hd39o"""N! ! !D D DC
 C
 C
J , , X,0 0 01 1 1R R R R
2 
2 
2 
23 3 3"> > >   

 
 
< < <(N N N  &5 5 5n- - -	 	 		 	 	% % % % %r   r   )r   loggingtypingr   itemadapterr   r   twisted.internet.deferr   	w3lib.urlr   scrapy.commandsr   scrapy.exceptionsr	   scrapy.httpr
   scrapy.utilsr   scrapy.utils.spiderr   r   	getLoggerr   rh   r   r   r   r   <module>r      s           , , , , , , , , 0 0 0 0 0 0       0 0 0 0 0 0 ( ( ( ( ( (                   L L L L L L L L		8	$	$s% s% s% s% s%" s% s% s% s% s%r   