o
    ǋ|cC                     @   s   d Z ddlZddlZddlmZ dgZedZedZedZ	edZ
ed	Zed
ZedZedZedZedejZed
ZedZG dd dejZdS )zA parser for HTML and XHTML.    N)unescape
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c                   @   s   e Zd ZdZdZddddZdd Zd	d
 Zdd ZdZ	dd Z
dd Zdd Zdd Zdd Zd7ddZdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Zd5d6 ZdS )8r   aE  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    )ZscriptstyleT)convert_charrefsc                C   s   || _ |   dS )zInitialize and reset this instance.

        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        N)r   reset)selfr    r	     /home/conda/feedstock_root/build_artifacts/python-split_1669104742391/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehol/lib/python3.10/html/parser.py__init__V   s   zHTMLParser.__init__c                 C   s(   d| _ d| _t| _d| _tj|  dS )z1Reset this instance.  Loses all unprocessed data. z???N)rawdatalasttaginteresting_normalinteresting
cdata_elem_markupbase
ParserBaser   r   r	   r	   r
   r   _   s
   zHTMLParser.resetc                 C   s   | j | | _ | d dS )zFeed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        r   N)r   goaheadr   datar	   r	   r
   feedg   s   zHTMLParser.feedc                 C   s   |  d dS )zHandle any buffered data.   N)r   r   r	   r	   r
   closep   s   zHTMLParser.closeNc                 C   s   | j S )z)Return full source of start tag: '<...>'.)_HTMLParser__starttag_textr   r	   r	   r
   get_starttag_textv   s   zHTMLParser.get_starttag_textc                 C   s$   |  | _td| j tj| _d S )Nz</\s*%s\s*>)lowerr   recompileIr   )r   elemr	   r	   r
   set_cdata_modez   s   
zHTMLParser.set_cdata_modec                 C   s   t | _d | _d S N)r   r   r   r   r	   r	   r
   clear_cdata_mode~   s   
zHTMLParser.clear_cdata_modec                 C   s:  | j }d}t|}||k r| jr;| js;|d|}|dk r:|dt||d }|dkr8td	||s8n|}n| j
	||}|rI| }n| jrNn|}||k ro| jrf| jsf| t|||  n	| |||  | ||}||kr{nj|j}|d|rt||r| |}	n>|d|r| |}	n3|d|r| |}	n(|d|r| |}	n|d	|r| |}	n|d
 |k r| d |d
 }	nn|	dk r|sאn|d|d
 }	|	dk r|d|d
 }	|	dk r|d
 }	n|	d
7 }	| jr| js| t|||	  n	| |||	  | ||	}n|d|rlt||}|rO| dd }
| |
 | }	|d|	d
 sH|	d
 }	| ||	}q	d||d  v rk| |||d   | ||d }ny|d|rt||}|r|d
}
| |
 | }	|d|	d
 s|	d
 }	| ||	}q	t||}|r|r| ||d  kr| }	|	|kr|}	| ||d
 }n|d
 |k r| d | ||d
 }nnJ d||k s|r||k r| js| jr| js| t|||  n	| |||  | ||}||d  | _ d S )Nr   <&"   z[\s;]</<!--<?<!r   r   z&#   ;zinteresting.search() lied)r   lenr   r   findrfindmaxr   r   searchr   starthandle_datar   Z	updatepos
startswithstarttagopenmatchparse_starttagparse_endtagparse_commentparse_piparse_html_declarationcharrefgrouphandle_charrefend	entityrefhandle_entityref
incomplete)r   rA   r   injZampposr8   r6   knamer	   r	   r
   r      s   













kzHTMLParser.goaheadc                 C   s   | j }|||d  dksJ d|||d  dkr | |S |||d  dkr/| |S |||d   d	krX|d
|d }|dkrIdS | ||d |  |d S | |S )Nr,   r+   z+unexpected call to parse_html_declaration()   r)      z<![	   z	<!doctyper   r-   r   )r   r;   Zparse_marked_sectionr   r0   handle_declparse_bogus_comment)r   rE   r   gtposr	   r	   r
   r=      s   


z!HTMLParser.parse_html_declarationr   c                 C   s`   | j }|||d  dv sJ d|d|d }|dkrdS |r,| ||d |  |d S )Nr,   )r+   r(   z"unexpected call to parse_comment()r   r-   r   )r   r0   handle_comment)r   rE   reportr   posr	   r	   r
   rN     s   zHTMLParser.parse_bogus_commentc                 C   sd   | j }|||d  dksJ dt||d }|sdS | }| ||d |  | }|S )Nr,   r*   zunexpected call to parse_pi()r-   )r   picloser3   r4   	handle_pirA   )r   rE   r   r8   rG   r	   r	   r
   r<      s   zHTMLParser.parse_pic                 C   s  d | _ | |}|dk r|S | j}||| | _ g }t||d }|s(J d| }|d  | _}||k rt	||}|sCnS|ddd\}	}
}|
sRd }n-|d d d  krd|dd  ksyn |d d d  krw|dd  krn n|dd }|rt
|}||	 |f | }||k s:|||  }|d	vr|  \}}d
| j v r|| j d
 }t| j | j d
 }n|t| j  }| |||  |S |dr| || |S | || || jv r| | |S )Nr   r   z#unexpected call to parse_starttag()r,   rK   'r-   ")r   />
rW   )r   check_for_whole_start_tagr   tagfind_tolerantr8   rA   r?   r   r   attrfind_tolerantr   appendstripZgetposcountr/   r1   r5   endswithhandle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSr"   )r   rE   endposr   attrsr8   rH   tagmattrnamerestZ	attrvaluerA   linenooffsetr	   r	   r
   r9   ,  sX   
&(




zHTMLParser.parse_starttagc                 C   s   | j }t||}|rU| }|||d  }|dkr|d S |dkr?|d|r-|d S |d|r5dS ||kr;|S |d S |dkrEdS |dv rKdS ||krQ|S |d S td	)
Nr   r   /rW   r,   r-   r   z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!)r   locatestarttagend_tolerantr8   rA   r6   AssertionError)r   rE   r   rf   rG   nextr	   r	   r
   rY   _  s.   z$HTMLParser.check_for_whole_start_tagc                 C   s*  | j }|||d  dksJ dt||d }|sdS | }t||}|sn| jd ur9| |||  |S t||d }|sV|||d  dkrQ|d S | 	|S |
d }|d| }| | |d S |
d }| jd ur|| jkr| |||  |S | | |   |S )	Nr,   r(   zunexpected call to parse_endtagr   r-   rK   z</>r   )r   	endendtagr3   rA   
endtagfindr8   r   r5   rZ   rN   r?   r   r0   handle_endtagr$   )r   rE   r   r8   rO   Z	namematchZtagnamer!   r	   r	   r
   r:     s8   





zHTMLParser.parse_endtagc                 C   s   |  || | | d S r#   )ra   rq   r   re   rd   r	   r	   r
   r`     s   zHTMLParser.handle_startendtagc                 C      d S r#   r	   rr   r	   r	   r
   ra        zHTMLParser.handle_starttagc                 C   rs   r#   r	   )r   re   r	   r	   r
   rq     rt   zHTMLParser.handle_endtagc                 C   rs   r#   r	   r   rI   r	   r	   r
   r@     rt   zHTMLParser.handle_charrefc                 C   rs   r#   r	   ru   r	   r	   r
   rC     rt   zHTMLParser.handle_entityrefc                 C   rs   r#   r	   r   r	   r	   r
   r5     rt   zHTMLParser.handle_datac                 C   rs   r#   r	   r   r	   r	   r
   rP     rt   zHTMLParser.handle_commentc                 C   rs   r#   r	   )r   Zdeclr	   r	   r
   rM     rt   zHTMLParser.handle_declc                 C   rs   r#   r	   r   r	   r	   r
   rT     rt   zHTMLParser.handle_pic                 C   rs   r#   r	   r   r	   r	   r
   unknown_decl  rt   zHTMLParser.unknown_decl)r   )__name__
__module____qualname____doc__rb   r   r   r   r   r   r   r"   r$   r   r=   rN   r<   r9   rY   r:   r`   ra   rq   r@   rC   r5   rP   rM   rT   rv   r	   r	   r	   r
   r   >   s8    		z
3"()rz   r   r   Zhtmlr   __all__r   r   rD   rB   r>   r7   rS   ZcommentcloserZ   r[   VERBOSErl   ro   rp   r   r   r	   r	   r	   r
   <module>   s,    










