o
    ñ+óeü7  ã                   @  sÆ   d Z ddlmZ ddlZddlZddlZddlmZm	Z	 er$ddl
mZ ej d¡Zej e¡Zej e¡ eejd< e d¡e_e d	¡e_eje_e d
ej¡e_e d¡ZG dd„ dejƒZdS )a  
This module imports a copy of [`html.parser.HTMLParser`][] and modifies it heavily through monkey-patches.
A copy is imported rather than the module being directly imported as this ensures that the user can import
and  use the unmodified library for their own needs.
é    )ÚannotationsN)ÚTYPE_CHECKINGÚSequence)ÚMarkdownzhtml.parserÚ
htmlparserz\?>z&([a-zA-Z][-.a-zA-Z0-9]*);a”  
  <[a-zA-Z][^`\t\n\r\f />\x00]*       # tag name <= added backtick here
  (?:[\s/]*                           # optional whitespace before attribute name
    (?:(?<=['"\s/])[^`\s/>][^\s/=>]*  # attribute name <= added backtick here
      (?:\s*=+\s*                     # value indicator
        (?:'[^']*'                    # LITA-enclosed value
          |"[^"]*"                    # LIT-enclosed value
          |(?!['"])[^`>\s]*           # bare value <= added backtick here
         )
         (?:\s*,)*                    # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                 # trailing whitespace
z^([ ]*\n){2}c                      s  e Zd ZU dZd@‡ fdd„Z‡ fdd„Z‡ fdd	„ZedAdd„ƒZdBdd„Z	dCdd„Z
dDdd„ZdEdd„ZdFdd„ZdGdd „ZdEd!d"„ZdHd$d%„ZdHd&d'„ZdFd(d)„ZdFd*d+„ZdFd,d-„ZdFd.d/„ZdI‡ fd1d2„ZdI‡ fd3d4„ZdJdK‡ fd7d8„Zd9Zd:ed;< dLd<d=„ZdId>d?„Z‡  ZS )MÚHTMLExtractorzû
    Extract raw HTML from text.

    The raw HTML is stored in the [`htmlStash`][markdown.util.HtmlStash] of the
    [`Markdown`][markdown.Markdown] instance passed to `md` and the remaining text
    is stored in `cleandoc` as a list of strings.
    Úmdr   c                   s@   d|vrd|d< t dgƒ| _dg| _tƒ j|i |¤Ž || _d S )NÚconvert_charrefsFÚhrr   )ÚsetÚ
empty_tagsÚlineno_start_cacheÚsuperÚ__init__r   )Úselfr   ÚargsÚkwargs©Ú	__class__© ú3lib/python3.10/site-packages/markdown/htmlparser.pyr   S   s   
zHTMLExtractor.__init__c                   s4   d| _ d| _g | _g | _g | _dg| _tƒ  ¡  dS )z1Reset this instance.  Loses all unprocessed data.Fr   N)ÚinrawÚintailÚstackÚ_cacheÚcleandocr   r   Úreset©r   r   r   r   r   `   s   zHTMLExtractor.resetc                   sv   t ƒ  ¡  t| jƒr | jr| js|  t | j¡¡ n|  | j¡ t| j	ƒr9| j
 | jj d | j	¡¡¡ g | _	dS dS )zHandle any buffered data.Ú N)r   ÚcloseÚlenÚrawdatar	   Z
cdata_elemÚhandle_datar   Úunescaper   r   Úappendr   Ú	htmlStashÚstoreÚjoinr   r   r   r   r   k   s   



þzHTMLExtractor.closeÚreturnÚintc                 C  sj   t t| jƒd | jd ƒD ]}| j| }| j d|¡}|dkr$t| jƒ}| j |d ¡ q| j| jd  S )zHReturns char index in `self.rawdata` for the start of the current line. é   Ú
éÿÿÿÿ)Úranger    r   Úlinenor!   Úfindr$   )r   ÚiiZlast_line_start_posZlf_posr   r   r   Úline_offsetz   s   

zHTMLExtractor.line_offsetÚboolc                 C  s<   | j dkrdS | j dkrdS | j| j| j| j  …  ¡ dkS )z†
        Returns True if current position is at start of line.

        Allows for up to three blank spaces at start of line.
        r   Té   Fr   )Úoffsetr!   r1   Ústripr   r   r   r   Úat_line_start‡   s
   

 zHTMLExtractor.at_line_startÚtagÚstrc                 C  s<   | j | j }tj | j|¡}|r| j|| ¡ … S d |¡S )z™
        Returns the text of the end tag.

        If it fails to extract the actual text from the raw data, it builds a closing tag with `tag`.
        z</{}>)r1   r4   r   Z	endendtagÚsearchr!   ÚendÚformat)r   r7   ÚstartÚmr   r   r   Úget_endtag_text”   s
   
zHTMLExtractor.get_endtag_textÚattrsúSequence[tuple[str, str]]c                 C  sœ   || j v r|  ||¡ d S | j |¡r&| js|  ¡ r&| js&d| _| j d¡ |  	¡ }| jr;| j
 |¡ | j |¡ d S | j |¡ || jv rL|  ¡  d S d S )NTr+   )r   Úhandle_startendtagr   Úis_block_levelr   r6   r   r   r$   Úget_starttag_textr   r   ÚCDATA_CONTENT_ELEMENTSZclear_cdata_mode)r   r7   r?   Útextr   r   r   Úhandle_starttag£   s   
 
þzHTMLExtractor.handle_starttagc                 C  sÖ   |   |¡}| jrc| j |¡ || jv r!| jr!| j ¡ |krn| jst| jƒdkrat | j	| j
| j t|ƒ d … ¡rA| j d¡ nd| _d| _| j | jj d | j¡¡¡ | j d¡ g | _d S d S | j |¡ d S )Nr   r+   TFr   ú

)r>   r   r   r$   r   Úpopr    Úblank_line_reÚmatchr!   r1   r4   r   r   r   r%   r&   r'   )r   r7   rE   r   r   r   Úhandle_endtag¸   s$   

þ$
ózHTMLExtractor.handle_endtagÚdatac                 C  s:   | j r
d|v r
d| _ | jr| j |¡ d S | j |¡ d S )Nr+   F)r   r   r   r$   r   ©r   rL   r   r   r   r"   Ó   s
   zHTMLExtractor.handle_dataÚis_blockc                 C  sÆ   | j s| jr| j |¡ dS |  ¡ r[|r[t | j| j| j	 t
|ƒ d… ¡r+|d7 }nd| _| jr6| jd nd}| d¡sH| d¡rH| j d¡ | j | jj |¡¡ | j d¡ dS | j |¡ dS )z Handle empty tags (`<data>`). Nr+   Tr,   r   rG   )r   r   r   r$   r6   rI   rJ   r!   r1   r4   r    r   Úendswithr   r%   r&   )r   rL   rN   Úitemr   r   r   Úhandle_empty_tagÛ   s   $
zHTMLExtractor.handle_empty_tagc                 C  s   | j |  ¡ | j |¡d d S )N©rN   )rQ   rC   r   rB   )r   r7   r?   r   r   r   rA   ò   s   z HTMLExtractor.handle_startendtagÚnamec                 C  ó   | j d |¡dd d S )Nz&#{};FrR   ©rQ   r;   ©r   rS   r   r   r   Úhandle_charrefõ   ó   zHTMLExtractor.handle_charrefc                 C  rT   )Nz&{};FrR   rU   rV   r   r   r   Úhandle_entityrefø   rX   zHTMLExtractor.handle_entityrefc                 C  rT   )Nz	<!--{}-->TrR   rU   rM   r   r   r   Úhandle_commentû   rX   zHTMLExtractor.handle_commentc                 C  rT   )Nz<!{}>TrR   rU   rM   r   r   r   Úhandle_declþ   rX   zHTMLExtractor.handle_declc                 C  rT   )Nz<?{}?>TrR   rU   rM   r   r   r   Ú	handle_pi  rX   zHTMLExtractor.handle_pic                 C  s,   |  d¡rdnd}| jd ||¡dd d S )NzCDATA[z]]>z]>z<![{}{}TrR   )Ú
startswithrQ   r;   )r   rL   r:   r   r   r   Úunknown_decl  s   zHTMLExtractor.unknown_declÚic                   ó,   |   ¡ s| jrtƒ  |¡S |  d¡ |d S )Nz<?é   )r6   r   r   Úparse_pir"   ©r   r_   r   r   r   rb     ó   
zHTMLExtractor.parse_pic                   r`   )Nz<!ra   )r6   r   r   Úparse_html_declarationr"   rc   r   r   r   re     rd   z$HTMLExtractor.parse_html_declarationr   Úreportc                   s6   t ƒ  ||¡}|dkrdS | j| j||… dd |S )Nr,   FrR   )r   Úparse_bogus_commentrQ   r!   )r   r_   rf   Úposr   r   r   rg     s
   z!HTMLExtractor.parse_bogus_commentNz
str | NoneÚ_HTMLExtractor__starttag_textc                 C  s   | j S )z)Return full source of start tag: `<...>`.)ri   r   r   r   r   rC   '  s   zHTMLExtractor.get_starttag_textc                 C  sê  d | _ |  |¡}|dk r|S | j}|||… | _ g }tj ||d ¡}|s)J dƒ‚| ¡ }| d¡ ¡  | _	}||k r™tj
 ||¡}|sEnT| ddd¡\}	}
}|
sTd }n-|d d… d  krf|dd … ks{n |d d… d  kry|dd … krn n|dd… }|rˆt |¡}| |	 ¡ |f¡ | ¡ }||k s;|||…  ¡ }|d	vrÖ|  ¡ \}}d
| j v rÄ|| j  d
¡ }t| j ƒ| j  d
¡ }n|t| j ƒ }|  |||… ¡ |S | d¡rã|  ||¡ |S || jv rí|  |¡ |  ||¡ |S )Nr   r*   z#unexpected call to parse_starttag()ra   r3   ú'r,   ú")ú>ú/>r+   rm   )ri   Zcheck_for_whole_start_tagr!   r   Ztagfind_tolerantrJ   r:   ÚgroupÚlowerZlasttagZattrfind_tolerantr#   r$   r5   ZgetposÚcountr    Úrfindr"   rO   rA   rD   Zset_cdata_moderF   )r   r_   Úendposr!   r?   rJ   Úkr7   r=   ÚattrnameÚrestZ	attrvaluer:   r.   r4   r   r   r   Úparse_starttag+  sX   
&(
ó

ÿ

ý
zHTMLExtractor.parse_starttag)r   r   )r(   r)   )r(   r2   )r7   r8   r(   r8   )r7   r8   r?   r@   )r7   r8   )rL   r8   )rL   r8   rN   r2   )rS   r8   )r_   r)   r(   r)   )r   )r_   r)   rf   r)   r(   r)   )r(   r8   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   Úpropertyr1   r6   r>   rF   rK   r"   rQ   rA   rW   rY   rZ   r[   r\   r^   rb   re   rg   ri   Ú__annotations__rC   rv   Ú__classcell__r   r   r   r   r   J   s4   
 













r   )rz   Z
__future__r   ÚreÚimportlib.utilÚ	importlibÚsysÚtypingr   r   Zmarkdownr   ÚutilÚ	find_specÚspecÚmodule_from_specr   ÚloaderÚexec_moduleÚmodulesÚcompileZpicloseZ	entityrefZ
incompleteÚVERBOSEZlocatestarttagend_tolerantrI   Z
HTMLParserr   r   r   r   r   Ú<module>   s(   
ò
