
    z|aN                     0   d Z ddlZddlZddlZ ej        dded           ddlmZmZ ddlm	Z	 dd	l
mZmZ dd
l
mZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddlmZmZ ddlmZ e	j        Z ee          Z e	j!        d         e	j!        d         e	j!        d         hZ"e	j!        d         Z#e	j!        d         Z$g dZ% G d d          Z& G d de          Z' G d de          Z(d Z)d Z*d Z+ ej,        d           Z-d! Z. G d" d#e          Z/dS )$z
Shim module between Bleach and html5lib. This makes it easier to upgrade the
html5lib library without having to change a lot of code.
    Nignorez"html5lib's sanitizer is deprecatedzbleach._vendor.html5lib)messagecategorymodule)
HTMLParsergetTreeWalker)	constants)
namespacesprefixes)_ReparseException)Filter)allowed_protocols)HTMLInputStream)escapeHTMLSerializer)attributeMapHTMLTokenizer)TrieStartTagEndTagEmptyTag
Characters
ParseError)paabbraddressareaarticleasideaudiobbasebdibdo
blockquotebodybrbuttoncanvascaptioncitecodecolcolgroupdatadatalistdddeldetailsdfndialogdivdldtemembedfieldset
figcaptionfigurefooterformh1h2h3h4h5h6headheaderhgrouphrhtmliiframeimginputinskbdkeygenlabellegendlilinkmapmarkmenumetameternavnoscriptobjectoloptgroupoptionoutputpparampicturepreprogressqrprtrubyssampscriptsectionselectslotsmallsourcespanstrongstylesubsummarysuptabletbodytdtemplatetextareatfootththeadtimetitletrtrackuulvarvideowbrc                   z    e Zd ZdZd Zed             Zed             Zed             Zd Z	ddZ
d	 Zd
 Zd ZdS )InputStreamWithMemoryzWraps an HTMLInputStream to remember characters since last <

    This wraps existing HTMLInputStream classes to keep track of the stream
    since the last < which marked an open tag state.

    c                 f    || _         | j         j        | _        | j         j        | _        g | _        d S N)_inner_streamresetposition_buffer)selfinner_streams     4lib/python3.11/site-packages/bleach/html5lib_shim.py__init__zInputStreamWithMemory.__init__   s/    )'-
*3    c                     | j         j        S r   )r   errorsr   s    r   r   zInputStreamWithMemory.errors   s    !((r   c                     | j         j        S r   )r   charEncodingr   s    r   r   z"InputStreamWithMemory.charEncoding   s    !..r   c                     | j         j        S r   )r   changeEncodingr   s    r   r   z$InputStreamWithMemory.changeEncoding   s    !00r   c                 p    | j                                         }|r| j                            |           |S r   )r   charr   append)r   cs     r   r   zInputStreamWithMemory.char   s9    ##%% 	#L"""r   Fc                     | j                             ||          }| j                            t	          |                     |S )N)opposite)r   
charsUntilr   extendlist)r   
charactersr   charss       r   r   z InputStreamWithMemory.charsUntil   s>    "--j8-LLDKK(((r   c                 x    | j         r| j                             d           | j                            |          S )N)r   popr   unget)r   r   s     r   r   zInputStreamWithMemory.unget   s9    < 	!LR   !''---r   c                 6    d                     | j                  S )zReturns the stream history since last '<'

        Since the buffer starts at the last '<' as as seen by tagOpenState(),
        we know that everything from that point to when this method is called
        is the "tag" that is being tokenized.

         )joinr   r   s    r   get_tagzInputStreamWithMemory.get_tag   s     wwt|$$$r   c                     dg| _         dS )zResets stream history to just '<'

        This gets called by tagOpenState() which marks a '<' that denotes an
        open tag. Any time we see that, we reset the buffer.

        <N)r   r   s    r   	start_tagzInputStreamWithMemory.start_tag   s     ur   NF)__name__
__module____qualname____doc__r   propertyr   r   r   r   r   r   r   r    r   r   r   r      s            ) ) X) / / X/ 1 1 X1     
. . .
% % %    r   r   c                   N     e Zd ZdZd	 fd	Z fdZd
 fd	Z fdZ fdZ xZ	S )BleachHTMLTokenizerz1Tokenizer that doesn't consume character entitiesFc                      t          t          |           j        di | || _        t	          | j                  | _        d S )Nr   )superr   r   consume_entitiesr   stream)r   r   kwargs	__class__s      r   r   zBleachHTMLTokenizer.__init__  sF    1!4((1;;F;;; 0 ,DK88r   c              #     K   d }t          t          |                                           D ]+}||d         dk    r_|d         t          v rP|                    d          r;t          d |d                                         D                       |d<   d }|V  n|d         dk    rq| j        j        e|d         	                                
                                | j        j        vr-| j                                        |d<   t          |d<   d }|V  n"|d         t          k    r|V  |}n
|V  |V  d }|d         t          k    r|}'|V  -|r|V  d S d S )Nr/   z#invalid-character-in-attribute-nametypec              3   <   K   | ]\  }}d |vrd|vr
d|v||fV  dS )"'r   Nr   ).0	attr_name
attr_values      r   	<genexpr>z/BleachHTMLTokenizer.__iter__.<locals>.<genexpr>  sZ       1 11Izy00 #9 4 4 #9 4 4 #J/
 !5 4 4 41 1r   z!expected-closing-tag-but-got-char)r   r   __iter__TAG_TOKEN_TYPESgetr   itemsparsertagslowerstripr   r   CHARACTERS_TYPEPARSEERROR_TYPE)r   last_error_tokentokenr   s      r   r   zBleachHTMLTokenizer.__iter__  s     .55>>@@ B	 B	E+$V,0UUUf88		&)) 9 %1 1 15:6]5H5H5J5J1 1 1 % %E&M (,$KKKK %V,0SSS(4f++--3355T[=MMM %)K$7$7$9$9E&M$3E&M'+$KKKK6]o55 +***',$$ +***KKK'+$ V}//#( KKKK 	#""""""	# 	#r   Nc                     | j         r)t          t          |                               ||          S |r#| j        d         d         dxx         dz  cc<   d S | j                            t          dd           d S )Nr/   r      &r   r/   )r   r   r   consumeEntitycurrentToken
tokenQueuer   r   )r   allowedCharfromAttributer   s      r   r   z!BleachHTMLTokenizer.consumeEntityU  s       	,d33AA]    	Kf%b)!,,,3,,,,, O""OS#I#IJJJJJr   c                     | j                                          t          t          |                                           S r   )r   r   r   r   tagOpenState)r   r   s    r   r   z BleachHTMLTokenizer.tagOpenStatei  s6    
 	($//<<>>>r   c                    | j         }| j        j        |d         t          v r|d                                         | j        j        vra| j        j        rd}n| j                                        }t          |d}|| _         | j	        
                    |           | j        | _        d S t          t          |                                            d S )Nr   namer   r   )r   r   r   r   r   r   r   r   r   r   r   	dataStatestater   r   emitCurrentToken)r   r   new_data	new_tokenr   s       r   r   z$BleachHTMLTokenizer.emitCurrentTokenq  s    ! K(f00f##%%T[-===
 {  1   ;..00!0(CCI )DO""9---DJF!4((99;;;;;r   r   )NF)
r   r   r   r   r   r   r   r   r   __classcell__r   s   @r   r   r      s        ;;9 9 9 9 9 9H# H# H# H# H#TK K K K K K(? ? ? ? ?< < < < < < < < <r   r   c                   ,     e Zd ZdZ fdZ	 ddZ xZS )BleachHTMLParserz$Parser that uses BleachHTMLTokenizerc                     |d |D             nd| _         || _        || _         t          t          |           j        di | dS )a  
        :arg tags: list of allowed tags--everything else is either stripped or
            escaped; if None, then this doesn't look at tags at all
        :arg strip: whether to strip disallowed tags (True) or escape them (False);
            if tags=None, then this doesn't have any effect
        :arg consume_entities: whether to consume entities (default behavior) or
            leave them as is when tokenizing (BleachHTMLTokenizer-added behavior)

        Nc                 6    g | ]}|                                 S r   )r   )r   tags     r   
<listcomp>z-BleachHTMLParser.__init__.<locals>.<listcomp>  s     111SSYY[[111r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   s        r   r   zBleachHTMLParser.__init__  s^     6:5E11D11114	
 0.%%.8888888r   Fr6   Tc                 (   || _         || _        || _        t          d|| j        | d|| _        |                                  	 |                                  d S # t          $ r, |                                  |                                  Y d S w xY w)N)r   r   r   r   )	innerHTMLMode	container	scriptingr   r   	tokenizerr   mainLoopReparseException)r   r   	innerHTMLr   r   r   s         r   _parsezBleachHTMLParser._parse  s     '"", 
D,A$
 
RX
 
 	

	MMOOOOO 	 	 	JJLLLMMOOOOOO	s   A 2BB)Fr6   T)r   r   r   r   r   r   r   r   s   @r   r   r     sY        ..9 9 9 9 9  CG       r   r   c                 2   | d         dk    rqt          |           dk     rdS | d         dv r| dd         d}}n| dd         d}}|d	k    rdS t          ||          }d|cxk     rd
k     rn nt          |          S dS t                              | d          S )a9  Convert an entity (minus the & and ; part) into what it represents

    This handles numeric, hex, and text entities.

    :arg value: the string (minus the ``&`` and ``;`` part) to convert

    :returns: unicode character or None if it's an ambiguous ampersand that
        doesn't match a character entity

    r   #   Nr   xX   
   r   i   )lenintchrENTITIESr   )valueint_as_stringr"   
code_points       r   convert_entityr
    s     Qx3u::>>48z!!"')R4MM #()R4MB4--
z$$$$H$$$$$z??"4<<t$$$r   c                    d| vr| S g }t          |           D ]}|s|                    d          rit          |          }|Xt          |          }|G|                    |           |t          |          dz   d         }|r|                    |           |                    |           d                    |          S )zConverts all found entities in the text

    :arg text: the text to convert entities in

    :returns: unicode text with converted entities

    r   Nr   r   )next_possible_entity
startswithmatch_entityr
  r   r  r   )textnew_textpartentity	converted	remainders         r   convert_entitiesr    s     $H$T**   	??3 	!$''F!*622	 (OOI... $S[[1_%6%6 7I 3 	222778r   c                    | d         dk    rt          d          | dd         } t          |           } d}dt          j        z   }| r| d         dk    rd}|                     d           | r%| d         d	v rd
}||                     d          z  }nd}| r5| d         |vr+|                     d          }||vrn||z  }| r
| d         |v+|r| r| d         dk    r|S dS | rK| d         |vrA|                     d          }t
                              |          sn||z  }| r
| d         |vA|r| r| d         dk    r|S dS )aH  Returns first entity in stream or None if no entity exists

    Note: For Bleach purposes, entities must start with a "&" and end with
    a ";". This ignoresambiguous character entities that have no ";" at the
    end.

    :arg stream: the character stream

    :returns: ``None`` or the entity string without "&" or ";"

    r   r   zStream should begin with "&"r   Nr   z<&=;r   r   0123456789abcdefABCDEF
0123456789;)
ValueErrorr   string
whitespacer   ENTITIES_TRIEhas_keys_with_prefix)r   possible_entityend_charactersallowedr   s        r   r  r    s    ayC7888ABBZF&\\FOf//N  &)s""

1 	#fQi:--.Gvzz!}},OO"G  	!.88

1Aq O	  	!.88  	#v 	#&)s*:*:""t  VAYn44JJqMM11/BB 	1	  VAYn44  6 fQi3&6&64r   z(&)c              #      K   t          t                              |                     D ] \  }}|dk    r|V  |dz  dk    rd|z   V  !dS )zTakes a text and generates a list of possible entities

    :arg text: the text to look at

    :returns: generator where each part (except the first) starts with an
        "&"

    r   r   r   N)	enumerateAMP_SPLIT_REsplit)r  rK   r  s      r   r  r  ?  sm       \//5566  466JJJJUaZZ*	 r   c                   .     e Zd ZdZdZd Zd fd	Z xZS )BleachHTMLSerializerz[HTMLSerializer that undoes & -> &amp; in attributes and sets
    escape_rcdata to True
    Tc              #   P  K   |                     dd          }t          |          D ]}}|s|                    d          rKt          |          }|:t	          |          +d|z   dz   V  |t          |          dz   d         }|r|V  e|                     dd          V  ~dS )z,Escapes just bare & in HTML attribute valuesz&amp;r   Nr  r   )replacer  r  r  r
  r  )r   stokenr  r  s       r   escape_base_ampz$BleachHTMLSerializer.escape_base_amp\  s       -- )00 	- 	-D s## %d++ %.*@*@*L,,,,,  Fa 1 12D #"


,,sG,,,,,,%	- 	-r   Nc              #   "  K   d}d}t          t          |                               ||          D ]]}|r>|dk    rd}n0|r&|dk    r|                     |          D ]}|V  d}4n|dk    rd}|V  B|                    d          rd}|V  ^dS )zWrap HTMLSerializer.serialize and conver & to &amp; in attribute values

        Note that this converts & to &amp; in attribute values where the & isn't
        already part of an unambiguous character entity.

        F>r   =Tr   N)r   r'  	serializer+  r  )r   
treewalkerencodingin_tagafter_equalsr*  r  r   s          r   r/  zBleachHTMLSerializer.serialize{  s       0$77AA*hWW 	 	F S=="FF! 	(}}$($8$8$@$@ ' 'D"&JJJJ',  % s]]#'L$$S)) "!F)	 	r   r   )r   r   r   r   escape_rcdatar+  r/  r   r   s   @r   r'  r'  O  s]          M- - ->         r   r'  )0r   rer  warningsfilterwarningsDeprecationWarningbleach._vendor.html5libr   r   r	   !bleach._vendor.html5lib.constantsr
   r   r   r   $bleach._vendor.html5lib.filters.baser   )bleach._vendor.html5lib.filters.sanitizerr   SanitizerFilter$bleach._vendor.html5lib._inputstreamr   "bleach._vendor.html5lib.serializerr   r   "bleach._vendor.html5lib._tokenizerr   r   bleach._vendor.html5lib._trier   entitiesr  r  
tokenTypesr   r   r   	HTML_TAGSr   r   r   r
  r  r  compiler$  r  r'  r   r   r   <module>rF     sS   
 
			    0$	                                                                    X $"$
 &|4&|4
q q q	h< < < < < < < <~P< P< P< P< P<- P< P< P<f' ' ' ' 'z ' ' 'T% % %D     F7 7 7t rz%     J J J J J> J J J J Jr   