
    z|asU                        d dl mZ d dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ g dZdd	gd	gd	gd
Zg Zg dZd                    d  e ed d           edd           edd                    D                       Z ej        dez   dz   ej                  ZdZ G d d          Zd Z G d de	j                  ZdS )    )chainN)urlparse)unescape)html5lib_shim)alphabetize_attributes)aabbracronymb
blockquotecodeemiliolstrongulhreftitle)r   r	   r
   )httphttpsmailto c                 ,    g | ]}t          |          S  )chr).0cs     0lib/python3.11/site-packages/bleach/sanitizer.py
<listcomp>r    ,   s    FFFSVVFFF    	                []?c                   .    e Zd ZdZeeeedddfdZd Z	dS )Cleanera  Cleaner for cleaning HTML fragments of malicious content

    This cleaner is a security-focused function whose sole purpose is to remove
    malicious content from a string such that it can be displayed as content in
    a web page.

    To use::

        from bleach.sanitizer import Cleaner

        cleaner = Cleaner()

        for text in all_the_yucky_things:
            sanitized = cleaner.clean(text)

    .. Note::

       This cleaner is not designed to use to transform content to be used in
       non-web-page contexts.

    .. Warning::

       This cleaner is not thread-safe--the html parser has internal state.
       Create a separate cleaner per thread!


    FTNc                 *   || _         || _        || _        || _        || _        || _        |pg | _        t          j        | j         | j        dd          | _	        t          j
        d          | _        t          j        dddddd          | _        dS )a  Initializes a Cleaner

        :arg list tags: allowed list of tags; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list styles: allowed list of css styles; defaults to
            ``bleach.sanitizer.ALLOWED_STYLES``

        :arg list protocols: allowed list of protocols for links; defaults
            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg bool strip: whether or not to strip disallowed elements

        :arg bool strip_comments: whether or not to strip HTML comments

        :arg list filters: list of html5lib Filter classes to pass streamed content through

            .. seealso:: http://html5lib.readthedocs.io/en/latest/movingparts.html#filters

            .. Warning::

               Using filters changes the output of ``bleach.Cleaner.clean``.
               Make sure the way the filters change the output are secure.

        F)tagsstripconsume_entitiesnamespaceHTMLElementsetreealwaysT)quote_attr_valuesomit_optional_tagsescape_lt_in_attrsresolve_entitiessanitizealphabetical_attributesN)r-   
attributesstyles	protocolsr.   strip_commentsfiltersr   BleachHTMLParserparsergetTreeWalkerwalkerBleachHTMLSerializer
serializer)selfr-   r9   r:   r;   r.   r<   r=   s           r   __init__zCleaner.__init__T   s    L 	$"
,}"#4*""'	
 
 
 $1'::'<&$# #$)
 
 
r!   c           
         t          |t                    s/d                    |j        j                  }t          |          |sdS | j                            |          }t          | 	                    |          | j
        | j        | j        | j        | j        | j        g           }| j        D ]} ||          }| j                            |          S )zCleans text and returns sanitized result as unicode

        :arg str text: text to be cleaned

        :returns: sanitized text as unicode

        :raises TypeError: if ``text`` is not a text type

        z9argument cannot be of '{name}' type, must be of text type)namer   )sourcer9   strip_disallowed_elementsstrip_html_commentsallowed_elementsallowed_css_propertiesallowed_protocolsallowed_svg_properties)rH   )
isinstancestrformat	__class____name__	TypeErrorr?   parseFragmentBleachSanitizerFilterrA   r9   r.   r<   r-   r:   r;   r=   rC   render)rD   textmessagedomfilteredfilter_classs         r   cleanzCleaner.clean   s     $$$ 	%KRR0 S   
 G$$$ 	2k''--(;;s##&*j $ 3!Y#';"n#%
 
 
 !L 	5 	5L#|8444HH%%h///r!   )
rS   
__module____qualname____doc__ALLOWED_TAGSALLOWED_ATTRIBUTESALLOWED_STYLESALLOWED_PROTOCOLSrE   r]   r   r!   r   r+   r+   7   s]         < %#@
 @
 @
 @
D'0 '0 '0 '0 '0r!   r+   c                      t                     r S t           t                    r fd}|S t           t                    r fd}|S t	          d          )a0  Generates attribute filter function for the given attributes value

    The attributes value can take one of several shapes. This returns a filter
    function appropriate to the attributes value. One nice thing about this is
    that there's less if/then shenanigans in the ``allow_token`` method.

    c                     | v r*|          }t          |          r || ||          S ||v rdS dv r(d         }t          |          r || ||          S ||v S dS )NT*F)callable)tagattrvalueattr_valr9   s       r   _attr_filterz.attribute_filter_factory.<locals>._attr_filter   s    j  %c?H%% 6#8Cu5558##4j  %c?H%% 6#8Cu555x''5r!   c                     |v S Nr   )ri   rj   rk   r9   s      r   rm   z.attribute_filter_factory.<locals>._attr_filter   s    :%%r!   z3attributes needs to be a callable, a list or a dict)rh   rO   dictlist
ValueError)r9   rm   s   ` r   attribute_filter_factoryrs      s     
 *d## 	 	 	 	 	$ *d## 	& 	& 	& 	& 	& 
J
K
KKr!   c                   `     e Zd ZdZeddf fd	Zd Zd Zd Zd Z	d	 Z
d
 Zd Zd Zd Z xZS )rV   zmhtml5lib Filter that sanitizes text

    This filter can be used anywhere html5lib filters can be used.

    FTc                     t          |          | _        || _        || _        t	          j        ddt          d            t          t          |           j	        |fi |S )a   Creates a BleachSanitizerFilter instance

        :arg Treewalker source: stream

        :arg list tags: allowed list of tags; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list styles: allowed list of css styles; defaults to
            ``bleach.sanitizer.ALLOWED_STYLES``

        :arg list protocols: allowed list of protocols for links; defaults
            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg bool strip_disallowed_elements: whether or not to strip disallowed
            elements

        :arg bool strip_html_comments: whether or not to strip HTML comments

        ignorez"html5lib's sanitizer is deprecatedzbleach._vendor.html5lib)rY   categorymodule)
rs   attr_filterrI   rJ   warningsfilterwarningsDeprecationWarningsuperrV   rE   )rD   rH   r9   rI   rJ   kwargsrR   s         r   rE   zBleachSanitizerFilter.__init__   su    < 4J??)B&#6  	8',		
 	
 	
 	
 ;u*D11:6LLVLLLr!   c              #      K   |D ]=}|                      |          }|st          |t                    r
|D ]}|V  9|V  >d S ro   )sanitize_tokenrO   rq   )rD   token_iteratortokenretsubtokens        r   sanitize_streamz%BleachSanitizerFilter.sanitize_stream  s{      # 
	 
	E%%e,,C #t$$  # # #H"NNNN# 				
	 
	r!   c              #   J  K   g }|D ]u}|rK|d         dk    r|                     |           &d                    d |D                       dd}g }|V  n"|d         dk    r|                     |           q|V  vd                    d |D                       dd}|V  dS )z/Merge consecutive Characters tokens in a streamtype
Charactersr   c                     g | ]
}|d          S datar   r   
char_tokens     r   r    z:BleachSanitizerFilter.merge_characters.<locals>.<listcomp>8      TTTJZ/TTTr!   )r   r   c                     g | ]
}|d          S r   r   r   s     r   r    z:BleachSanitizerFilter.merge_characters.<locals>.<listcomp>F  r   r!   N)appendjoin)rD   r   characters_bufferr   	new_tokens        r   merge_charactersz&BleachSanitizerFilter.merge_characters*  s     # 	 	E  =L00%,,U333
 !#TTBSTTT! ! !-	! !I )+%#OOOOv,..!((///KKKK GGTTBSTTTUU 
 
	 r!   c                     |                      |                     t          j                            |                               S ro   )r   r   r   Filter__iter__)rD   s    r   r   zBleachSanitizerFilter.__iter__K  s<    $$  !5!>!>t!D!DEE
 
 	
r!   c                 v   |d         }|dv r^|d         | j         v r|                     |          S | j        rdS d|v rt          |d                   |d<   |                     |          S |dk    r-| j        s$t          j        |d         ddd	
          |d<   |S dS |dk    r|                     |          S |S )a  Sanitize a token either by HTML-encoding or dropping.

        Unlike sanitizer.Filter, allowed_attributes can be a dict of {'tag':
        ['attribute', 'pairs'], 'tag': callable}.

        Here callable is a function with two arguments of attribute name and
        value. It should return true of false.

        Also gives the option to strip tags instead of encoding.

        :arg dict token: token to sanitize

        :returns: token or list of tokens

        r   )StartTagEndTagEmptyTagrG   Nr   Commentz&quot;z&#x27;)"')entitiesr   )	rK   allow_tokenrI   r   disallowed_tokenrJ   r   escapesanitize_characters)rD   r   
token_types      r   r   z$BleachSanitizerFilter.sanitize_tokenP  s      6]
;;;V} 555''.../ 4t U?? %;5=$I$IE&M,,U3339$$+  - 4&M(,J,J! ! !f t<''++E222 Lr!   c                 &   |                     dd          }|s|S t                              t          |          }||d<   d|vr|S g }t	          j        |          D ]}|s|                    d          rt	          j        |          }|l|dk    r|                    ddd           n|                    d|d	           |t          |          d
z   d         }|r|                    d|d           |                    d|d           |S )a  Handles Characters tokens

        Our overridden tokenizer doesn't do anything with entities. However,
        that means that the serializer will convert all ``&`` in Characters
        tokens to ``&amp;``.

        Since we don't want that, we extract entities here and convert them to
        Entity tokens so the serializer will let them be.

        :arg token: the Characters token to work on

        :returns: a list of tokens

        r   r   &Nampr   )r   r   Entity)r   rG      )
getINVISIBLE_CHARACTERS_REsubINVISIBLE_REPLACEMENT_CHARr   next_possible_entity
startswithmatch_entityr   len)rD   r   r   
new_tokenspartentity	remainders          r   r   z)BleachSanitizerFilter.sanitize_characters  sZ    yy$$ 	L&**+EtLLf d??L
 "6t<< 	D 	DD s## &3D99% #))<*M*MNNNN"))8V*L*LMMM !%S[[1_%6%6 7I  U"))<*S*STTT|TBBCCCCr!   c                    t          j        |          }t          j        dd|          }|                    dd          }|                                }	 t          |          }n# t          $ r Y dS w xY w|j        r|j        |v r|S n@|	                    d          r|S d|v r|
                    d          d         |v r|S d|v r|S dS )	zChecks a uri value to see if it's allowed

        :arg value: the uri value to sanitize
        :arg allowed_protocols: list of allowed protocols

        :returns: allowed value or None

        z[`\000-\040\177-\240\s]+r   u   �N#:r   r   )r   convert_entitiesrer   replacelowerr   rr   schemer   split)rD   rk   rM   	new_valueparseds        r   sanitize_uri_valuez(BleachSanitizerFilter.sanitize_uri_value  s    "2599	 F6IFF	 %%h33	 OO%%		 i((FF 	 	 	44	 = 	} 111 2
 ##C((  iIOOC$8$8$;?P$P$P ***ts   A& &
A43A4c                 X   d|v r$i }|d                                          D ]\  }}|\  }}|                     |d         ||          s(|| j        v r |                     || j                  }|O|}|| j        v r<t          j        ddt          |                    }|	                                }|s|}d|d         f| j
        v r.|dt          j        d         dffv rt          j        d	|          r|d
k    r|                     |          }|||<   t          |          |d<   |S )z-Handles the case where we're allowing the tagr   rG   Nzurl\s*\(\s*[^#\s][^)]+?\) )Nr   xlinkr   z
^\s*[^#\s])Nstyle)itemsry   attr_val_is_urir   rM   svg_attr_val_allows_refr   r   r   r.   svg_allow_local_hrefr   
namespacessearchsanitize_cssr   )	rD   r   attrsnamespaced_nameval	namespacerG   r   new_vals	            r   r   z!BleachSanitizerFilter.allow_token  s~   U?? E(-f(;(;(=(= ,- ,-$"1	4 ''ftSAA  #d&::: $ 7 7T=S T TI ( #C #d&BBB f%A3QTVVG%mmooG" & 
 & %-(D,EEE&&&1':FC+   9]C88 %$ #o55++C00C *-o&&2599E&Mr!   c                    |d         }|dk    rd|d         z  |d<   n|d         r|dv sJ g }|d                                          D ]V\  \  }}}|r|s||}}||t          j        vr|}nt          j        |         d|}|                    d|d	|d
           Wd|d         d                    |          d|d<   nd|d         z  |d<   |                    d          r|d         d d         dz   |d<   d|d<   |d= |S )Nr   r   z</%s>rG   r   )r   r   r   r   z="r   <r   >z<%s>selfClosingz/>r   )r   r   prefixesr   r   r   )rD   r   r   r   nsrG   vr   s           r   r   z&BleachSanitizerFilter.disallowed_token/  sz   6]
!!#eFm3E&MM6] 	3!99999E!&v!4!4!6!6  
TA  (d (#RB :=+A!A!A&*OO1>1G1K1K1KTT&RO ( 	 	 	 	. ).frwwu~~~~FE&MM #U6]2E&M99]## 	6!&M#2#.5E&M$f&Mr!   c                    t          j        |          }t          j        d                              d|          }|                    d          }t          j        dt          j        t          j        z            }|D ]}|                    |          s dS t          j        d|          sdS g }t          j	        d|          D ]{\  }}|s|
                                | j        v r|                    |d	z   |z   dz              B|
                                | j        v r|                    |d	z   |z   dz              |d                    |          S )
zSanitizes css in style tagszurl\s*\(\s*[^\s)]+?\s*\)\s*r   ;ak  ^(  # consider a style attribute value as composed of:
[/:,#%!.\s\w]    # a non-newline character
|\w-\w           # 3 characters in the form \w-\w
|'[\s\w]+'\s*    # a single quoted string of [\s\w]+ with trailing space
|"[\s\w]+"       # a double quoted string of [\s\w]+
|\([\d,%\.\s]+\) # a parenthesized string of one or more digits, commas, periods, ...
)*$)flagsr   z ^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$z([-\w]+)\s*:\s*([^:;]*)z: )r   r   r   compiler   r   UVERBOSEmatchfindallr   rL   r   rN   r   )rD   r   partsgauntletr   r]   proprk   s           r   r   z"BleachSanitizerFilter.sanitize_css[  so    .u55 
9::>>sEJJ C  : $#	
 	
 	
  	 	D>>$'' rr x;UCC 	2:&@%HH 	8 	8KD% zz||t:::TD[50367777!<<<TD[5036777xxr!   )rS   r^   r_   r`   rb   rE   r   r   r   r   r   r   r   r   r   __classcell__)rR   s   @r   rV   rV      s          &"' )M )M )M )M )M )MV    B
 
 

- - -^; ; ;z5 5 5n: : :x* * *X* * * * * * *r!   rV   )	itertoolsr   r   rz   bleach._vendor.parser   xml.sax.saxutilsr   bleachr   bleach.utilsr   ra   rb   rc   rd   r   rangeINVISIBLE_CHARACTERSr   UNICODEr   r   r+   rs   SanitizerFilterrV   r   r!   r   <module>r      s         				  ) ) ) ) ) ) % % % % % %             / / / / / /  $ '	Iy    0//  wwFFUU55A;;b"uuR}}EEFFF  
 %"*S+?%?#%ErzRR  ! F0 F0 F0 F0 F0 F0 F0 F0R(L (L (LVZ Z Z Z ZM9 Z Z Z Z Zr!   