o
    tfU                  	   @   s   d dl mZ d dlZd dlZd dlmZ d dlmZ d dlmZ e	dZ
ddgdgdgd	Ze	d
Zddd eed deddeddD Zede d ejZdZG dd deZG dd dZdd ZG dd dejZdS )    )chainN)unescape)html5lib_shim)
parse_shim)aabbracronymbZ
blockquotecodeZemiZliolstrongZulhreftitle)r   r   r   )httphttpsmailto c                 C   s   g | ]}t |qS  )chr).0cr   r   Y/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/bleach/sanitizer.py
<listcomp>*       r   	                []?c                   @   s   e Zd ZdS )NoCssSanitizerWarningN)__name__
__module____qualname__r   r   r   r   r#   5   s    r#   c                   @   s0   e Zd ZdZeeeddddfddZdd ZdS )	Cleanera  Cleaner for cleaning HTML fragments of malicious content

    This cleaner is a security-focused function whose sole purpose is to remove
    malicious content from a string such that it can be displayed as content in
    a web page.

    To use::

        from bleach.sanitizer import Cleaner

        cleaner = Cleaner()

        for text in all_the_yucky_things:
            sanitized = cleaner.clean(text)

    .. Note::

       This cleaner is not designed to use to transform content to be used in
       non-web-page contexts.

    .. Warning::

       This cleaner is not thread-safe--the html parser has internal state.
       Create a separate cleaner per thread!


    FTNc           
      C   s   || _ || _|| _|| _|| _|pg | _|| _tj| j | jddd| _	t
d| _tjddddddd| _|du rjg }t|trC|}nt|tr]g }| D ]}	t|	ttfr\||	 qNd|v rltjd	td
 dS dS dS )a:  Initializes a Cleaner

        :arg set tags: set of allowed tags; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list protocols: allowed list of protocols for links; defaults
            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg bool strip: whether or not to strip disallowed elements

        :arg bool strip_comments: whether or not to strip HTML comments

        :arg list filters: list of html5lib Filter classes to pass streamed content through

            .. seealso:: http://html5lib.readthedocs.io/en/latest/movingparts.html#filters

            .. Warning::

               Using filters changes the output of ``bleach.Cleaner.clean``.
               Make sure the way the filters change the output are secure.

        :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
            sanitizing style attribute values and style text; defaults to None

        F)tagsstripZconsume_entitiesZnamespaceHTMLElementsetreealwaysT)Zquote_attr_valuesZomit_optional_tagsZescape_lt_in_attrsZresolve_entitiessanitizeZalphabetical_attributesNstylez7'style' attribute specified, but css_sanitizer not set.)category)r(   
attributes	protocolsr)   strip_commentsfilterscss_sanitizerr   ZBleachHTMLParserparserZgetTreeWalkerwalkerZBleachHTMLSerializer
serializer
isinstancelistdictvaluestupleextendwarningswarnr#   )
selfr(   r/   r0   r)   r1   r2   r3   Zattributes_valuesr:   r   r   r   __init__V   sN   &




zCleaner.__init__c              	   C   s   t |tsd|jjdd }t||sdS | j|}t| || j	| j
| j| j| j| jd}| jD ]}||d}q4| j|S )zCleans text and returns sanitized result as unicode

        :arg str text: text to be cleaned

        :returns: sanitized text as unicode

        :raises TypeError: if ``text`` is not a text type

        zargument cannot be of z type, zmust be of text typer   )sourceallowed_tagsr/   strip_disallowed_tagsstrip_html_commentsr3   allowed_protocols)rA   )r7   str	__class__r$   	TypeErrorr4   ZparseFragmentBleachSanitizerFilterr5   r(   r/   r)   r1   r3   r0   r2   r6   render)r?   textmessagedomfilteredZfilter_classr   r   r   clean   s*   


zCleaner.clean)	r$   r%   r&   __doc__ALLOWED_TAGSALLOWED_ATTRIBUTESALLOWED_PROTOCOLSr@   rO   r   r   r   r   r'   9   s    
Ur'   c                    sH   t  r S t tr fdd}|S t tr  fdd}|S td)a0  Generates attribute filter function for the given attributes value

    The attributes value can take one of several shapes. This returns a filter
    function appropriate to the attributes value. One nice thing about this is
    that there's less if/then shenanigans in the ``allow_token`` method.

    c                    s`   |  v r |  }t |r|| ||S ||v rdS d v r. d }t |r*|| ||S ||v S dS )NT*F)callable)tagattrvalueZattr_valr/   r   r   _attr_filter   s   z.attribute_filter_factory.<locals>._attr_filterc                    s   | v S Nr   )rV   rW   rX   rY   r   r   rZ      s   z3attributes needs to be a callable, a list or a dict)rU   r7   r9   r8   
ValueError)r/   rZ   r   rY   r   attribute_filter_factory   s   

r]   c                	   @   sr   e Zd ZdZeeeejej	ej
dddf	ddZdd Zd	d
 Zdd Zdd Zdd Zdd Zdd Zdd ZdS )rI   zmhtml5lib Filter that sanitizes text

    This filter can be used anywhere html5lib filters can be used.

    FTNc                 C   sT   t j| | t|| _t|| _t|| _|| _|	| _	|| _
|| _|
| _|| _dS )a_  Creates a BleachSanitizerFilter instance

        :arg source: html5lib TreeWalker stream as an html5lib TreeWalker

        :arg set allowed_tags: set of allowed tags; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list allowed_protocols: allowed list of protocols for links; defaults
            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg attr_val_is_uri: set of attributes that have URI values

        :arg svg_attr_val_allows_ref: set of SVG attributes that can have
            references

        :arg svg_allow_local_href: set of SVG elements that can have local
            hrefs

        :arg bool strip_disallowed_tags: whether or not to strip disallowed
            tags

        :arg bool strip_html_comments: whether or not to strip HTML comments

        :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
            sanitizing style attribute values and style text; defaults to None

        N)r   Filterr@   	frozensetrB   rE   r]   attr_filterrC   rD   attr_val_is_urisvg_attr_val_allows_refr3   svg_allow_local_href)r?   rA   rB   r/   rE   ra   rb   rc   rC   rD   r3   r   r   r   r@     s   0



zBleachSanitizerFilter.__init__c                 c   s<    |D ]}|  |}|sqt|tr|E d H  q|V  qd S r[   )sanitize_tokenr7   r8   )r?   token_iteratortokenretr   r   r   sanitize_streamA  s   

z%BleachSanitizerFilter.sanitize_streamc                 c   s    g }|D ]2}|r(|d dkr| | qddd |D dd}g }|V  n|d dkr4| | q|V  qddd |D dd}|V  dS )	z/Merge consecutive Characters tokens in a streamtype
Charactersr   c                 S      g | ]}|d  qS datar   r   Z
char_tokenr   r   r   r   [  r   z:BleachSanitizerFilter.merge_characters.<locals>.<listcomp>)rm   ri   c                 S   rk   rl   r   rn   r   r   r   r   i  r   N)appendjoin)r?   re   Zcharacters_bufferrf   Z	new_tokenr   r   r   merge_charactersM  s,   


z&BleachSanitizerFilter.merge_charactersc                 C   s   |  | tj| S r[   )rq   rh   r   r^   __iter__)r?   r   r   r   rr   n  s   zBleachSanitizerFilter.__iter__c                 C   s   |d }|dv r|d | j v r| |S | jrdS | |S |dkr7| js5tj|d ddd	d
|d< |S dS |dkr@| |S |S )a  Sanitize a token either by HTML-encoding or dropping.

        Unlike sanitizer.Filter, allowed_attributes can be a dict of {'tag':
        ['attribute', 'pairs'], 'tag': callable}.

        Here callable is a function with two arguments of attribute name and
        value. It should return true of false.

        Also gives the option to strip tags instead of encoding.

        :arg dict token: token to sanitize

        :returns: token or list of tokens

        ri   )StartTagEndTagEmptyTagnameNCommentrm   z&quot;z&#x27;)"')entitiesrj   )rB   allow_tokenrC   disallowed_tokenrD   r   escapesanitize_characters)r?   rf   
token_typer   r   r   rd   s  s"   



z$BleachSanitizerFilter.sanitize_tokenc                 C   s   | dd}|s
|S tt|}||d< d|vr|S g }t|D ]E}|s&q!|dr^t|}|dur^|dkrA|ddd n|d|d	 |t	|d
 d }|r]|d|d q!|d|d q!|S )a  Handles Characters tokens

        Our overridden tokenizer doesn't do anything with entities. However,
        that means that the serializer will convert all ``&`` in Characters
        tokens to ``&amp;``.

        Since we don't want that, we extract entities here and convert them to
        Entity tokens so the serializer will let them be.

        :arg token: the Characters token to work on

        :returns: a list of tokens

        rm   r   &Namprj   )ri   rm   Entity)ri   rv      )
getINVISIBLE_CHARACTERS_REsubINVISIBLE_REPLACEMENT_CHARr   Znext_possible_entity
startswithZmatch_entityro   len)r?   rf   rm   Z
new_tokenspartentity	remainderr   r   r   r~     s.   

z)BleachSanitizerFilter.sanitize_charactersc                 C   s   t |}tdd|}|dd}| }zt|}W n
 ty'   Y dS w |j	r4|j	|v r2|S dS |
dr;|S d|v rJ|dd |v rJ|S d|v sRd	|v rT|S dS )
zChecks a uri value to see if it's allowed

        :arg value: the uri value to sanitize
        :arg allowed_protocols: list of allowed protocols

        :returns: allowed value or None

        z[`\000-\040\177-\240\s]+r   u   �N#:r   r   r   )r   Zconvert_entitiesrer   replacelowerr   urlparser\   schemer   split)r?   rX   rE   Znormalized_uriparsedr   r   r   sanitize_uri_value  s*   


z(BleachSanitizerFilter.sanitize_uri_valuec           	      C   s   d|v r|i }|d   D ]k\}}|\}}| |d ||sq|| jv r1| || j}|du r/q|}|| jv rHtddt|}|	 }|sFq|}d|d f| j
v rc|dtjd dffv rctd	|rcq|d
krs| jrq| j|}nd}|||< q||d< |S )z-Handles the case where we're allowing the tagrm   rv   Nzurl\s*\(\s*[^#\s][^)]+?\) )Nr   Zxlinkr   z
^\s*[^#\s])Nr-   r   )itemsr`   ra   r   rE   rb   r   r   r   r)   rc   r   
namespacessearchr3   Zsanitize_css)	r?   rf   attrsnamespaced_nameval	namespacerv   	new_valuenew_valr   r   r   r{     s>   


z!BleachSanitizerFilter.allow_tokenc                 C   s  |d }|dkrd|d  d|d< n^|d rg|dv sJ g }|d   D ]1\\}}}|r4|s4||}}|d u s=|tjvr@|}n
tj|  d| }|d	| d
| d q%d|d  d| d|d< n
d|d  d|d< |dr|d d d  d|d< d|d< |d= |S )Nri   rt   z</rv   >rm   )rs   ru   r   r   z="rx   <r   ZselfClosingz/>rj   )r   r   prefixesro   rp   r   )r?   rf   r   r   nsrv   vr   r   r   r   r|   Z  s(   
 
z&BleachSanitizerFilter.disallowed_token)r$   r%   r&   rP   rQ   rR   rS   r   ra   rb   rc   r@   rh   rq   rr   rd   r~   r   r{   r|   r   r   r   r   rI      s(    	
>!+=:ErI   )	itertoolsr   r   r=   Zxml.sax.saxutilsr   Zbleachr   r   r_   rQ   rR   rS   rp   rangeZINVISIBLE_CHARACTERScompileUNICODEr   r   UserWarningr#   r'   r]   ZSanitizerFilterrI   r   r   r   r   <module>   s0    & +