
    Ofc0                         d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ  G d
 de          ZdS )a  
Module containing the UniversalDetector detector class, which is the primary
class a user of ``chardet`` should use.

:author: Mark Pilgrim (initial port to Python)
:author: Shy Shalom (original C code)
:author: Dan Blanchard (major refactoring for 3.0)
:author: Ian Cordasco
    N   )CharSetGroupProber)
InputStateLanguageFilterProbingState)EscCharSetProber)Latin1Prober)MBCSGroupProber)SBCSGroupProberc            	           e Zd ZdZdZ ej        d          Z ej        d          Z ej        d          Z	dddd	d
ddddZ
ej        fdZd Zd Zd ZdS )UniversalDetectoraq  
    The ``UniversalDetector`` class underlies the ``chardet.detect`` function
    and coordinates all of the different charset probers.

    To get a ``dict`` containing an encoding and its confidence, you can simply
    run:

    .. code::

            u = UniversalDetector()
            u.feed(some_bytes)
            u.close()
            detected = u.result

    g?s   [-]s   (|~{)s   [-]zWindows-1252zWindows-1250zWindows-1251zWindows-1256zWindows-1253zWindows-1255zWindows-1254zWindows-1257)z
iso-8859-1z
iso-8859-2z
iso-8859-5z
iso-8859-6z
iso-8859-7z
iso-8859-8z
iso-8859-9ziso-8859-13c                     d | _         g | _        d | _        d | _        d | _        d | _        d | _        || _        t          j	        t                    | _        d | _        |                                  d S )N)_esc_charset_prober_charset_probersresultdone	_got_data_input_state
_last_charlang_filterlogging	getLogger__name__logger_has_win_bytesreset)selfr   s     9lib/python3.11/site-packages/chardet/universaldetector.py__init__zUniversalDetector.__init__Q   si    #'  "	 &'11"

    c                     dddd| _         d| _        d| _        d| _        t          j        | _        d| _        | j        r| j        	                                 | j
        D ]}|	                                 dS )z
        Reset the UniversalDetector and all of its probers back to their
        initial states.  This is called by ``__init__``, so you only need to
        call this directly in between analyses of different documents.
        N        encoding
confidencelanguageFr    )r   r   r   r   r   
PURE_ASCIIr   r   r   r   r   )r   probers     r   r   zUniversalDetector.reset^   s     $(sMM	#&1# 	-$**,,,+ 	 	FLLNNNN	 	r    c                    | j         rdS t          |          sdS t          |t                    st          |          }| j        s|                    t          j                  rdddd| _        n|                    t          j	        t          j
        f          rdddd| _        nx|                    d          rdddd| _        nW|                    d	          rd
ddd| _        n6|                    t          j        t          j        f          rdddd| _        d| _        | j        d         	d| _         dS | j        t          j        k    rt| j                            |          rt          j        | _        nH| j        t          j        k    r3| j                            | j        |z             rt          j        | _        |dd         | _        | j        t          j        k    r| j        st/          | j                  | _        | j                            |          t4          j        k    r?| j        j        | j                                        | j        j        d| _        d| _         dS dS | j        t          j        k    r| j        sztA          | j                  g| _        | j        tB          j"        z  r&| j        #                    tI                                 | j        #                    tK                                 | j        D ]U}|                    |          t4          j        k    r0|j        |                                |j        d| _        d| _          nV| j&                            |          rd| _'        dS dS dS )a  
        Takes a chunk of a document and feeds it through all of the relevant
        charset probers.

        After calling ``feed``, you can check the value of the ``done``
        attribute to see if you need to continue feeding the
        ``UniversalDetector`` more data, or if it has made a prediction
        (in the ``result`` attribute).

        .. note::
           You should always call ``close`` when you're done feeding in your
           document if ``done`` is not already ``True``.
        Nz	UTF-8-SIG      ? r#   zUTF-32s     zX-ISO-10646-UCS-4-3412s     zX-ISO-10646-UCS-4-2143zUTF-16Tr$   )(r   len
isinstance	bytearrayr   
startswithcodecsBOM_UTF8r   BOM_UTF32_LEBOM_UTF32_BEBOM_LEBOM_BEr   r   r'   HIGH_BYTE_DETECTORsearch	HIGH_BYTEESC_DETECTORr   	ESC_ASCIIr   r   r   feedr   FOUND_ITcharset_nameget_confidencer&   r   r
   r   NON_CJKappendr   r	   WIN_BYTE_DETECTORr   )r   byte_strr(   s      r   r<   zUniversalDetector.feedo   s    9 	F8}} 	F(I.. 	+ **H ~ "	""6?33 /+6-0+-/ / $$f&9&,&9&; < < / ,4-0+-/ / $$%899 /+C-0+-/ / $$%899 
/+C-0+-/ / $$fmV]%CDD / ,4-0+-/ / "DN{:&  	 
 55 	9&--h77 9$.$8!!"j&;; 9%,,T_x-GHH9$.$8!"233- 
 44  	++ N+;D<L+M+M(',,X66,:OO !#7D#7FFHH#7@B B !			! ! *"66 	+( =)89I)J)J(K%#n&<< D)001B1BCCC%,,\^^<<</  ;;x((L,AA /5/B171F1F1H1H/5#@ #@DK !%DIE %,,X66 +&*###	+ 	++ +r    c           	      6   | j         r| j        S d| _         | j        s| j                            d           n| j        t          j        k    rdddd| _        n| j        t          j        k    rd}d}d}| j	        D ]#}|s|
                                }||k    r|}|}$|r|| j        k    r{|j        }|j                                        }|
                                }|                    d	          r"| j        r| j                            ||          }|||j        d| _        | j                                        t(          j        k    r| j        d
         | j                            d           | j	        D ]}|st-          |t.                    rD|j        D ];}| j                            d|j        |j        |
                                           <^| j                            d|j        |j        |
                                           | j        S )z
        Stop analyzing the current document and come up with a final
        prediction.

        :returns:  The ``result`` attribute, a ``dict`` with the keys
                   `encoding`, `confidence`, and `language`.
        Tzno data received!asciir*   r+   r#   Nr"   ziso-8859r$   z no probers hit minimum thresholdz%s %s confidence = %s)r   r   r   r   debugr   r   r'   r9   r   r?   MINIMUM_THRESHOLDr>   lowerr0   r   ISO_WIN_MAPgetr&   getEffectiveLevelr   DEBUGr.   r   probers)	r   prober_confidencemax_prober_confidence
max_proberr(   r>   lower_charset_namer%   group_probers	            r   closezUniversalDetector.close   s    9 	;	~ !	@K12222 *"77 	@'.),')+ +DKK
 *"66 	@ $$'!J/ ( ( $*$9$9$;$;!$'<< (,=)!'J @4t7MM @)6%/%<%B%B%D%D"'6688
 &00<< J* J'+'7';';<N<H(J (J+7-7+5+>@ @
 ;((**gm; 	I{:& I!!"DEEE$($9 I IL' ! !,0BCC 
I&2&: G GF K--.E.4.A.4o.4.C.C.E.EG G G GG ))*A*6*C*6*?*6*E*E*G*GI I I I {r    N)r   
__module____qualname____doc__rG   recompiler7   r:   rB   rI   r   ALLr   r   r<   rS    r    r   r   r   3   s           #N332:l++L"
>22!/!/!/!/!/!/!/"02 2K $2#5      "k+ k+ k+ZB B B B Br    r   )rV   r1   r   rW   charsetgroupproberr   enumsr   r   r   	escproberr   latin1proberr	   mbcsgroupproberr
   sbcsgroupproberr   objectr   rZ   r    r   <module>rb      s   8    				 2 2 2 2 2 2 ; ; ; ; ; ; ; ; ; ; ' ' ' ' ' ' & & & & & & , , , , , , , , , , , ,k k k k k k k k k kr    