
    &Vff+                     :   d Z ddlZddlZddlZddlZddlZddlmZ  ed          	 	 	 dd            Z	 ed	          	 	 	 	 dd
            Z
 ed          	 	 	 	 	 dd            Z ed           G d de                      Z ed          d             ZdS )z0Deprecated text preprocessing APIs from Keras 1.    N)keras_exportz6keras._legacy.preprocessing.text.text_to_word_sequence!!"#$%&()*+,-./:;<=>?@[\]^_`{|}~	
T c                     |r|                                  } fd|D             }t                              |          }|                     |          } |                               }d |D             S )DEPRECATED.c                     i | ]}|S  r	   ).0csplits     `/var/www/html/software/conda/lib/python3.11/site-packages/keras/src/legacy/preprocessing/text.py
<dictcomp>z)text_to_word_sequence.<locals>.<dictcomp>   s    0001a000    c                     g | ]}||S r	   r	   )r
   is     r   
<listcomp>z)text_to_word_sequence.<locals>.<listcomp>   s       !a A   r   )lowerstr	maketrans	translater   )
input_textfiltersr   r   translate_dicttranslate_mapseqs      `   r   text_to_word_sequencer      s      (%%''
0000000NMM.11M%%m44J


5
!
!C  s    r   z(keras._legacy.preprocessing.text.one_hotc           	      8    t          | |t          ||||          S )r   )hash_functionr   r   r   analyzer)hashing_trickhash)r   nr   r   r   r   s         r   one_hotr#       s0     	   r   z.keras._legacy.preprocessing.text.hashing_trickc                     t           n	dk    rd |t          | |||          }n ||           }fd|D             S )r   Nmd5c                     t          t          j        |                                                                           d          S )N   )inthashlibr%   encode	hexdigest)ws    r   r   z$hashing_trick.<locals>.hash_functionD   s0    w{188::..88::B???r   r   r   r   c                 8    g | ]} |          d z
  z  d z   S )   r	   )r
   r,   r   r"   s     r   r   z!hashing_trick.<locals>.<listcomp>N   s1    :::]]1Q'!+:::r   )r!   r   )textr"   r   r   r   r   r   r   s    ``     r   r    r    5   s     	%			@ 	@ 	@ #'e
 
 
 htnn:::::c::::r   z*keras._legacy.preprocessing.text.Tokenizerc                   h    e Zd ZdZ	 	 	 	 	 	 	 ddZd Zd	 Zd
 Zd Zd Z	d Z
ddZddZd Zd ZdS )	Tokenizerr   Nr   Tr   Fc                    d|v r)t          j        d           |                    d          }|                    dd          }	|rt          dt	          |          z             t          j                    | _        t          j        t                    | _
        || _        || _        || _        || _        |	| _        || _        || _        t          j        t                    | _        i | _        i | _        || _        d S )Nnb_wordszDThe `nb_words` argument in `Tokenizer` has been renamed `num_words`.document_countr   z Unrecognized keyword arguments: )warningswarnpop	TypeErrorr   collectionsOrderedDictword_countsdefaultdictr(   	word_docsr   r   r   	num_wordsr5   
char_level	oov_token
index_docs
word_index
index_wordr   )
selfr?   r   r   r   r@   rA   r   kwargsr5   s
             r   __init__zTokenizer.__init__U   s     M0   

:..I$4a88 	N>VLMMM&244$055

",$"%1#66 r   c                 ^   |D ]}| xj         dz  c_         | j        st          |t                    r@| j        r6t          |t                    rd |D             }n|                                }|}n?| j        #t          || j        | j        | j                  }n|                     |          }|D ]+}|| j	        v r| j	        |xx         dz  cc<   !d| j	        |<   ,t          |          D ]}| j        |xx         dz  cc<   t          | j	                                                  }|                    d d           | j        g }n| j        g}|                    d |D                        t!          t#          |t          t%          dt'          |          dz                                           | _        d | j                                        D             | _        t          | j                                                  D ]\  }}|| j        | j        |         <   d S )	Nr/   c                 6    g | ]}|                                 S r	   r   r
   	text_elems     r   r   z*Tokenizer.fit_on_texts.<locals>.<listcomp>   "    HHHi	 1 1HHHr   r-   c                     | d         S Nr/   r	   )xs    r   <lambda>z(Tokenizer.fit_on_texts.<locals>.<lambda>   s
    1Q4 r   T)keyreversec              3   &   K   | ]}|d          V  dS )r   Nr	   )r
   wcs     r   	<genexpr>z)Tokenizer.fit_on_texts.<locals>.<genexpr>   s&      22B"Q%222222r   c                     i | ]\  }}||	S r	   r	   )r
   r,   r   s      r   r   z*Tokenizer.fit_on_texts.<locals>.<dictcomp>   s    DDDDAq1aDDDr   )r5   r@   
isinstancelistr   r   r   r   r   r<   setr>   itemssortrA   extenddictziprangelenrC   rD   rB   )rE   textsr0   r   r,   wcounts
sorted_vocr   s           r   fit_on_textszTokenizer.fit_on_textsy   sp    	' 	'D1$ .*T4"8"8 .: ,!$-- ,HH4HHH#zz||=(/ $"j"j	  CC ----C , ,((($Q'''1,''''*+D$Q''XX ' 'q!!!Q&!!!!' t'--//00666>!JJ.)J22'222222 
Dq#j//A*=!>!>??@@
 
 EDDO,A,A,C,CDDD--//00 	4 	4DAq23DODOA.//	4 	4r   c                     | xj         t          |          z  c_         |D ]+}t          |          }|D ]}| j        |xx         dz  cc<   ,d S rO   )r5   ra   rZ   rB   )rE   	sequencesr   r   s       r   fit_on_sequenceszTokenizer.fit_on_sequences   su    s9~~- 	( 	(Cc((C ( ("""a'""""(	( 	(r   c                 F    t          |                     |                    S N)rY   texts_to_sequences_generator)rE   rb   s     r   texts_to_sequenceszTokenizer.texts_to_sequences   s    D55e<<===r   c              #     K   | j         }| j                            | j                  }|D ]}| j        st          |t                    r@| j        r6t          |t                    rd |D             }n|                                }|}n?| j        #t          || j
        | j        | j                  }n|                     |          }g }|D ]p}| j                            |          }|6|r||k    r||                    |           >|                    |           T| j        |                    |           q|V  d S )Nc                 6    g | ]}|                                 S r	   rJ   rK   s     r   r   z:Tokenizer.texts_to_sequences_generator.<locals>.<listcomp>   rM   r   r-   )r?   rC   getrA   r@   rX   rY   r   r   r   r   r   append)	rE   rb   r?   oov_token_indexr0   r   vectr,   r   s	            r   rk   z&Tokenizer.texts_to_sequences_generator   sn     N	/--dn== 	 	D .*T4"8"8 .: ,!$-- ,HH4HHH#zz||=(/ $"j"j	  CC ----CD 	1 	1O''**=  'Q)^^*6 KK888A^/KK000JJJJ;	 	r   c                 F    t          |                     |                    S rj   )rY   sequences_to_texts_generator)rE   rg   s     r   sequences_to_textszTokenizer.sequences_to_texts   s    D55i@@AAAr   c              #     K   | j         }| j                            | j                  }|D ]}g }|D ]}| j                            |          }|A|r)||k    r#| |                    | j        |                    I|                    |           _| j         |                    | j        |                    d                    |          }|V  d S )Nr   )r?   rC   ro   rA   rD   rp   join)rE   rg   r?   rq   r   rr   numwords           r   rt   z&Tokenizer.sequences_to_texts_generator   s      N	/--dn== 	 	CD 	B 	B**3//#  *SI%5%5*6 KK(HIIID))))^/KK @AAA88D>>DJJJJ	 	r   binaryc                 Z    |                      |          }|                     ||          S )N)mode)rl   sequences_to_matrix)rE   rb   r|   rg   s       r   texts_to_matrixzTokenizer.texts_to_matrix   s.    ++E22	''	'===r   c                 `   | j         s.| j        rt          | j                  dz   }nt          d          | j         }|dk    r| j        st          d          t          j        t          |          |f          }t          |          D ]!\  }}|s	t          j	        t                    }|D ]}||k    r	||xx         dz  cc<   t          |                                          D ]\  }}	|dk    r|	||         |<   |dk    r|	t          |          z  ||         |<   9|dk    rd||         |<   K|dk    radt          j        |	          z   }
t          j        d| j        d| j                            |d          z   z  z             }|
|z  ||         |<   t          d	|          #|S )
Nr/   zKSpecify a dimension (`num_words` argument), or fit on some text data first.tfidfz7Fit the Tokenizer on some data before using tfidf mode.countfreqrz   r   zUnknown vectorization mode:)r?   rC   ra   
ValueErrorr5   npzeros	enumerater:   r=   r(   rY   r[   logrB   ro   )rE   rg   r|   r?   rP   r   r   countsjr   tfidfs               r   r}   zTokenizer.sequences_to_matrix   s   ~ 		' 0014		 6  
 I7??4#6?I   Hc)nni011	** 	J 	JFAs  ,S11F  	>>q			Q				V\\^^,, J J17??AaDGGV^^#c((lAaDGGX%%AaDGGW__ RVAYYB&-T_5H5HA5N5N1NOP C !3hAaDGG$%BDIII#J$ r   c                 `   t          j        | j                  }t          j        | j                  }t          j        | j                  }t          j        | j                  }t          j        | j                  }| j        | j        | j	        | j
        | j        | j        | j        |||||dS )N)r?   r   r   r   r@   rA   r5   r<   r>   rB   rD   rC   )jsondumpsr<   r>   rB   rC   rD   r?   r   r   r   r@   rA   r5   )rE   json_word_countsjson_word_docsjson_index_docsjson_word_indexjson_index_words         r   
get_configzTokenizer.get_config  s    :d&677DN33*T_55*T_55*T_55 |ZZ/"1+')))
 
 	
r   c                 l    |                                  }| j        j        |d}t          j        |fi |S )N)
class_nameconfig)r   	__class____name__r   r   )rE   rF   r   tokenizer_configs       r   to_jsonzTokenizer.to_json2  sD    "".1
 
 z*55f555r   )Nr   Tr   FNN)rz   )r   
__module____qualname____doc__rG   re   rh   rl   rk   ru   rt   r~   r}   r   r   r	   r   r   r2   r2   Q   s         6"! "! "! "!H.4 .4 .4`( ( (> > >     DB B B  $> > > >, , , ,\
 
 
,6 6 6 6 6r   r2   z4keras._legacy.preprocessing.text.tokenizer_from_jsonc                    t          j        |           }|                    d          }t          j        |                    d                    }t          j        |                    d                    }t          j        |                    d                    }d |                                D             }t          j        |                    d                    }d |                                D             }t          j        |                    d                    }t          d	i |}||_        ||_        ||_        ||_	        ||_
        |S )
r   r   r<   r>   rB   c                 4    i | ]\  }}t          |          |S r	   r(   r
   kvs      r   r   z'tokenizer_from_json.<locals>.<dictcomp>E  $    ;;;1#a&&!;;;r   rD   c                 4    i | ]\  }}t          |          |S r	   r   r   s      r   r   z'tokenizer_from_json.<locals>.<dictcomp>G  r   r   rC   r	   )r   loadsro   r8   r[   r2   r<   r>   rB   rC   rD   )	json_stringr   r   r<   r>   rB   rD   rC   	tokenizers	            r   tokenizer_from_jsonr   ;  s-    z+..!!(++F*VZZ6677K
6::k2233IFJJ|4455J;;
(8(8(:(:;;;JFJJ|4455J;;
(8(8(:(:;;;JFJJ|4455J##F##I'I#I%I%I%Ir   )r   Tr   )r   Tr   N)Nr   Tr   N)r   r:   r)   r   r6   numpyr   keras.src.api_exportr   r   r#   r    objectr2   r   r	   r   r   <module>r      s}   6 6            - - - - - - FGG 3

	! ! ! HG!$ 899 3

   :9( >?? 2

; ; ; @?;6 :;;f6 f6 f6 f6 f6 f6 f6 <;f6R DEE  FE  r   