
    c8L              
       H   d Z ddlZddlZddlZddlmZ ddlmZ ddl	m
Z
mZmZmZ ddlmZ g dg dg d	g d
g dddgddgg dg dg	Z ee          Zd eD             Z G d dej                  Zedk    r( ej        dej                    ej                     dS dS )zN
Automated tests for checking transformation algorithms (the models package).
    N)MmCorpus)
tfidfmodel)datapathget_tmpfilecommon_dictionarycommon_corpus)
Dictionary)compliersystemcomputer)euleriannodecyclegraphtreepath)r   flownetworkr   )loadingr   r   )userserverr   r   hamiltonianr   trees)r   kernelmalfunctionr   )r   r   r   c                 B    g | ]}t                               |          S  )
dictionarydoc2bow.0texts     ;lib/python3.11/site-packages/gensim/test/test_tfidfmodel.py
<listcomp>r$   $   s&    	5	5	5t*

T
"
"	5	5	5    c                   D    e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
S )TestTfidfModelc                 H    t          t          d                    | _        d S )Nztestcorpus.mm)r   r   corpus)selfs    r#   setUpzTestTfidfModel.setUp(   s    x8899r%   c                     t          j        | j        d          }t          | j                  d         }||         }g d}|                     t          j        ||                     d S )NT	normalizer   ))r   3Ey?)   r/   )   r/   )r   
TfidfModelr)   list
assertTruenpallclose)r*   modeldoctransformedexpecteds        r#   test_transformzTestTfidfModel.test_transform+   sg    %dkTBBB 4;"CjaaaK::;;;;;r%   c           
         t          j        t                    }t          j        }|                     |j        |           |                     |j        t          j        |j        |t          t                                         t          j        t                    }|                     |j        |j                   d S )N)r   )
r   r2   r   r   dfsassertEqualidfsprecompute_idfswgloballen)r*   model1r=   model2s       r#   	test_initzTestTfidfModel.test_init6   s     &}55# 	S)))j&@QTVYZgVhVh&i&ijjj &2CDDDfk22222r%   c                    t          d          }t          j        | j        d          }|                    |           t          j                            |          }|                     |j        |j        k               t          d         t          d         g}|                     t          j	        ||d                  ||d                                       |                     t          j	        ||d                  ||d                                       |                     t          j	        |g          |g                               t          d          }t          j        | j        d	          }|                    |           t          j                            |          }|                     |j        |j        k               t          d         t          d         g}|                     t          j	        ||d                  ||d                                       |                     t          j	        ||d                  ||d                                       |                     t          j	        |g          |g                               t          j        | j        d	          t          j                            t          d
                    fdt          j                                                  D             }fdt          j                                                  D             }|                     t          j	        ||                     t          d         t          d         g}|                     t          j	        |d                  |d                                       |                     t          j	        |d                  |d                                       |                     t          j	        g          g                               t          d          }t          j        | j        dd          }|                    |           t          j                            |d           }|                     |j        |j        k               t          d         t          d         g}|                     t          j	        ||d                  ||d                                       |                     t          j	        ||d                  ||d                                       t          j        | j        dd          t          j                            t          d
                    fdt          j                                                  D             }fdt          j                                                  D             }|                     t          j	        ||                     t          d         t          d         g}|                     t          j	        |d                  |d                                       |                     t          j	        |d                  |d                                       d S )Nzgensim_models.tstTr-   r0   r1   r   zgensim_models_smartirs.tstnfcsmartirsztfidf_model.tstc                 *    g | ]}j         |         S r   r?   r!   keymodel3s     r#   r$   z3TestTfidfModel.test_persistence.<locals>.<listcomp>_        HHHcS!HHHr%   c                 *    g | ]}j         |         S r   rK   r!   rM   model4s     r#   r$   z3TestTfidfModel.test_persistence.<locals>.<listcomp>`   rO   r%   pivotslopemmapc                 *    g | ]}j         |         S r   rK   rL   s     r#   r$   z3TestTfidfModel.test_persistence.<locals>.<listcomp>t   rO   r%   c                 *    g | ]}j         |         S r   rK   rQ   s     r#   r$   z3TestTfidfModel.test_persistence.<locals>.<listcomp>u   rO   r%   r   r   r2   r)   saveloadr4   r?   r5   r6   r   sortedkeys	r*   fnamer7   rD   tstvecidfs3idfs4rN   rR   s	          @@r#   test_persistencezTestTfidfModel.test_persistenceE   sc   /00%dkTBBB

5&++E22
fk1222)VAY'E&)$4fVAY6GHHIIIE&)$4fVAY6GHHIIIE"Ivbz::;;; 899%dkEBBB

5&++E22
fk1222)VAY'E&)$4fVAY6GHHIIIE&)$4fVAY6GHHIIIE"Ivbz::;;; &t{UCCC&++H5F,G,GHHHHHHVFK4D4D4F4F-G-GHHHHHHHVFK4D4D4F4F-G-GHHHE511222)VAY'F6!9$5vfQi7HIIJJJF6!9$5vfQi7HIIJJJF2Jr
;;<<< 899%dk!DDD

5&++E+==
fk1222)VAY'E&)$4fVAY6GHHIIIE&)$4fVAY6GHHIII &t{!1EEE&++H5F,G,GHHHHHHVFK4D4D4F4F-G-GHHHHHHHVFK4D4D4F4F-G-GHHHE511222)VAY'F6!9$5vfQi7HIIJJJF6!9$5vfQi7HIIJJJJJr%   c                    t          d          }t          j        | j        d          }|                    |           t          j                            |d           }|                     |j        |j        k               t          d         t          d         g}|                     t          j	        ||d                  ||d                                       |                     t          j	        ||d                  ||d                                       |                     t          j	        |g          |g                               t          d          }t          j        | j        d	
          }|                    |           t          j                            |d           }|                     |j        |j        k               t          d         t          d         g}|                     t          j	        ||d                  ||d                                       |                     t          j	        ||d                  ||d                                       |                     t          j	        |g          |g                               t          j        | j        d	
          t          j                            t          d                    fdt          j                                                  D             }fdt          j                                                  D             }|                     t          j	        ||                     t          d         t          d         g}|                     t          j	        |d                  |d                                       |                     t          j	        |d                  |d                                       |                     t          j	        g          g                               t          d          }t          j        | j        dd          }|                    |           t          j                            |d           }|                     |j        |j        k               t          d         t          d         g}|                     t          j	        ||d                  ||d                                       |                     t          j	        ||d                  ||d                                       t          j        | j        dd          t          j                            t          d                    fdt          j                                                  D             }fdt          j                                                  D             }|                     t          j	        ||                     t          d         t          d         g}|                     t          j	        |d                  |d                                       |                     t          j	        |d                  |d                                       d S )Nzgensim_models.tst.gzTr-   rV   r0   r1   r   zgensim_models_smartirs.tst.gzrG   rH   ztfidf_model.tst.bz2c                 *    g | ]}j         |         S r   rK   rL   s     r#   r$   z>TestTfidfModel.test_persistence_compressed.<locals>.<listcomp>   rO   r%   c                 *    g | ]}j         |         S r   rK   rQ   s     r#   r$   z>TestTfidfModel.test_persistence_compressed.<locals>.<listcomp>   rO   r%   rS   c                 *    g | ]}j         |         S r   rK   rL   s     r#   r$   z>TestTfidfModel.test_persistence_compressed.<locals>.<listcomp>   rO   r%   c                 *    g | ]}j         |         S r   rK   rQ   s     r#   r$   z>TestTfidfModel.test_persistence_compressed.<locals>.<listcomp>   rO   r%   rZ   r_   s	          @@r#   test_persistence_compressedz*TestTfidfModel.test_persistence_compressed{   sm   233%dkTBBB

5&++E+==
fk1222)VAY'E&)$4fVAY6GHHIIIE&)$4fVAY6GHHIIIE"Ivbz::;;; ;<<%dkEBBB

5&++E+==
fk1222)VAY'E&)$4fVAY6GHHIIIE&)$4fVAY6GHHIIIE"Ivbz::;;; &t{UCCC&++H5J,K,KLLHHHHVFK4D4D4F4F-G-GHHHHHHHVFK4D4D4F4F-G-GHHHE511222)VAY'F6!9$5vfQi7HIIJJJF6!9$5vfQi7HIIJJJF2Jr
;;<<< ;<<%dk!DDD

5&++E+==
fk1222)VAY'E&)$4fVAY6GHHIIIE&)$4fVAY6GHHIII &t{!1EEE&++H5J,K,KLLHHHHVFK4D4D4F4F-G-GHHHHHHHVFK4D4D4F4F-G-GHHHE511222)VAY'F6!9$5vfQi7HIIJJJF6!9$5vfQi7HIIJJJJJr%   c                 @   t           d         t           d         g}t          j        t           d          }||d                  ||d                  g}t          j        t                     }||d                  ||d                  g}|                     t	          j        |d         |d                              |                     t	          j        |d         |d                              t          j        t           d          }||d                  ||d                  g}|d d          }|                     t	          j        |d         |d                              |                     t	          j        |d         |d                              t          j        t           d          }||d                  ||d                  g}|d d          }|                     t	          j        |d         |d                              |                     t	          j        |d         |d                              t          j        t           d          }||d                  ||d                  g}g d	g d
g}|                     t	          j        |d         |d                              |                     t	          j        |d         |d                              t          j        t           d          }||d                  ||d                  g}g d	g d
g}|                     t	          j        |d         |d                              |                     t	          j        |d         |d                              t          j        t           d          }||d                  ||d                  g}g d	g dg}|                     t	          j        |d         |d                              |                     t	          j        |d         |d                              t          j        t           d          }||d                  ||d                  g}g dg dg}|                     t	          j        |d         |d                              |                     t	          j        |d         |d                              t          j        t           d          }||d                  ||d                  g}g d	g dg}t          j        t           d          }||d                  ||d                  g}|d d          }|                     t	          j        |d         |d                              |                     t	          j        |d         |d                              t          j        t           d          }||d                  ||d                  g}g dg dg}|                     t	          j        |d         |d                              |                     t	          j        |d         |d                              t          j        t           d          }||d                  ||d                  g}g dg dg}|                     t	          j        |d         |d                              |                     t	          j        |d         |d                              t          j        t           d          }||d                  ||d                  g}g dg dg}|                     t	          j        |d         |d                              |                     t	          j        |d         |d                              t          j        t           d          }||d                  ||d                  g}|d d          }|                     t	          j        |d         |d                              |                     t	          j        |d         |d                              t          j        t           d          }||d                  ||d                  g}g dg d g}|                     t	          j        |d         |d                              |                     t	          j        |d         |d                              t          j        t           d! d" d#          }||d                  ||d                  g}t          j        t           d$ d% d#          }||d                  ||d                  g}|                     t	          j        |d         |d                              |                     t	          j        |d         |d                              d&}t          j        t           d'|(          }||d                  ||d                  g}d)t          d* t          D                       z  t          t                    z  }d)|z
  |z  |d+z  z   d)|z
  |z  |d,z  z   gfd-|d         D             fd.|d         D             g}|                     t	          j        |d         |d                              |                     t	          j        |d         |d                              d&}t          j        t          d/|0          }||d                  ||d                  g}t          d1 t          D                       t          t                    z  }d)|z
  |z  |d2z  z   d)|z
  |z  |d3z  z   gfd4|d         D             fd5|d         D             g}|                     t	          j        |d         |d                              |                     t	          j        |d         |d                              d S )6Nr0   r1   rG   rH   r   tnnnnnlnn))         ?)   rp      rp   )   rp   )   rp   )   rp   )rs   g       @)	   rp   )
   rp   dnnann)rr   )rx         ?)ry   r|   bnn))ro   r0   )rq   r0   rs   r0   )rt   r0   )ru   r0   )rv   r0   )r~   )rx   r0   )ry   r0   Lnn))rs   g/e>?)rx   /e>?)ry   r   nxnnfn))ro   h\	@)rq   r   )rs   gh\?)rt   r   )ru   r   )rv   gh\@))rs   r   )rx   r   )ry   r   ntn))ro   qy	O
@)rq   r   )rs   gzSo?)rt   r   )ru   r   )rv   gqy	O@))rs   gzSo@)rx   r   )ry   r   npn))ro         @)rq   r   rr   )rt   r   )ru   r   )rv   gZ?)rw   )rx   r   )ry   r   nnxnnc))ro   (p ?)rq   r   )rs   r   )rt   r   )ru   r   )rv   r   ))rs   g>,p ?)rx   >,p ?)ry   r   c                     | S Nr   xs    r#   <lambda>z1TestTfidfModel.test_consistency.<locals>.<lambda>Z  s    q r%   c                     | | z  S r   r   r   ys     r#   r   z1TestTfidfModel.test_consistency.<locals>.<lambda>Z  s    WX[\W\ r%   )wlocalrA   rI   c                     | | z  S r   r   r   s    r#   r   z1TestTfidfModel.test_consistency.<locals>.<lambda>^  s
    q1u r%   c                     | S r   r   r   s     r#   r   z1TestTfidfModel.test_consistency.<locals>.<lambda>^  s    [\ r%   g?nnu)rI   rU   rp   c              3   N   K   | ] }t          t          |                    V  !d S r   )rB   setr    s     r#   	<genexpr>z2TestTfidfModel.test_consistency.<locals>.<genexpr>h  s.      )K)KT#c$ii..)K)K)K)K)K)Kr%   g      @r   c                 0    g | ]\  }}||d          z  fS r   r   r!   termidweightvector_normss      r#   r$   z3TestTfidfModel.test_consistency.<locals>.<listcomp>n  +    NNNNFFff|A./NNNr%   c                 0    g | ]\  }}||d          z  fS r0   r   r   s      r#   r$   z3TestTfidfModel.test_consistency.<locals>.<listcomp>o  r   r%   nnb)r   rI   rU   c              3   D   K   | ]}|D ]}t          |          d z   V  dS )rp   N)rB   )r!   r"   words      r#   r   z2TestTfidfModel.test_consistency.<locals>.<genexpr>y  s:      &Z&Z4UY&Z&ZTs4yy3&Z&Z&Z&Z&Z&Z&Zr%   g      B@g      9@c                 0    g | ]\  }}||d          z  fS r   r   r   s      r#   r$   z3TestTfidfModel.test_consistency.<locals>.<listcomp>  r   r%   c                 0    g | ]\  }}||d          z  fS r   r   r   s      r#   r$   z3TestTfidfModel.test_consistency.<locals>.<listcomp>  r   r%   )
r)   r   r2   r4   r5   r6   sumtextsrB   r   )	r*   docsr7   transformed_docsexpected_docsrU   average_unique_lengthaverage_character_lengthr   s	           @r#   test_consistencyzTestTfidfModel.test_consistency   s   q	6!9% %fu===!$q'NE$q'N;%f--tAwtAw8$4Q$7q9IJJKKK$4Q$7q9IJJKKK %fu===!$q'NE$q'N;QQQ$4Q$7q9IJJKKK$4Q$7q9IJJKKK %fu===!$q'NE$q'N;QQQ$4Q$7q9IJJKKK$4Q$7q9IJJKKK %fu===!$q'NE$q'N;HHH+++

 	$4Q$7q9IJJKKK$4Q$7q9IJJKKK %fu===!$q'NE$q'N;HHH+++

 	$4Q$7q9IJJKKK$4Q$7q9IJJKKK %fu===!$q'NE$q'N;HHH---

 	$4Q$7q9IJJKKK$4Q$7q9IJJKKK %fu===!$q'NE$q'N;<<<%%%

 	$4Q$7q9IJJKKK$4Q$7q9IJJKKK %fu===!$q'NE$q'N;    
 %fu===!$q'NE$q'N;QQQ$4Q$7q9IJJKKK$4Q$7q9IJJKKK %fu===!$q'NE$q'N;    
 	$4Q$7q9IJJKKK$4Q$7q9IJJKKK %fu===!$q'NE$q'N;    
 	$4Q$7q9IJJKKK$4Q$7q9IJJKKK %fu===!$q'NE$q'N;    
 	$4Q$7q9IJJKKK$4Q$7q9IJJKKK %fu===!$q'NE$q'N;QQQ$4Q$7q9IJJKKK$4Q$7q9IJJKKK %fu===!$q'NE$q'N;    
 	$4Q$7q9IJJKKK$4Q$7q9IJJKKK%f[[J\J\glmmm!$q'NE$q'N;%f__nnglmmmtAwtAw8$4Q$7q9IJJKKK$4Q$7q9IJJKKK %fuEJJJ!$q'NE$q'N; #c)K)KU)K)K)K&K&K KcRWjj X5[11ECK?5[11ECK?

 ONNNd1gNNNNNNNd1gNNN

 	$4Q$7q9IJJKKK$4Q$7q9IJJKKK %eSXYYY!$q'NE$q'N;#&&Z&Z5&Z&Z&Z#Z#Z]`af]g]g#g 5[44ut|C5[44ut|C

 ONNNd1gNNNNNNNd1gNNN

 	$4Q$7q9IJJKKK$4Q$7q9IJJKKKKKr%   c           	         t           d         t           d         g}t          j        | j                   }||d                  ||d                  g}t          j        | j         dd          }||d                  ||d                  g}|                     t	          j        t          |d                   t          |d                                        |                     t	          j        t          |d                   t          |d                                        t          j        | j         dd          }||d                  ||d                  g}g dg dg}|                     t	          j        t          |d                   t          |d                                        |                     t	          j        t          |d                   t          |d                                        d S )Nr0   r1   r   rS   g      ?))rv   玽Ȅn?)ru   `5e?)rt   r   )rs   r   )rq   r   )ro   r   ))ry   @,p ?)rx   r   )rs   g@,p ?r)   r   r2   r4   r5   r6   r]   )r*   r   r7   r   r   s        r#   test_pivoted_normalizationz)TestTfidfModel.test_pivoted_normalization  s   q	6!9% %dk22!$q'NE$q'N;%dk!DDDtAwtAw8F+;A+>$?$?VWHXAYAYZZ[[[F+;A+>$?$?VWHXAYAYZZ[[[ %dk#FFF!$q'NE$q'N;    
 	F+;A+>$?$?VWHXAYAYZZ[[[F+;A+>$?$?VWHXAYAYZZ[[[[[r%   c           	      8   d }d }t           d         t           d         g}t          j        t           ||d          }||d                  ||d                  g}d |d         D             d	 |d         D             g}|                     t	          j        t          |d                   t          |d                                        |                     t	          j        t          |d                   t          |d                                        d S )
Nc                 ^    t          | t          j                  sJ t          | dz             S Nr0   )
isinstancer5   ndarrayiter)tfs    r#   r   z2TestTfidfModel.test_wlocal_wglobal.<locals>.wlocal  s*    b"*-----Q<<r%   c                     dS r   r   )df
total_docss     r#   rA   z3TestTfidfModel.test_wlocal_wglobal.<locals>.wglobal  s    1r%   r0   r1   F)r   rA   r.   r   c                 "    g | ]\  }}||d z   fS r   r   r!   r   r   s      r#   r$   z6TestTfidfModel.test_wlocal_wglobal.<locals>.<listcomp>  %    @@@nffffqj!@@@r%   c                 "    g | ]\  }}||d z   fS r   r   r   s      r#   r$   z6TestTfidfModel.test_wlocal_wglobal.<locals>.<listcomp>  r   r%   r   )r*   r   rA   r   r7   r   r   s          r#   test_wlocal_wglobalz"TestTfidfModel.test_wlocal_wglobal  s   	  	  	 	 	 	 q	6!9%%fVWX]^^^!$q'NE$q'N;@@Q@@@@@Q@@@

 	F+;A+>$?$?VWHXAYAYZZ[[[F+;A+>$?$?VWHXAYAYZZ[[[[[r%   c                 6   t           j                            t          d                    }g d}|D ]%}|                     t          ||                     &|                     t          |t                             t          t                               d S )Nztfidf_model_3_2.tst)rT   rU   rI   )	r   r2   r\   r   r4   hasattrr>   rB   r)   )r*   r7   attrsas       r#   test_backwards_compatibilityz+TestTfidfModel.test_backwards_compatibility  s    %**84I+J+JKK... 	/ 	/AOOGE1--....U6]++S[[99999r%   N)__name__
__module____qualname__r+   r;   rE   rd   rj   r   r   r   r   r   r%   r#   r'   r'   '   s        : : :	< 	< 	<3 3 34K 4K 4Kl4K 4K 4KlSL SL SLj\ \ \:\ \ \&: : : : :r%   r'   __main__z)%(asctime)s : %(levelname)s : %(message)s)formatlevel)__doc__loggingunittestnumpyr5   gensim.corpora.mmcorpusr   gensim.modelsr   gensim.test.utilsr   r   r   r   gensim.corporar	   r   r   r)   TestCaser'   r   basicConfigDEBUGmainr   r%   r#   <module>r      s   
       , , , , , , $ $ $ $ $ $ U U U U U U U U U U U U % % % % % % '&&:::)))%%%   ]g555$$$
	 Z
	5	5u	5	5	5V: V: V: V: V:X& V: V: V:r z GJRYR_````HMOOOOO r%   