
    c%                        d Z ddlZddlZddlZddlmZ ddlmZm	Z	  G d dej
                  Z G d dej
                  Z G d	 d
ej
                  Z G d dej
                  Z G d dej
                  Z G d dej
                  Z G d dej
                  Zedk    r0ej                            ej                    ej                     dS dS )z7
Automated tests for checking various utils functions.
    N)utils)datapathget_tmpfilec                   &    e Zd Zd Zd Zd Zd ZdS )TestIsCorpusc                 ^    t          j        d           }d}|                     ||           d S )N)FNr   	is_corpusassertEqual)selfresultexpecteds      6lib/python3.11/site-packages/gensim/test/test_utils.py	test_NonezTestIsCorpus.test_None   s2    && 6*****    c                    dgg}t          j        |          }d|f}|                     ||           ddgg}t          j        |          }d|f}|                     ||           g dg}t          j        |          }d|f}|                     ||           dgdgg}t          j        |          }d|f}|                     ||           dgdgdgdgg}t          j        |          }d|f}|                     ||           d S )Nr   g      @T   g       @)r   r      g      @   g       @r   r   r	   r   potentialCorpusr   r   s       r   test_simple_lists_of_tuplesz(TestIsCorpus.test_simple_lists_of_tuples   sA    $9+11/*6*** $W-.11/*6***???@11/*6*** $9wi011/*6***#9wi'WIF11/*6*****r   c                 j    dgg}t          j        |          }d|f}|                     ||           d S )N)r      Tr	   r   s       r   test_int_tupleszTestIsCorpus.test_int_tuples9   s@    "8*11/*6*****r   c                 h   t                      }|                    dg           |                    d           |                    ddg           |                    g d           |                    dgg           |D ]0}t          j        |          }d|f}|                     ||           1d S )Nhumanstar)r   r   r   r      r#   )r   stringF)listappendr   r
   r   )r   
potentialsnoCorpusr   r   s        r   test_invalid_formatsz!TestIsCorpus.test_invalid_formats?   s     VV
7)$$$'"""7F+,,,,,,---M?+,,," 	/ 	/H_X..Fx(HXv....	/ 	/r   N)__name__
__module____qualname__r   r   r   r)    r   r   r   r      sP        + + ++ + +>+ + +/ / / / /r   r   c                   ,    e Zd Zd Zd Zd Zd Zd ZdS )	TestUtilsc                 ^    d}d}|                      t          j        |          |           d S )NzCIt&#146;s the Year of the Horse. YES VIN DIESEL &#128588; &#128175;u5   Its the Year of the Horse. YES VIN DIESEL 🙌 💯)r   r   decode_htmlentities)r   bodyr   s      r   test_decode_entitieszTestUtils.test_decode_entitiesP   s3    UY2488(CCCCCr   c                     d}t          j        t          d                    5 }|                     t	          d |D                       |           d d d            d S # 1 swxY w Y   d S )N   testcorpus.mmc              3      K   | ]}d V  dS r   Nr-   .0_s     r   	<genexpr>z9TestUtils.test_open_file_existent_file.<locals>.<genexpr>Y   "       3 3q 3 3 3 3 3 3r   )r   	open_filer   r   sum)r   number_of_lines_in_fileinfiles      r   test_open_file_existent_filez&TestUtils.test_open_file_existent_fileV   s    "$_Xo6677 	N6S 3 3F 3 3 3335LMMM	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	Ns   .AA#&A#c                     |                      t                    5  t          j        d          5  	 d d d            n# 1 swxY w Y   d d d            d S # 1 swxY w Y   d S )Nznon_existent_file.txtassertRaises	Exceptionr   r>   )r   s    r    test_open_file_non_existent_filez*TestUtils.test_open_file_non_existent_file[   s    y)) 	 	!899                	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s1   A>AA	AA	AAAc                     d}t          t          d                    }t          j        |          5 }|                     t          d |D                       |           d d d            d S # 1 swxY w Y   d S )Nr5   r6   c              3      K   | ]}d V  dS r8   r-   r9   s     r   r<   z@TestUtils.test_open_file_existent_file_object.<locals>.<genexpr>d   r=   r   )openr   r   r>   r   r?   )r   r@   file_objrA   s       r   #test_open_file_existent_file_objectz-TestUtils.test_open_file_existent_file_object`   s    "$1122_X&& 	N&S 3 3F 3 3 3335LMMM	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	Ns   .A..A25A2c                     d }|                      t                    5  t          j        |          5  	 d d d            n# 1 swxY w Y   d d d            d S # 1 swxY w Y   d S NrD   )r   rK   s     r   'test_open_file_non_existent_file_objectz1TestUtils.test_open_file_non_existent_file_objectf   s    y)) 	 	**                	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s3   AA A A	AA	AAAN)r*   r+   r,   r3   rB   rG   rL   rO   r-   r   r   r/   r/   O   sh        D D DN N N
  
N N N    r   r/   c                       e Zd Zd ZdS )TestSampleDictc                    ddddd}ddg}d |                                 D             }t          j        |dd	          }|                     ||           t          j        |d          }||v r|                     d
           d S d S )Nr   r   r   r#   )r   r   r   r   )r   r   )r   r   c                     g | ]	\  }}||f
S r-   r-   )r:   kvs      r   
<listcomp>z3TestSampleDict.test_sample_dict.<locals>.<listcomp>q   s     ===41aA===r   FT)itemsr   sample_dictr   
assertTrue)r   dexpected_dictexpected_dict_randomsampled_dictsampled_dict_randoms         r   test_sample_dictzTestSampleDict.test_sample_dictn   s    aA!$$(==17799===(Au55}555#/155"66 	"OOD!!!!!	" 	"r   N)r*   r+   r,   r_   r-   r   r   rQ   rQ   m   s#        " " " " "r   rQ   c                       e Zd Zd ZdS )TestTrimVocabByFreqc                     dddd}ddd}t          j        |d           |                     ||           ddddd}dddd}t          j        |d           |                     ||           d S )Nr#   r   r   word1word2word3)rd   rf   )topkrd   re   rf   word4)r   trim_vocab_by_freqr   )r   rZ   r[   s      r   test_trim_vocabz#TestTrimVocabByFreq.test_trim_vocabz   s    !a00"#a00 ++++M***!a!<<"#a!<< ++++M*****r   N)r*   r+   r,   rk   r-   r   r   ra   ra   y   s#        	+ 	+ 	+ 	+ 	+r   ra   c                       e Zd Zd ZdS )TestMergeDictsc                     dddd}dddd}t          j        ||          }ddddd	}|                     ||           d S )
Nr#   r   r   rc   r   
   )rd   rf   ri      rh   )r   merge_countsr   )r   d1d2res_dictr[   s        r   test_merge_dictszTestMergeDicts.test_merge_dicts   s_    1q111r22%b"--"#a!bII=11111r   N)r*   r+   r,   ru   r-   r   r   rm   rm      s#        2 2 2 2 2r   rm   c            	           e Zd Z ej        g dg dg dg dg dg dg          Zd Zd Zd	 Zd
 Z	d Z
d Zd Zd Zd Zd Zd ZdS )TestWindowing)r   r   r   r   r   )r   r   r   r   r#   )r   r   r   r#      )r   r   r#   rx   rp   )r   r#   rx   rp      )r#   rx   rp   ry   	   c                     |                      |j        |j                   |                     ||k                                               d S rN   )r   shaperY   all)r   r   actuals      r   _assert_arrays_equalz"TestWindowing._assert_arrays_equal   sE    6668+002233333r   c                     t          j        t          d          d          }t          j        ddgddgddgddgg          }|                     ||           d S )Nr#   r   r   r   r   r   )r   strided_windowsrangenparrayr   )r   outr   s      r   test_strided_windows1z#TestWindowing.test_strided_windows1   sj    #E!HHa008FFFF	
   	!!(C00000r   c                     t          j        d          }t          j        |d          }| j                                        }|                     ||           d|d<   |                     d|d         d           d S )Nro   r#   r   r   r   z!should make view rather than copy)r   aranger   r   arr10_5copyr   r   r   	input_arrr   r   s       r   test_strided_windows2z#TestWindowing.test_strided_windows2   sv    IbMM	#Iq11<$$&&!!(C000D	Yq\+NOOOOOr   c                     t          j        g dd          }t          j        |d          }t          j        d          }|                     ||           d S )Nthisistestobjectdtyper   r   )r   r   r   r   ndarrayr   r   s       r   -test_strided_windows_window_size_exceeds_sizez;TestWindowing.test_strided_windows_window_size_exceeds_size   sZ    H3338DDD	#Iq11:f%%!!(C00000r   c                     t          j        g dd          }t          j        |d          }t          j        |                                g          }|                     ||           d S )Nr   r   r   r   )r   r   r   r   r   r   r   s       r   ,test_strided_windows_window_size_equals_sizez:TestWindowing.test_strided_windows_window_size_equals_size   sg    H3338DDD	#Iq118Y^^--.//!!(C00000r   c                    g dddgg}t          j        |dd          }d |D             }|                     ||           t          j        |d          }d |D             }|                     |d	         g|           d S )
Nr   r   ar   documentr   F)ignore_below_sizec                 ,    g | ]}t          |          S r-   r%   r:   ws     r   rV   zMTestWindowing.test_iter_windows_include_below_window_size.<locals>.<listcomp>       (((q477(((r   c                 ,    g | ]}t          |          S r-   r   r   s     r   rV   zMTestWindowing.test_iter_windows_include_below_window_size.<locals>.<listcomp>   r   r   r   )r   iter_windowsr   )r   textsr   windowss       r   +test_iter_windows_include_below_window_sizez9TestWindowing.test_iter_windows_include_below_window_size   s    $$$vz&:; UCCC((C(((((( **((C(((%(W-----r   c                     g dddgg}t          t          j        |d                    }d |D             }ddgddgddgg}|                     ||           d S )	Nr   r   r   r   c                 ,    g | ]}t          |          S r-   r   r:   iterables     r   rV   z>TestWindowing.test_iter_windows_list_texts.<locals>.<listcomp>       ???8X???r   r   r   r   )r%   r   r   assertListEqualr   r   r   list_windowsr   s        r   test_iter_windows_list_textsz*TestWindowing.test_iter_windows_list_texts   sy    $$$vz&:;u)%3344??w???TNT3K&*1EF\844444r   c                 6   t          j        g dd          ddgg}t          t          j        |d                    }d |D             }dd	gd	d
gddgg}|                     ||           d|d         d<   |                     d|d         d                    d S )Nr   r   r   r   r   r   c                 ,    g | ]}t          |          S r-   r   r   s     r   rV   z>TestWindowing.test_iter_windows_uses_views.<locals>.<listcomp>   r   r   r   r   r   modifiedr   )r   r   r%   r   r   r   r   r   s        r   test_iter_windows_uses_viewsz*TestWindowing.test_iter_windows_uses_views   s    ---X>>>@TUu)%3344??w???TNT3K&*1EF\8444"
1U1Xa[11111r   c                 b   t          j        g dd          t          j        ddgd          g}t          t          j        |dd                    }d	|d
         d
<   |                     d|d
         d
                    d	|d         d
<   |                     d|d         d
                    d S )Nr   r   r   r   r   r   T)r   r   r   r   r   )r   r   r%   r   r   r   )r   r   r   s      r   test_iter_windows_with_copyz)TestWindowing.test_iter_windows_with_copy   s    H(((999Hfj):::
 u)%>>>??"
1q!---"
1q!-----r   c                 r    g dddggdg}g d}|                      t          j        |          |           d S )N)r   r   r   r   r#   rx   r   r   r   r   r#   rx   r   r   flatten)r   nested_listr   s      r   test_flatten_nestedz!TestWindowing.test_flatten_nested   sJ    !		Aq6*A.%%%{33X>>>>>r   c                 f    g d}g d}|                      t          j        |          |           d S )Nr   r   )r   
not_nestedr   s      r   test_flatten_not_nestedz%TestWindowing.test_flatten_not_nested   s>    '''
%%%z22H=====r   N)r*   r+   r,   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r-   r   r   rw   rw      s        bh  G4 4 41 1 1P P P1 1 11 1 1. . .5 5 52 2 2. . .? ? ?
> > > > >r   rw   c                       e Zd Zd Zd ZdS )TestSaveAsLineSentencec                    t          d          }d t          j        d                              d          D             }t          j        ||           t          j        |dd          5 }d |                                                                                    d          D             }|                     ||           d d d            d S # 1 swxY w Y   d S )	Ngensim_utils.tstc                 6    g | ]}|                                 S r-   splitr:   lines     r   rV   zHTestSaveAsLineSentence.test_save_as_line_sentence_en.<locals>.<listcomp>   0     
 
 
 JJLL
 
 
r   zhello world
how are you
rbutf8encodingc                 Z    g | ](}|                                                                 )S r-   stripr   r   s     r   rV   zHTestSaveAsLineSentence.test_save_as_line_sentence_en.<locals>.<listcomp>   ,    YYY$++--YYYr   	r   r   any2unicoder   save_as_line_sentencerJ   readr   r   r   corpus_fileref_sentencesfin	sentencess        r   test_save_as_line_sentence_enz4TestSaveAsLineSentence.test_save_as_line_sentence_en   s!   !"455
 
)*DEEKKDQQ
 
 

 	#M;???ZTF;;; 	7sYY#((**:J:J:L:L:R:RSW:X:XYYYIY666	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7   -ACCCc                    t          d          }d t          j        d                              d          D             }t          j        ||           t          j        |dd          5 }d |                                                                                    d          D             }|                     ||           d d d            d S # 1 swxY w Y   d S )	Nr   c                 6    g | ]}|                                 S r-   r   r   s     r   rV   zHTestSaveAsLineSentence.test_save_as_line_sentence_ru.<locals>.<listcomp>   r   r   u2   привет мир
как ты поживаешьr   r   r   r   c                 Z    g | ](}|                                                                 )S r-   r   r   s     r   rV   zHTestSaveAsLineSentence.test_save_as_line_sentence_ru.<locals>.<listcomp>  r   r   r   r   s        r   test_save_as_line_sentence_ruz4TestSaveAsLineSentence.test_save_as_line_sentence_ru   s"   !"455
 
)*_``ffgkll
 
 
 	#M;???ZTF;;; 	7sYY#((**:J:J:L:L:R:RSW:X:XYYYIY666	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7r   N)r*   r+   r,   r   r   r-   r   r   r   r      s2        7 7 7
7 
7 
7 
7 
7r   r   __main__)__doc__loggingunittestnumpyr   gensimr   gensim.test.utilsr   r   TestCaser   r/   rQ   ra   rm   rw   r   r*   rootsetLevelWARNINGmainr-   r   r   <module>r      s                3 3 3 3 3 3 3 39/ 9/ 9/ 9/ 9/8$ 9/ 9/ 9/x    !   <	" 	" 	" 	" 	"X& 	" 	" 	"
+ 
+ 
+ 
+ 
+(+ 
+ 
+ 
+2 2 2 2 2X& 2 2 2\> \> \> \> \>H% \> \> \>~7 7 7 7 7X. 7 7 76 z L'/***HMOOOOO r   