
    ch                     4   d Z ddlZddlZddlZddlZddlZddlZddlZddlZ	ddl
mZ 	 ddlmZ dZn# eef$ r dZY nw xY wddlmZ ddlmZmZ dd	lmZ dd
lmZmZmZmZmZmZ g dddgddgddgdgg dgZ d Z!d Z" G d dej#                  Z$ G d dej#                  Z% G d dej#                  Z& G d dej#                  Z' e(e$d          sd#dZ) e*e$de)           e+dk    r* ej,        dej-                     ej.        d!"           dS dS )$zN
Automated tests for checking transformation algorithms (the models package).
    N)log_capture)emd2TF)utils)word2veckeyedvectors)check_output)datapathget_tmpfiletemporary_filecommon_texts	LeeCorpuslee_corpus_list)computer
artificialintelligencer   treeshumanr   graph)r   r   systemc                 >    | dk    rt           j        S t           j        S Nr   r   RULE_DISCARDRULE_DEFAULTwordcount	min_counts      9lib/python3.11/site-packages/gensim/test/test_word2vec.py_ruler    0   s     w "!!!!    c                      t          d          } t          j        t          d          }|                    |            t          j                    }|                    |           S )Ngensim_word2vec.tst   r   )r
   r   Word2Vec	sentencessaveload)tmpfmodels     r   load_on_instancer,   7   sW    ,--Di1555E	JJtE::dr!   c                   p   e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
dVd
Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d  Z!d! Z"d" Z#d# Z$d$ Z%d% Z&d& Z'd' Z(d( Z)dWd+Z*d, Z+d- Z,d. Z-d/ Z. e/j0        d0e1j2        vd12          d3             Z3dXd4Z4d5 Z5dXd6Z6d7 Z7d8 Z8d9 Z9dXd:Z:d; Z;d< Z<d= Z=d> Z>d? Z?d@ Z@dA ZAdB ZBdC ZCdD ZDdE ZEdF ZFdG ZGdH ZHdI ZIdJ ZJdK ZKdL ZL eM            dM             ZN eM            dN             ZOdO ZPdP ZQdQ ZRdR ZSdS ZTdT ZUdU ZVd*S )YTestWord2VecModelc                 r   ddddddddddddd}|                                 }t          j        ddddd	          }t          j        ddddd
	          }|                    |           |                    |           |                     t          |j                  d           |                     t          |j                  d           |                                D ]l}|                     |j                            |d          ||                    |                     |j                            |d          ||                    mdddddddd}|                    |d           |                    |d           |                     |j                            dd          d           |                     |j                            dd          d           |                     t          |j                  d           |                     t          |j                  d           dS )zVTest that the algorithm is able to build vocabulary from given
        frequency table         )minorsr   r   r   epsr   surveyuserr   time	interfaceresponse
   r   *   r$   vector_sizer   seedhsnegative      r   )r   r   r   r   r   r   r   Tupdater   r      N)	copyr   r&   build_vocab_from_freqassertEquallenwvkeysget_vecattr)self	freq_dictfreq_dict_origmodel_hs	model_negknew_freq_dicts          r   test_build_vocab_from_freqz,TestWord2VecModel.test_build_vocab_from_freqA   sC    a11!Qq	
 
	 #))$qraZ[\\\%"q[\]]]	&&y111''	222X[))2...Y\**B///$$&& 	V 	VAX[44Q@@.QRBSTTTY\55aAA>RSCTUUUU QTUabmn
 
 	&&}T&BBB''d'CCC00'BBAFFF00wGGKKKX[))2...Y\**B/////r!   c           	      &   ddgddgddgddgg}t          j        |dddddd	          }|                     t          |j                  d           |                     |j                            dd
          d           |                     |j                            dd
          d           ddgddgddgddgg dg}t          j        |dddddd	          }|                     t          |j                  d           |                     |j                            dd
          d           |                     |j                            dd
          d           |                     |j                            dd
          d           dS )z)Test Prune vocab while scanning sentencesr   r   r4   r:   r   r0   r;   r$   )r=   r   max_vocab_sizer>   r?   r@   r   r1   r2   )r3   r5   r3   r5   r3   r3   N)r   r&   rH   rI   rJ   rL   )rM   r'   r+   s      r   test_prune_vocabz"TestWord2VecModel.test_prune_vocab_   s    hhuh	
	 !)qYZachituvvvUX***--gw??CCC--h@@!DDD hhuh>>>
	 !)qYZachituvvvUX***--gw??CCC--h@@!DDD--h@@!DDDDDr!   c                     t          j        ddd          }|                    t                    d         }|                     |d           d S )Nr:   r   r;   )r=   r   r>      )r   r&   
scan_vocabr'   rH   )rM   r+   total_wordss      r   test_total_word_countz'TestWord2VecModel.test_total_word_county   sM    !bABGGG&&y11!4b)))))r!   c                    t          j        dddd          }|                    t                     |                                }|                     |d         d           |                     |d         d           |                     |d         d	           |                     |j        d           t          j        ddd
d          }|                    t                     |                                }|                     |d         d           |                     |d         d           |                     |d         d           |                     |j        d           d S )Nr:   r2   r   )r=   max_final_vocabr   sampledrop_unique   retain_totalnum_retained_wordsr$   r0         r1   )r   r&   rZ   r'   prepare_vocabrH   effective_min_count)rM   r+   reported_valuess      r   test_max_final_vocabz&TestWord2VecModel.test_max_final_vocab~   s`    !b!qYZ[[[###--//7<<<8!<<<)=>BBB2A666 !b!qYZ[[[###--//7;;;8"===)=>BBB2A66666r!   c                    t          j        t          ddddd          }t          j        t          ddddd          }|                     t	          |j                  d           |                     |j                            dd	          d
           |                    t          d           |                    t          d           |                     |j                            dd	          d           |                     |j                            dd	          d           | 	                    t	          |j                  d           | 	                    t	          |j                  d           dS )Test that the algorithm is able to add new words to the
        vocabulary and to a trained model when using a sorted vocabularyr:   r   r;   r$   r<   rA   rB   r   r   r1   TrC   r2   r   rE   N)
r   r&   r'   
assertTruerI   rJ   rL   build_vocabnew_sentencesrH   )rM   rP   rQ   s      r   test_online_learningz&TestWord2VecModel.test_online_learning   sL    $YB!RTYZefggg%iR1SUZ[fghhh	HK(("---//AA1EEE]4888mD999//AA1EEE//gFFJJJX[))2...Y\**B/////r!   c                    t          d          }t          j        t          ddddd          }|                    |           t          j                            |          }|                     t          |j                  d           |	                    t          d	           |                    t          |j        |j        
           |                     t          |j                  d           dS )rk   r#   r:   r   r;   rA   r<   rB   TrC   total_examplesepochsrE   N)r
   r   r&   r'   r(   r)   rl   rI   rJ   rm   rn   traincorpus_countrs   rH   )rM   r*   rQ   s      r   test_online_learning_after_savez1TestWord2VecModel.test_online_learning_after_save   s     011%iR1SUZ[fghhh	t%**400	IL))2...mD999i6LU^UefffY\**B/////r!   c           
         t          t          d                    5 }t          t          d                    5 }t          j        t          |           t          j        t
          |           t          j        |ddddd          }t          j        |ddddd          }|                     t          |j
                  d	           |                     |j
                            d
d          d           |                    |d           |                    ||j        |j                   |                    |d           |                    ||j        |j                   |                     |j
                            d
d          d           |                     |j
                            dd          d           |                     t          |j
                  d           |                     t          |j
                  d           ddd           n# 1 swxY w Y   ddd           dS # 1 swxY w Y   dS )rk   gensim_word2vec1.tstgensim_word2vec2.tstr:   r   r;   r$   corpus_filer=   r   r>   r?   r@   rA   rB   r   r   r1   Tr{   rD   r{   r[   rs   r2   r   rE   N)r   r
   r   save_as_line_sentencer'   rn   r   r&   rl   rI   rJ   rL   rm   rt   corpus_total_wordsrs   rH   )rM   r{   new_corpus_filerP   rQ   s        r   test_online_learning_from_filez0TestWord2VecModel.test_online_learning_from_file   s    K(>??@@ 	4K{+ABBCC	4GV'	;???'GGG([b\]df,-; ; ;H )kr]^eg-.< < <IOOC,,b111OOHK33GWEEqIII  _T JJJNNHD_hphwNxxx!!od!KKKOO+9T]e]l  n n nOOHK33GWEEqIIIOOHK33L'JJANNNS--r222S..333)	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4s5   H>G H&H>&H*	*H>-H*	.H>>IIc           
      P   t          t          d                    5 }t          t          d                    5 }t          j        t          |           t          j        t
          |           t          d          }t          j        |ddddd          }|                    |           t          j        	                    |          }| 
                    t          |j                  d	           |                    ||j        |j        
           |                    |d           |                    ||j        |j        
           |                     t          |j                  d           ddd           n# 1 swxY w Y   ddd           dS # 1 swxY w Y   dS )rk   rx   ry   r#   r:   r   r;   rA   rz   rB   r}   Tr|   rE   N)r   r
   r   r~   r'   rn   r   r&   r(   r)   rl   rI   rJ   rt   r   rs   rm   rH   )rM   r{   r   r*   rQ   s        r   )test_online_learning_after_save_from_filez;TestWord2VecModel.test_online_learning_after_save_from_file   s    K(>??@@ 	4K{+ABBCC	4GV'	;???'GGG455D )kr]^eg-.< < <INN4    )..t44IOOC	--r222OOA]fofvOwww!!od!KKKOOYEa#,#3  5 5 5S..333#	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4s5   FD=F7FF	F
F	FF"FFc                 B   g g }}t           D ]1}d|v r|                    |           |                    |           2|                     t          d |D                                  |                    ||           |                    ||j        |j                   |                     d|j	        v            |                    |d           |                     d|j	        v            t          j        |j	        j                  }|                    |t          |          |j                   |                     t          j        |j	        j        |                     |j	                            dgdg          }|                     d|           d S )N	terrorismc              3      K   | ]}d |vV  	dS )r   N ).0lines     r   	<genexpr>z1TestWord2VecModel.onlineSanity.<locals>.<genexpr>   s'      GGKt3GGGGGGr!   rC   rq   Twar        )r   appendrl   allrm   rt   ru   rs   assertFalserJ   nprF   vectorsrI   allclosen_similarity
assertLess)rM   r+   trained_modelterroothersr   orig0sims           r   onlineSanityzTestWord2VecModel.onlineSanity   s   Bv# 	$ 	$Dd" $T""""d####GGGGGGGHHH&777F5+=elSSS0111%---ux/000())E#e**U\JJJUX%5u==>>>h##UGk];;C     r!   c           
      h    t          j        dddddddd          }|                     |           d	S )
%Test skipgram w/ hierarchical softmaxr$   rA   r   r1   r:   r;   r0   sgwindowr?   r@   r   rs   r>   workersNr   r&   r   rM   r+   s     r   test_sg_hs_onlinez#TestWord2VecModel.test_sg_hs_online   s@    !QqQaXZacmnooo%     r!   c           
      h    t          j        dddddddd	          }|                     |           d
S )"Test skipgram w/ negative samplingr$   r2   r      r1   r:   r;   r0   r   Nr   r   s     r   test_sg_neg_onlinez$TestWord2VecModel.test_sg_neg_online   s@    !QqQqY[bdnoppp%     r!   c                 l    t          j        dddddddddd	
  
        }|                     |           d
S )!Test CBOW w/ hierarchical softmaxr   r$   皙?rA   r1      r;   r0   )
r   	cbow_meanalphar   r?   r@   r   rs   r>   r   Nr   r   s     r   test_cbow_hs_onlinez%TestWord2VecModel.test_cbow_hs_online   sK    !AT!AQ
 
 
 	%     r!   c                 n    t          j        ddddddddddd	          }|                     |           d
S )Test CBOW w/ negative samplingr   r$   r   rA   r   r:   r;   r0   )r   r   r   r   r?   r@   r   rs   r>   r   r_   Nr   r   s     r   test_cbow_neg_onlinez&TestWord2VecModel.test_cbow_neg_online  sM    !AT!BQq
 
 
 	%     r!   c                 $   t          d          }t          j        t          d          }|                    |           |                     |t          j                            |                     |j        }|                    |           t          j	                            |          }| 
                    t          j        |j        |j                             |                     t          |          t          |                     dS )&Test storing/loading the entire model.r#   r$   r%   N)r
   r   r&   r'   r(   models_equalr)   rJ   r   KeyedVectorsrl   r   r   r   rH   rI   )rM   r*   r+   rJ   	loaded_wvs        r   test_persistencez"TestWord2VecModel.test_persistence	  s    011!)q999

4%!2!7!7!=!=>>>X
 -22488	BJ	0ABBCCCR#i..11111r!   c                     t          d          }t          j                            |          }|                    dg          }|J dS )z?Can we still load a model created with an older gensim version?zmodel-from-gensim-3.8.0.w2vtestN)r	   r   r&   r)   score)rM   pathr+   xs       r   %test_persistence_backwards_compatiblez7TestWord2VecModel.test_persistence_backwards_compatible  sK    566!&&t,,KK!!r!   c                    t          t          d                    5 }t          j        t          |           t          d          }t          j        |d          }|                    |           |                     |t
          j        	                    |                     |j
        }|                    |           t          j        	                    |          }|                     t          j        |j        |j                             |                     t%          |          t%          |                     ddd           dS # 1 swxY w Y   dS )zHTest storing/loading the entire model trained with corpus_file argument.r#   r$   )r{   r   N)r   r
   r   r~   r'   r   r&   r(   r   r)   rJ   r   r   rl   r   r   r   rH   rI   )rM   r{   r*   r+   rJ   r   s         r   test_persistence_from_filez,TestWord2VecModel.test_persistence_from_file  sZ   K(=>>?? 	6;'	;???455D%+KKKEJJteX%6%;%;D%A%ABBBBGGDMMM$166t<<IOOBK
I4EFFGGGSWWc)nn555	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6s   D%EEEc                     t          d          }t          j        t          dt                    }|                    |           |                     |t          j                            |                     dS )z[Test storing/loading the entire model with a vocab trimming rule passed in the constructor.r#   r$   r   	trim_ruleN)r
   r   r&   r'   r    r(   r   r)   rM   r*   r+   s      r   &test_persistence_with_constructor_rulez8TestWord2VecModel.test_persistence_with_constructor_rule-  sf    011!)qEJJJ

4%!2!7!7!=!=>>>>>r!   c                     t          j        t          dggz   dt                    }|                     d|j        v           |                     d|j        v           |                     d|j        v            dS )zCTest that returning RULE_DEFAULT from trim_rule triggers min_count.occurs_only_oncer0   r   r   r8   N)r   r&   r'   r    rl   rJ   r   s     r   test_rule_with_min_countz*TestWord2VecModel.test_rule_with_min_count4  sy    !)0B/C.D"DPQ]bcccux/000*%(:;;;ux/00000r!   c                     t          j        d          }|                    t          t                     |                     d|j        v           dS )zDTest applying vocab trim_rule to build_vocab instead of constructor.r$   r%   )r   r   N)r   r&   rm   r'   r    rl   rJ   r   s     r   	test_rulezTestWord2VecModel.test_rule;  sM    !A...)u555ux/00000r!   c                 |    d }t          j        t          d|          }|                     d|j        v           dS )z!Test that lambda trim_rule works.c                 >    | dk    rt           j        nt           j        S r   r   r   s      r   rulez0TestWord2VecModel.test_lambda_rule.<locals>.ruleC  s    )-P5%%e>PPr!   r$   r   r   N)r   r&   r'   rl   rJ   )rM   r   r+   s      r   test_lambda_rulez"TestWord2VecModel.test_lambda_ruleA  sL    	Q 	Q 	Q !)qDIIIux/00000r!   c                    t           j        dd         dk    rd}nt           j        dk     rd}nd}d|z  }t          j                            t          |                    }|                     |j        j        j	        t          |j                  |j        fk               |                     |j        j	        t          |j                  |j        fk               d	|z  }t          j                            t          |                    }|                     |j        j        j	        t          |j                  |j        fk               |                     |j        j	        t          |j                  |j        fk               dS )
z,Test loading pre-KeyedVectors word2vec modelNr0   r1   r2   _py3_4)r1   _py2_py3zword2vec_pre_kv%szword2vec_pre_kv_sep%s)sysversion_infor   r&   r)   r	   rl   rJ   r   shaperI   r=   syn1neg)rM   model_file_suffix
model_filer+   s       r   $obsolete_testLoadPreKeyedVectorModelz6TestWord2VecModel.obsolete_testLoadPreKeyedVectorModelI  sW    BQB6) 	' ($ 	' & & )+<<
!&&x
';';<<(.3ux==%BS2TTUUU+EHu?P/QQRRR -/@@
!&&x
';';<<(.3ux==%BS2TTUUU+EHu?P/QQRRRRRr!   c                     t           j                            t          d                    }|                     |j        j        d         t          |          k               dS )zETest loading pre-KeyedVectors word2vec model saved in word2vec formatword2vec_pre_kv_cr   N)r   r   load_word2vec_formatr	   rl   r   r   rI   r   s     r   )test_load_pre_keyed_vector_model_c_formatz;TestWord2VecModel.test_load_pre_keyed_vector_model_c_format_  sN    )>>xH[?\?\]]+A.#e**<=====r!   c                    t          d          }t          j        t          d          }|j                            |d           t          j                            |d          }| 	                    t          j        |j        d         |d                              t          j                            |d          }|                                 |                     t          j        |j        d         |d                              | 	                    t          j        |j                            dd          |d                              t          j                            |dd	          }|                     t!          |j                  d           t          j                            |dt          j        
          }|                     |j        j        |j        j        dz             dS )z9Test storing/loading the entire model in word2vec format.r#   r$   r%   Tbinaryr   normr1   )r   limit)r   datatyper0   N)r
   r   r&   r'   rJ   save_word2vec_formatr   r   r   rl   r   r   unit_normalize_allr   
get_vectorrH   rI   r   float16nbytes)rM   r*   r+   binary_model_kvnorm_only_modellimited_model_kvhalf_precision_model_kvs          r    test_persistence_word2vec_formatz2TestWord2VecModel.test_persistence_word2vec_formatd  s   011!)q999%%d4%888&3HHVZH[[EHW$5w7OPPQQQ&3HHVZH[[**,,,UXg%68PQQRRREH$7$7d$7$K$K_]dMeffggg'4II$W[cdIee-566:::".";"P"P
 #Q #
 #
 	079P9X9_bc9cdddddr!   c                 T   t          d          }t          j        t          d          }|j                            |d           t          j                            |d          }t          j                    }||_        | 	                    t          |j        t                     d S )Nr#   r$   r%   Tr   )r
   r   r&   r'   rJ   r   r   r   r   assertRaises
ValueErrorrt   )rM   r*   r+   kvbinary_models        r   test_no_training_c_formatz+TestWord2VecModel.test_no_training_c_formatv  s    011!)q999%%d4%888&;;D;NN(***l&8)DDDDDr!   c                 ^   t          d          }t          j        t          d          }|j                            |d           t          |d          }|                    d           |                                 | 	                    t          t          j        j        |d           d S )Nr#   r$   r%   Tr   r+b   13r
   r   r&   r'   rJ   r   openwritecloser   EOFErrorr   r   r   rM   tfiler+   fs       r   %test_too_short_binary_word2vec_formatz7TestWord2VecModel.test_too_short_binary_word2vec_format  s    122!)q999%%eD%999					(L$=$RTYbfgggggr!   c                 ^   t          d          }t          j        t          d          }|j                            |d           t          |d          }|                    d           |                                 | 	                    t          t          j        j        |d           d S )Nr#   r$   r%   Fr   r   r   r   r   s       r   #test_too_short_text_word2vec_formatz5TestWord2VecModel.test_too_short_text_word2vec_format  s    122!)q999%%eE%:::					(L$=$RTYbghhhhhr!   c                    t          d          }t          j        t          d          }|j                            |d           t          j                            |d          }| 	                    t          j        |j        d         |d         d                     t          j                            |d          }|                                 |                     t          j        |j        d         |d         d                     | 	                    t          j        |j                            dd	
          |d         d                     dS )zDTest storing/loading the entire model in word2vec non-binary format.r#   r$   r%   Fr   r   gư>)atolTr   g-C6?N)r
   r   r&   r'   rJ   r   r   r   r   rl   r   r   r   r   r   )rM   r*   r+   
text_modelr   s        r   +test_persistence_word2vec_format_non_binaryz=TestWord2VecModel.test_persistence_word2vec_format_non_binary  sA   011!)q999%%d5%999!.CCDQVCWW
EHW$5z'7JQUVVVWWW&3HHV[H\\**,,,UXg%68PW[\\\]]]Hd33_W5MTX
 
 
 	 	 	 	 	r!   c                 |   t          d          }t          j        t          d          }t          d          }|j                            ||d           t          j                            ||d          }| 	                    |j        
                    dd          |
                    dd                     d	S 
zHTest storing/loading the entire model and vocabulary in word2vec format.r#   r$   r%   gensim_word2vec.vocabTr   r   r   Nr
   r   r&   r'   rJ   r   r   r   r   rH   rL   rM   r*   r+   	testvocabbinary_model_with_vocab_kvs        r   +test_persistence_word2vec_format_with_vocabz=TestWord2VecModel.test_persistence_word2vec_format_with_vocab      011!)q999 788	%%dId%CCC%1%>%S%STXZclp%S%q%q"H  '22&227GDD	
 	
 	
 	
 	
r!   c                 |   t          d          }t          j        t          d          }t          d          }|j                            ||d           t          j                            ||d          }| 	                    |j        
                    dd          |
                    dd                     d	S r  r
  )rM   r*   r+   r  kv_binary_model_with_vocabs        r   0test_persistence_keyed_vectors_format_with_vocabzBTestWord2VecModel.test_persistence_keyed_vectors_format_with_vocab  r  r!   c                 v   t          d          }t          j        t          d          }t          d          }|j                            ||d           t          j                            ||d          }|	                    |           | 
                    t          t          j        j        |           dS )zTest storing/loading the entire model and vocabulary in word2vec format chained with
         saving and loading via `save` and `load` methods`.
         It was possible prior to 1.0.0 release, now raises Exceptionr#   r$   r%   r	  Tr   N)r
   r   r&   r'   rJ   r   r   r   r   r(   r   AttributeErrorr)   r  s        r   Ftest_persistence_word2vec_format_combination_with_standard_persistencezXTestWord2VecModel.test_persistence_word2vec_format_combination_with_standard_persistence  s     011!)q999 788	%%dId%CCC%1%>%S%STXZclp%S%q%q""''---.(*;*@$GGGGGr!   c                 X   t          d          }t          j        t          d          }|                    |d           |                     |t          j                            |                     |                     |t          j                            |d                     dS )	r   r#   r$   r%   r   )	sep_limitr)mmapN)r
   r   r&   r'   r(   r   r)   r   s      r   test_large_mmapz!TestWord2VecModel.test_large_mmap  s    011!)q999 	

41
%%%%!2!7!7!=!=>>> 	%!2!7!73!7!G!GHHHHHr!   c                    t                      }t          d |D                       }t          j        ddd                              |           |                     t          j                  dk               |                     t          fdj        j	        D                       |           t          j        j                            dd          g d	           t          j        dd
                              |           |                     t          j                  dk               t          j        j                            dd          g d           |                     t          t          j        g            |                     t          t          j        ||dz              dS )z"Test word2vec vocabulary building.c              3   4   K   | ]}t          |          V  d S N)rI   )r   sentences     r   r   z/TestWord2VecModel.test_vocab.<locals>.<genexpr>  s(      ??H#h--??????r!   r$   r   )r   r?   r@   iE  c              3   N   K   | ]}j                             |d           V   dS )r   N)rJ   rL   )r   rR   r+   s     r   r   z/TestWord2VecModel.test_vocab.<locals>.<genexpr>  s5      ]]!UX11!W==]]]]]]r!   thecode)r$   r$   r   r   )r?   r@   i  )r$   r$   r$   r   r%   N)r   sumr   r&   rm   rl   rI   rJ   rH   key_to_indexr   r   rL   r   RuntimeError)rM   corpusr[   r+   s      @r   
test_vocabzTestWord2VecModel.test_vocab  s   ??????? !A!a@@@&!!!EH-... 	]]]]uxG\]]]]]_jkkk
EH((77FFF !Q333&!!!EH-...
EH((77FFF 	,(92>>> 	,(96[[\_]]]]]r!   c                    t          j        dddd          }|                    t                     |                     |j        j        j        t          |j                  dfk               |                     |j	        j        t          |j                  dfk               |
                    t          |j        |j                   |j                            dd          }|j                            dd	
          }|j                            |gd          }d |D             }|                     ||           t          j        t          dddd          }|                     ||           dS )zTest word2vec training.r0   r$   r   r=   r   r?   r@   rq   r   r:   topnTr   ra   positiver*  c                 (    g | ]\  }}|d k    ||fS r   r   r   wr   s      r   
<listcomp>z3TestWord2VecModel.test_training.<locals>.<listcomp>  )    >>>fag>!S>>>r!   Nr   r&   rm   r'   rl   rJ   r   r   rI   syn1rt   ru   rs   most_similarr   rH   r   rM   r+   simsgraph_vectorsims2model2s         r   test_trainingzTestWord2VecModel.test_training  sX    !a1QOOO)$$$(.3ux==!2DDEEE
(S]]A,>>???Ie.@VVVx$$W2$66 x**7*>>%%~B%GG>>>>>u%%% "9!qQYZ[[[%(((((r!   c                    t          t          d                    5 }t          j        t          |           t          j        dddd          }|                    |           |                     |j	        j
        j        t          |j	                  dfk               |                     |j        j        t          |j	                  dfk               |                    ||j        |j                   |j	                            dd	
          }|j	                            dd          }|j	                            |gd          }d |D             }|                     ||           ddd           dS # 1 swxY w Y   dS )z1Test word2vec training with corpus_file argument.r#   r0   r$   r   r(  )r{   r}   r   r:   r)  Tr   ra   r+  c                 (    g | ]\  }}|d k    ||fS r.  r   r/  s      r   r1  z=TestWord2VecModel.test_training_from_file.<locals>.<listcomp>  s)    BBB&!SQ'\BaXBBBr!   N)r   r
   r   r~   r'   r   r&   rm   rl   rJ   r   r   rI   r4  rt   r   rs   r5  r   rH   )rM   tfr+   r7  r8  r9  s         r   test_training_from_filez)TestWord2VecModel.test_training_from_file  s    K(=>>?? 	*2'	2666%!qQQRSSSE"---OOEH,2s58}}a6HHIIIOOEJ,UX0BBCCCKKBE4LUZUaKbbb8((r(::D !8..wT.BBLH))L>)KKEBBEBBBET5)))#	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	*s   EE??FFc                    t          j        t          dddd          }|                    t          t	          t                              }|                     t	          |          t	          t                               dS )zTest word2vec scoring.r0   r$   r   r(  N)r   r&   r'   r   rI   rH   )rM   r+   scoress      r   test_scoringzTestWord2VecModel.test_scoring  sa    !)aAXYZZZ YI77Vc)nn55555r!   c           	         t                      }t          d          D ]]}t          j        dddd|d          }|                    |           t          j        |j        j        d                   }t          j        |j        j        d                   }t          j	        t          |j                  t
          j                  |j        _        d|j        j        d<   |                    ||j        |j        	           |                     ||j        j        d         k                                               |                     ||j        j        d         k                                               _d
S )z5Test word2vec training doesn't change locked vectors.r0   r2   r$   rA   )r=   r?   r@   r   r   r   r   )dtyper   rq   N)r   ranger   r&   rm   r   rF   rJ   r   onesrI   float32vectors_lockfrt   ru   rs   r   r   rl   )rM   r%  r   r+   locked0	unlocked1s         r   test_lockingzTestWord2VecModel.test_locking  sD   (( 	D 	DB%!AQRWYbcdddEf%%% geh.q122G 0 344I%'WS]]"*%M%M%MEH"(+EH"1%KKu/A%,KWWWi58+;A+>>CCEEFFFOOW(8(;;@@BBCCCC	D 	Dr!   c                 |   t          j        t                                }|j                            t          d                    \  }}|j                            t          d          d          \  }}|                     ||           |                     ||           |                     |d           |                     |d           | 	                    t          |          d           |d         }|                     d|           |                     d|           |                     d	|           d
S )z@Test that evaluating analogies on KeyedVectors give sane resultszquestions-words.txtmost_similar_cosmul)similarity_functionr         ?r   sectioncorrect	incorrectN)r   r&   r   rJ   evaluate_word_analogiesr	   rH   assertGreaterEqualassertLessEqualassertGreaterrI   assertIn)rM   r+   r   sectionsscore_cosmulsections_cosmulfirst_sections          r   test_evaluate_word_analogiesz.TestWord2VecModel.test_evaluate_word_analogies3  s*   !)++..(::8DY;Z;Z[[x(-(H(H*++ 5 )I )
 )
%o 	---?333s+++UC(((3x==!,,, i///i///k=11111r!   c                    t          j        t          d                    }t          j        |dd          }|j                            t          d                    }|d         d         }|d         d         }|d         }|                     d	|cxk     od
k     nc d| d           |                     d	|cxk     od
k     nc d| d           |                     d|cxk    odk     nc d| d           dS )[Test Spearman and Pearson correlation coefficients give sane results on similarity datasetshead500.noblanks.cor.bz2r1   r   )r   rs   wordsim353.tsvr   r$   r0   皙?rO  pearson  not between 0.1 & 1.0	spearman  not between 0.1 and 1.0r        V@OOV  not between 0.0 and 90.0N)r   LineSentencer	   r&   rJ   evaluate_word_pairsrl   )rM   r%  r+   correlationpearsonspearmanoovs          r   test_evaluate_word_pairsz*TestWord2VecModel.test_evaluate_word_pairsF  s/   &x0J'K'KLL!&AbAAAh228<L3M3MNNa.#q>!$!ng++++++++-W-W-W-WXXXh,,,,,,,,.\(.\.\.\]]]s))))T))))+P#+P+P+PQQQQQr!   c                    t          t          d                    5 }t          j        t	          j        t          d                    |           t	          j        |dd          }|j        	                    t          d                    }|d         d         }|d         d         }|d	         }| 
                    d
|cxk     odk     nc d| d           | 
                    d
|cxk     odk     nc d| d           | 
                    d|cxk    odk     nc d| d           ddd           dS # 1 swxY w Y   dS )r^  r#   r_  r1   r   )r{   r   rs   r`  r   r$   r0   ra  rO  rb  rc  rd  re  r   rf  rg  rh  N)r   r
   r   r~   r   ri  r	   r&   rJ   rj  rl   )rM   r>  r+   rk  rl  rm  rn  s          r   "test_evaluate_word_pairs_from_filez4TestWord2VecModel.test_evaluate_word_pairs_from_fileR  s   K(=>>?? 
	V2'(=hGa>b>b(c(ceghhh%""MMME(66x@P7Q7QRRK!!nQ'G"1~a(Ha.COOC'////C////1[G1[1[1[\\\OOC(0000S00002`h2`2`2`aaaOOC3--------/Tc/T/T/TUUU
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	Vs   DD99D= D=TNc                    |r|                     t                     t          j        |j        j        d                   }|rLt          d          }t          j        t          |           |	                    ||j
        |j                   n'|	                    t          |j        |j                   |                     ||j        j        d         k                                               d}d}|j                            |t!          |j        j                            }	d	 |	D                                 |          }
||                    |
           |                     |
d           |j        |         }|j                            |gd          }|                     |d |D             v            |                     |d |D             v            d
S )zEEven tiny models trained on LeeCorpus should pass these sanity checksr   r#   r}   rq   r$   attacksbombingsr)  c                     g | ]\  }}|S r   r   r   r   r   s      r   r1  z2TestWord2VecModel.model_sanity.<locals>.<listcomp>q  s    ///;4$///r!   N2   3   c                     g | ]\  }}|S r   r   rv  s      r   r1  z2TestWord2VecModel.model_sanity.<locals>.<listcomp>x  s    &E&E&Eet&E&E&Er!   c                     g | ]\  }}|S r   r   rv  s      r   r1  z2TestWord2VecModel.model_sanity.<locals>.<listcomp>y  s    )H)H)H;4$)H)H)Hr!   )rm   r   r   rF   rJ   r   r
   r   r~   rt   r   rs   ru   r   r   r5  rI   index_to_keyindexr   r   rl   )rM   r+   rt   with_corpus_fileranksr   tmpfile
query_wordexpected_wordr7  t_rank	query_vecr9  s                r   model_sanityzTestWord2VecModel.model_sanity`  s     
	Co...GEH,Q/00E e%&;<<+OWEEEU=U^c^jkkkkOE<NW\Wcdddeux'7'::??AABBB
"x$$Zc%(:O6P6P$QQ//$///55mDD 	!LL   ###HZ(	%%yk%;;
&E&Eu&E&E&EEFFF)H)H%)H)H)HHIIIIIr!   c           	      f    t          j        ddddddd          }|                     |           dS )	r   r$   r2   r   rA   r:   r0   r   r   r?   r@   r   rs   r   Nr   r&   r  r   s     r   
test_sg_hszTestWord2VecModel.test_sg_hs{  s=    !QqQaXZdefff%     r!   c           	      j    t          j        ddddddd          }|                     |d	           d S )
Nr$   r2   r   rA   r:   r0   r  Tr}  r  r   s     r   test_sg_hs_fromfilez%TestWord2VecModel.test_sg_hs_fromfile  sB    !QqQaXZdefff%$77777r!   c           	      f    t          j        ddddddd          }|                     |           d	S )
r   r$   r2   r   r   rA   r:   r0   r  Nr  r   s     r   test_sg_negzTestWord2VecModel.test_sg_neg  s=    !QqQqY[efggg%     r!   c           	      j    t          j        ddddddd          }|                     |d	
           d S )Nr$   r2   r   r   rA   r:   r0   r  Tr  r  r   s     r   test_sg_neg_fromfilez&TestWord2VecModel.test_sg_neg_fromfile  sB    !QqQqY[efggg%$77777r!   BULK_TEST_REPSz'bulk test only occasionally run locally)reasonc                    d}g }t          t          j        d                   }t          j                            dd          }t	          | |          }t          |          D ]?}	  ||           # t          $ r#}t          |d|           |dz  }Y d}~8d}~ww xY wt          |           t          t          j	        |                     | 
                    |dd	           dS )
ae  Not run by default testing, but can be run locally to help tune stochastic aspects of tests
        to very-very-rarely fail. EG:
        % BULK_TEST_REPS=200 METHOD_NAME=test_cbow_hs pytest test_word2vec.py -k "test_method_in_bulk"
        Method must accept `ranks` keyword-argument, empty list into which salient internal result can be reported.
        r   r  METHOD_NAMEtest_cbow_hsr~  z	 failed: r$   Nztoo many failures)intosenvirongetgetattrrE  	Exceptionprintr   meanassertEquals)rM   failuresr~  repsmethod_name	method_fniexs           r   test_method_in_bulkz%TestWord2VecModel.test_method_in_bulk  s    2:./00jnn]NCCD+..	t 	 	A	&&&&&   bb9:::A 	ebgenn(A':;;;;;s   &A33
B =BB c                 p    t          j        dddddddddd
  
        }|                     ||	           d
S )r   r   r$   ra  r0   rA   <     
r   r   r   r   r?   r@   r   rs   r   batch_wordsr  Nr  rM   r~  r+   s      r   r  zTestWord2VecModel.test_cbow_hs  sP    !ASq1A4
 
 
 	%u-----r!   c                 p    t          j        dddddddddd
  
        }|                     |d	
           d S )Nr   r$   ra  r0   rA   r  r  r  Tr  r  r   s     r   test_cbow_hs_fromfilez'TestWord2VecModel.test_cbow_hs_fromfile  sP    !ASq1A4
 
 
 	%$77777r!   c                 p    t          j        dddddddddd
  
        }|                     ||	           d
S )r   r   r$   r   rA   r   r:   r0   
r   r   r   r   r?   r@   r   rs   r   r_   r  Nr  r  s      r   test_cbow_negzTestWord2VecModel.test_cbow_neg  sP    !AT!BAa
 
 
 	%u-----r!   c                 p    t          j        dddddddddd
  
        }|                     |d	
           d S )Nr   r$   r   rA   r   r:   r0   r  Tr  r  r   s     r   test_cbow_neg_fromfilez(TestWord2VecModel.test_cbow_neg_fromfile  sP    !AT!BAa
 
 
 	%$77777r!   c           
      h    t          j        dddddddd          }|                     |           d	S )
z-Test skipgram with fixed window size. Use NS.r$   rA   Fr   r   r:   r0   r   r   shrink_windowsr?   r@   r   rs   r   Nr  r   s     r   test_sg_fixedwindowsizez)TestWord2VecModel.test_sg_fixedwindowsize  sG    !5Q1R
 
 
 	%     r!   c           
      l    t          j        dddddddd          }|                     |d	           d
S )zATest skipgram with fixed window size. Use HS and train from file.r$   rA   Fr   r:   r0   r  Tr  Nr  r   s     r    test_sg_fixedwindowsize_fromfilez2TestWord2VecModel.test_sg_fixedwindowsize_fromfile  sL    !5Q!B
 
 
 	%$77777r!   c                 p    t          j        dddddddddd
  
        }|                     ||	           d
S )z)Test CBOW with fixed window size. Use HS.r   r$   ra  rA   Fr:   r0   
r   r   r   r   r  r?   r@   r   rs   r   r  Nr  r  s      r   test_cbow_fixedwindowsizez+TestWord2VecModel.test_cbow_fixedwindowsize  sP    !AS51"a
 
 
 	%u-----r!   c                 p    t          j        dddddddddd	
  
        }|                     |d
           dS )z=Test CBOW with fixed window size. Use NS and train from file.r   r$   ra  rA   Fr   r:   r0   r  Tr  Nr  r   s     r   "test_cbow_fixedwindowsize_fromfilez4TestWord2VecModel.test_cbow_fixedwindowsize_fromfile  sP    !AS522q
 
 
 	%$77777r!   c                 0   t          j        t          dddd          }|j                            dd          }|j                            dd	          }|j                            |gd
          }d |D             }|                     ||           d S )Nr0   r$   r   r(  r   r:   r)  Tr   ra   r+  c                 (    g | ]\  }}|d k    ||fS r.  r   r/  s      r   r1  z1TestWord2VecModel.test_cosmul.<locals>.<listcomp>  r2  r!   )r   r&   r'   rJ   rM  r   rH   )rM   r+   r7  r8  r9  s        r   test_cosmulzTestWord2VecModel.test_cosmul  s    !)aAXYZZZx++G"+== x**7*>>,,|n2,NN>>>>>u%%%%%r!   c                    t          j        ddddd          }|                    t                     |                     |j        j        j        t          |j                  dfk               |                     |j	        j        t          |j                  dfk               |
                    t          |j        |j                   |j                            dd          }|j                            dd	
          }|j                            |gd          }d |D             }|                     ||           t          j        t          ddddd          }|                     ||           dS )zTest CBOW word2vec training.r0   r$   r   r=   r   r   r?   r@   rq   r   r:   r)  Tr   ra   r+  c                 (    g | ]\  }}|d k    ||fS r.  r   r/  s      r   r1  z8TestWord2VecModel.test_training_cbow.<locals>.<listcomp>  r2  r!   Nr3  r6  s         r   test_training_cbowz$TestWord2VecModel.test_training_cbow  s^    !a1qSTUUU)$$$(.3ux==!2DDEEE
(S]]A,>>???Ie.@VVVx$$W2$66 x**7*>>%%~B%GG>>>>>u%%% "9!qQST_`aaa%(((((r!   c                    t          j        ddddd          }|                    t                     |                     |j        j        j        t          |j                  dfk               |                     |j	        j        t          |j                  dfk               |
                    t          |j        |j                   |j                            dd          }|j                            dd	
          }|j                            |gd          }d |D             }|                     ||           t          j        t          ddddd          }|                     ||           dS )z5Test skip-gram (negative sampling) word2vec training.r0   r$   r   r  rq   r   r:   r)  Tr   ra   r+  c                 (    g | ]\  }}|d k    ||fS r.  r   r/  s      r   r1  z?TestWord2VecModel.test_training_sg_negative.<locals>.<listcomp>  r2  r!   Nr   r&   rm   r'   rl   rJ   r   r   rI   r   rt   ru   rs   r5  r   rH   r   r6  s         r   test_training_sg_negativez+TestWord2VecModel.test_training_sg_negative  ^    !a1qSTUUU)$$$(.3ux==!2DDEEE+EHq/AABBBIe.@VVVx$$W2$66 x**7*>>%%~B%GG>>>>>u%%% "9!qQST_`aaa%(((((r!   c                    t          j        ddddd          }|                    t                     |                     |j        j        j        t          |j                  dfk               |                     |j	        j        t          |j                  dfk               |
                    t          |j        |j                   |j                            dd          }|j                            dd	
          }|j                            |gd          }d |D             }|                     ||           t          j        t          ddddd          }|                     ||           dS )z0Test CBOW (negative sampling) word2vec training.r0   r$   r   r  rq   r   r:   r)  Tr   ra   r+  c                 (    g | ]\  }}|d k    ||fS r.  r   r/  s      r   r1  zATestWord2VecModel.test_training_cbow_negative.<locals>.<listcomp>*  r2  r!   Nr  r6  s         r   test_training_cbow_negativez-TestWord2VecModel.test_training_cbow_negative  r  r!   c                    t          j        ddddd          }|                    t                     |                    t          |j        |j                   |                     |j        	                    ddgddg                     |                     |j        	                    dgdg          |j        
                    dd          k               |                     t          |j        j	        ddgg            |                     t          |j        j	        g ddg           |                     t          |j        j	        g g            dS )	z)Test similarity and n_similarity methods.r0   r$   r   r  rq   r   r   N)r   r&   rm   r'   rt   ru   rs   rl   rJ   r   
similarityr   ZeroDivisionErrorr   s     r   test_similaritiesz#TestWord2VecModel.test_similarities1  s2    !a1qSTUUU)$$$Ie.@VVV--w.@7GBTUUVVV--wi'CCuxGZGZ[bdkGlGllmmm+UX-BWgDVXZ[[[+UX-BBRYHZ[[[+UX-BBKKKKKr!   c                    t          j        t          dddd          }|j                            dd          }|j                            dd          }|j                            |j        d         d          }|j                            |j        d         gd          }|                     ||           |                     ||           d	S )
z4Test word2vec similar_by_word and similar_by_vector.r0   r$   r   r(  r   r:   r)  r+  N)r   r&   r'   rJ   similar_by_wordr5  similar_by_vectorrH   )rM   r+   wordsims	wordsims2
vectorsimsvectorsims2s         r   test_similar_byz!TestWord2VecModel.test_similar_by>  s    !)aAXYZZZ8++G"+==H))7)DD	X//0A/KK
h++UXg->,?b+II9---[11111r!   c                 P   t          j        t                      d          }dD ]}t          j        |dd|          }d}d}|j                            |t          |j                            }d	 |D                                 |          }| 	                    |d
           dS )z Test word2vec parallel training.i'  )r2      iJ  )r=   r   r   israelipalestinianr)  c                     g | ]\  }}|S r   r   )r   r   r   s      r   r1  z3TestWord2VecModel.test_parallel.<locals>.<listcomp>S  s    888idCT888r!   rA   N)
r   RepeatCorpusr   r   r&   rJ   r5  rI   r|  r   )rM   r%  r   r+   origin_wordexpected_neighborr7  neighbor_ranks           r   test_parallelzTestWord2VecModel.test_parallelH  s    #IKK77 	. 	.G%f"[bcccE#K -8((3ux==(IID 984888>>?PQQMOOM1----	. 	.r!   c                     t          j        t          ddd          }t          j        t          ddd          }|                     ||           dS )z8Test word2vec results identical with identical RNG seed.r0   r;   r$   r   r>   r   N)r   r&   r'   r   )rM   r+   r:  s      r   
test_r_n_gzTestWord2VecModel.test_r_n_gV  sP    !)qr1MMM"9ANNN%(((((r!   c                    |                      t          |j                  t          |j                             |                     t	          j        |j        j        |j        j                             |j        r2|                     t	          j        |j        |j                             |j	        r2|                     t	          j        |j
        |j
                             t	          j        |j        j        d                   d         }|j        j        |         }|                     t	          j        |j        |         |j        |                              d S )Nr   )rH   rI   rJ   rl   r   r   r   r?   r4  r@   r   argsortexpandosr{  )rM   r+   r:  most_common_word_indexmost_common_words        r   r   zTestWord2VecModel.models_equal\  s   UXFI777EH$4fi6GHHIII8 	BOOBK
FK@@AAA> 	HOOBKv~FFGGG!#EH,=g,F!G!G!K 801GHEH-=$>	JZ@[\\]]]]]r!   c                     t          j        t          d                              ddgd          }|                     t          |          dk                                   g dd          }|                     |d           t          d	          }j        	                    |d
           t          j                            |d
          }t          j                    }||_        |                     t          |j        ddg           t          j        t          dd          }|                     t          |j        ddg           ddg}j                            |d                   |d         g}fd|D             }	                    |d          }
                    |d          }                    |	d          }|                     |
|           |                     |
|           dS )zNTest word2vec predict_output_word method handling for negative sampling schemer$   r%   r   r   rA   r)  )somerandomwordsNr#   Tr   r   )r   r@   c                 D    g | ]}j                             |          S r   )rJ   	get_index)r   r0  model_with_negs     r   r1  z>TestWord2VecModel.test_predict_output_word.<locals>.<listcomp>  s*    KKK!~(22155KKKr!   )r   r&   r'   predict_output_wordrl   rI   rH   r
   rJ   r   r   r   r   r   r$  r  )rM   predictions_with_negpredictions_out_of_vocabr*   kv_model_with_negbinary_model_with_negmodel_without_negstr_contextmixed_contextidx_contextprediction_from_strprediction_from_mixedprediction_from_idxr  s                @r   test_predict_output_wordz*TestWord2VecModel.test_predict_output_wordg  s!    "*9BBB-AA8WBU\]A^^011Q6777 $2#E#EFaFaFahi#E#j#j 14888 011..tD.AAA(5JJ4X\J]] ( 1 3 3#4 ,(=(QT\^eSfggg %-i1qQQQ,(9(MPXZaObccc  )'*44[^DDkRSnUKKKK{KKK,@@ST@UU . B B=WX B Y Y,@@ST@UU,.CDDD,.ABBBBBr!   c                    d}t           j                            t          |                    }|                     |j        j        j        dk               |                     t          |j                  dk               |                     t          |j        j	                  dk               |                     |j
        j        t          |j                  |j        j        fk               |                     t          |j        j        j                  dk               |                     |j        j        dk               |                     |d           dS )	;Test loading an old word2vec model of indeterminate versionword2vec_oldrB   d   rB   r   rB   Tr   Nr   r&   r)   r	   rl   rJ   r   r   rI   r{  r   r=   rH  	cum_tabler   rM   r   r+   s      r   test_load_old_modelz%TestWord2VecModel.test_load_old_model  s    $
!&&x
';';<<(.);<<<EH+,,,EH122b8999+EHux?S/TTUUUEH2899A=>>>-6777%t44444r!   c                    d}t           j                            t          |                    }|                     |j        j        j        dk               |                     t          |j                  dk               |                     t          |j        j	                  dk               |                     |j
        j        t          |j                  |j        j        fk               |                     t          |j        j        j                  dk               |                     |j        j        dk               |                     |d           dS )	r  word2vec_old_sepr  rB   r   r  Tr  Nr  r	  s      r   test_load_old_model_separatesz/TestWord2VecModel.test_load_old_model_separates  s    (
!&&x
';';<<(.);<<<EH+,,,EH122b8999+EHux?S/TTUUUEH2899A=>>>-6777%t44444r!   c                     d}t           j                            t          |                    }|                     |d           g d}|D ]}|                     |           dS )zTest loading pre-1.0 modelszw2v-lee-v0.12.0Tr  )
z0.12.0z0.12.1z0.12.2z0.12.3z0.12.4z0.13.0z0.13.1z0.13.2z0.13.3z0.13.4N)r   r&   r)   r	   r   _check_old_versionrM   r   r+   old_versionsold_versions        r   %obsolete_test_load_old_models_pre_1_0z7TestWord2VecModel.obsolete_test_load_old_models_pre_1_0  s     '
!&&x
';';<<%t444
 
 

 ( 	1 	1K##K0000	1 	1r!   c                 B    ddg}|D ]}|                      |           dS )zTest loading 1.x modelsz1.0.0z1.0.1Nr  rM   r  r  s      r   test_load_old_models_1_xz*TestWord2VecModel.test_load_old_models_1_x  sB     W
 ( 	1 	1K##K0000	1 	1r!   c                 B    g d}|D ]}|                      |           dS )zTest loading 2.x models)z2.0.0z2.1.0z2.2.0z2.3.0Nr  r  s      r   test_load_old_models_2_xz*TestWord2VecModel.test_load_old_models_2_x  sE    
 
 
 ( 	1 	1K##K0000	1 	1r!   c                 
   d}t           j                            t          |                    }|                     |j        d           |                     |j        d           g d}|D ]}|                     |           dS )zTest loading 3.x modelszword2vec_3.3N)z3.0.0z3.1.0z3.2.0z3.3.0z3.4.0)r   r&   r)   r	   rH   r^   r  r  s        r   test_load_old_models_3_xz*TestWord2VecModel.test_load_old_models_3_x  s     $
!&&x
';';<<.555.555
 
 
 ( 	1 	1K##K0000	1 	1r!   c                    t          j        d|           t          d          }t          j                            |                    |                    }|                     |j                   | 	                    t          |j                  dk               	 | 	                    |j        j        j        dk               nq# t          $ rd}t          d           t          |j                   t          t!          |j                             t          |j        j                   |d }~ww xY w| 	                    t          |j                            d                    dk               |                    t(          d	           |                    t(          |j        |j        
           t1          d          }|                    |           t          j                            |          }|                    t(          d	           |                    t(          |j        |j        
           d S )Nz!TESTING LOAD of %s Word2Vec MODELzold_w2v_models/w2v_{}.mdlr1   r   WVr  r0   TrC   rq   r#   )logginginfor	   r   r&   r)   formatassertIsNoner   rl   rI   rJ   r   r   r  r  dirsyn0r5  rm   r   rt   ru   rs   r
   r(   )rM   r  saved_models_dirr+   aer*   loaded_models          r   r  z$TestWord2VecModel._check_old_version  s   8+FFF#$?@@!&&'7'>'>{'K'KLL%2333EH*+++	OOEH,2f<==== 	 	 	$KKK%(OOO#eh--   %(-   H	 	EH11*==>>!CDDD/$777OE4Fu|\\\011

4(--d33   >>>?5;MV[Vbcccccs   (C 
D4AD//D4c                     ddg}t          j                    }|                    |           d}|                     |t	          |          v            dS )z@Test if warning is raised on non-ideal input to a word2vec modelr   machinezJEach 'sentences' item should be a list of words (usually unicode strings).N)r   r&   rm   rl   str)rM   loglinesr'   r+   warnings        r   test_build_vocab_warningz*TestWord2VecModel.test_build_vocab_warning  sX     i(	!##)$$$^3x==011111r!   c                    dgddgg}t          j        d          }|                    |           t          d          D ]V}|                    ||j        |j                   |xj        dz  c_        |j        |_        |d	k    r|xj        d
z  c_        Wd}| 	                    |t          |          v            dS )zKTest if warning is raised if alpha rises during subsequent calls to train()r   r   r   r$   r%   r:   rq   gMb`?rA   r   z6Effective 'alpha' higher than previous training cyclesN)r   r&   rm   rE  rt   ru   rs   r   	min_alpharl   r)  )rM   r*  r'   r+   epochr+  s         r   test_train_warningz$TestWord2VecModel.test_train_warning  s     Ig
	 !A...)$$$2YY 	$ 	$EKK	%2DU\KZZZKK5 KK#kEOz $t#J3x==011111r!   c                 P   t          j        dddd          }|                    t                     |                     t
                    5  |                    t          |j                   d d d            n# 1 swxY w Y   |                     t
                    5  |                    t          |j                   d d d            n# 1 swxY w Y   |                     t
                    5  |                    t                     d d d            d S # 1 swxY w Y   d S )Nr0   r$   r   r(  )rr   )rs   )	r   r&   rm   r'   r   r   rt   ru   rs   r   s     r   test_train_with_explicit_paramz0TestWord2VecModel.test_train_with_explicit_param  s   !a1QOOO)$$$z** 	F 	FKK	%2DKEEE	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F z** 	8 	8KK	%,K777	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 z** 	# 	#KK	"""	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	#s6   "A;;A?A? "CCC3DD"Dc                 v    d t           D             }|                     t          t          j        |f           dS )z2
        Is sentences a generator object?
        c              3      K   | ]}|V  d S r  r   )r   ss     r   r   zMTestWord2VecModel.test_sentences_should_not_be_a_generator.<locals>.<genexpr>  s"      $$Qq$$$$$$r!   N)r'   r   	TypeErrorr   r&   )rM   gens     r   (test_sentences_should_not_be_a_generatorz:TestWord2VecModel.test_sentences_should_not_be_a_generator  s:     %$)$$$)X%6?????r!   c                 F    |                      t          t                     dS )zCTest if exception is raised when loading word2vec model on instanceN)r   r  r,   )rM   s    r   test_load_on_class_errorz*TestWord2VecModel.test_load_on_class_error  s    .*:;;;;;r!   c                     t          j        d          5 }|                     t          t          j        d|j        f           ddd           dS # 1 swxY w Y   dS )z3
        Is corpus_file a compressed file?
        z.bz2)suffixN)tempfileNamedTemporaryFiler   r6  r   r&   name)rM   fps     r   "test_file_should_not_be_compressedz4TestWord2VecModel.test_file_should_not_be_compressed#  s     (777 	M2i):T27OLLL	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	Ms   .AAAc                     t          j        t          d          }t          j        t          d          }|                    |           |                     |j        j        |j        j                   dS )z?Test if reset_from() uses pre-built structures from other modelr$   r%   N)r   r&   r'   rn   
reset_fromrH   rJ   r#  )rM   r+   other_models      r   test_reset_fromz!TestWord2VecModel.test_reset_from*  sf    !)q999'CCC%%%.0KLLLLLr!   c                    t          j        dddd          }|                    t                     |                    t          d|j        |j                   |                                }|                     |dk               d S )Nr$   rA   )r   r   r@   r?   T)compute_lossrr   rs   r   )	r   r&   rm   r'   rt   ru   rs   get_latest_training_lossrl   )rM   r+   training_loss_vals      r   test_compute_training_lossz,TestWord2VecModel.test_compute_training_loss1  s    !A!aAFFF)$$$IDAS\a\hiii!::<<)C/00000r!   c                 >   t          j        t          ddd          }t          d          }|                    |           t           j                            |          }|                    t          |j        d           |j        dk    sJ |j                    dS )z@The model should accept a negative ns_exponent as a valid value.r  r$   )ns_exponentr   r   zw2v_negative_exp.tstrq   N)	r   r&   r'   r
   r(   r)   rt   ru   rL  )rM   r+   r*   r&  s       r   test_negative_ns_expz&TestWord2VecModel.test_negative_ns_exp8  s    !)qRSTTT122

4(--d339U5GPQRRR'2-GG|/GGGGGGr!   )F)TFNr  )W__name__
__module____qualname__rT   rW   r\   ri   ro   rv   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r&  r;  r?  rB  rK  r\  ro  rq  r  r  r  r  r  unittestskipIfr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r
  r  r  r  r  r  r  r   r,  r0  r2  r8  r:  rA  rE  rJ  rM  r   r!   r   r.   r.   @   s]       0 0 0<E E E4* * *
7 7 7*0 0 0
0 
0 
04 4 424 4 4,! ! ! !&! ! !
! ! !
! ! !! ! !2 2 2  6 6 6 ? ? ?1 1 11 1 11 1 1S S S,> > >
e e e$E E Eh h hi i i  

 

 



 

 


H 
H 
H
I 
I 
I^ ^ ^6) ) ).* * *,6 6 6D D D(2 2 2&
R 
R 
RV V VJ J J J6! ! !
8 8 8! ! !
8 8 8 X_%RZ7@ijjj< < kj<*. . . .8 8 8. . . .8 8 8! ! !8 8 8. . . .8 8 8	& 	& 	&) ) ).) ) ).) ) ).L L L2 2 2. . .) ) )	^ 	^ 	^C C CB5 5 55 5 51 1 11 1 11 1 11 1 1 d d d2 []]2 2 ]2 []]2 2 ]2"
# 
# 
#@ @ @< < <M M MM M M1 1 1H H H H Hr!   r.   c                       e Zd Z ej        edu d          d             Z ej        edu d          d             Z ej        edu d          d             ZdS )TestWMDFzPOT not installedc                     t          j        t          ddd          }g d}g d}|j                            ||          }|                     |dk               dS )	z.Test basic functionality with a test sentence.r0   r;   r$   r  r   r8   r   r5   r6   r   r   r9   r7   r   N)r   r&   r'   rJ   
wmdistancer   )rM   r+   	sentence1	sentence2distances        r   test_nonzerozTestWMD.test_nonzeroF  sj     !)qr1MMM666	PPP	8&&y)<< 	S)))))r!   c                    t          j        t          ddd          }g d}g d}|j                            ||          }|j                            ||          }|                     t          j        ||                     dS )z!Check that distance is symmetric.r0   r;   r$   r  rV  rW  N)r   r&   r'   rJ   rX  rl   r   r   )rM   r+   rY  rZ  	distance1	distance2s         r   test_symmetryzTestWMD.test_symmetryR  s     !)qr1MMM666	PPP	H''	9==	H''	9==	Iy99:::::r!   c                     t          j        t          d          }g d}|j                            ||          }|                     d|           dS )z:Check that the distance from a sentence to itself is zero.r$   r%   rW  r   N)r   r&   r'   rJ   rX  rH   )rM   r+   r  r[  s       r   test_identical_sentencesz TestWMD.test_identical_sentences]  sW     !)q999OOO8&&x::h'''''r!   N)	rN  rO  rP  rQ  rR  POT_EXTr\  r`  rb  r   r!   r   rT  rT  D  s        X_W%':;;	* 	* <;	* X_W%':;;; ; <;; X_W%':;;( ( <;( ( (r!   rT  c                   2    e Zd Zd Zd Zd Zd Zd Zd ZdS )TestWord2VecSentenceIteratorsc           	      b   t          j        t          d          d          5 }t          j        t          d                    }|D ]N}|                     |t          j        |                                                                                     O	 ddd           dS # 1 swxY w Y   dS )z0Does LineSentence work with a filename argument?lee_background.corrbN	r   r   r	   r   ri  rH   
to_unicodereadlinesplitrM   origr'   r  s       r   &test_line_sentence_works_with_filenamezDTestWord2VecSentenceIterators.test_line_sentence_works_with_filenameh  s    Z!566== 	S -h7K.L.LMMI" S S  (8(I(I(O(O(Q(QRRRRS	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	Ss   A3B$$B(+B(c                 L   ddl m} t          j        t	          d          d          5 }|                    t	          d                    }|D ]<}|                     ||                                                                           =	 ddd           dS # 1 swxY w Y   dS )z6Does CythonLineSentence work with a filename argument?r   )word2vec_corpusfilerg  rh  N)	gensim.modelsrq  r   r   r	   CythonLineSentencerH   rk  rl  )rM   rq  rn  r'   r  s        r   -test_cython_line_sentence_works_with_filenamezKTestWord2VecSentenceIterators.test_cython_line_sentence_works_with_filenameo  s   555555Z!566== 	A+>>xH\?]?]^^I" A A  (=(=(?(?@@@@A	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	As   A"BB Bc           	         t          j        t          d          d          5 }t          j        t          j        t          d                              }|D ]N}|                     |t          j        |	                                          
                                           O	 ddd           dS # 1 swxY w Y   dS )z>Does LineSentence work with a compressed file object argument?head500.noblanks.corrh  r_  N)r   r   r	   r   ri  bz2BZ2FilerH   rj  rk  rl  rm  s       r   -test_line_sentence_works_with_compressed_filezKTestWord2VecSentenceIterators.test_line_sentence_works_with_compressed_filew  s   Z!788$?? 	S4 -ck(C]:^:^._._``I" S S  (8(I(I(O(O(Q(QRRRRS	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	Ss   BB66B:=B:c           
         t          j        t          d          d          5 }t          j        t          d          d          5 }t          j        |          }|D ]N}|                     |t          j        |                                                                                     O	 ddd           n# 1 swxY w Y   ddd           dS # 1 swxY w Y   dS )zIDoes LineSentence work with a file object argument, rather than filename?rv  rh  Nri  )rM   rn  finr'   r  s        r   )test_line_sentence_works_with_normal_filezGTestWord2VecSentenceIterators.test_line_sentence_works_with_normal_file~  sm   Z!788$?? 	W4H%;<<dCC Ws$1#66	& W WE$$UE,<T]]__,M,M,S,S,U,UVVVVWW W W W W W W W W W W W W W W	W 	W 	W 	W 	W 	W 	W 	W 	W 	W 	W 	W 	W 	W 	W 	W 	W 	Ws6   #CA&B9-C9B=	=C B=	CCCc           
         t          j        t          j                            t          d          d          d          5 }t          j        t          j                            t          d          d          d          5 }t          j        t          d                    }|                                |                                z   }d}|D ]G}| 	                    |t          j
        ||                                                              |dz  }H	 ddd           n# 1 swxY w Y   ddd           dS # 1 swxY w Y   dS )z1Does PathLineSentences work with a path argument?PathLineSentences1.txtrh  z	2.txt.bz2r   r$   N)r   r   r  r   joinr	   r   r~  	readlinesrH   rj  rl  )rM   orig1orig2r'   rn  orig_counterr  s          r   test_path_line_sentencesz6TestWord2VecSentenceIterators.test_path_line_sentences  s   ZX.A%B%BGLLdSS 	&W\BGLL2E)F)FTTVZ[[ &_d$6x@S7T7TUU	((5??+<+<< & & &E$$UE,<T,=O,P,P,V,V,X,XYYY A%LL&	& & & & & & & & & & & & & & &	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&s8   AD>BD&D>&D*	*D>-D*	.D>>EEc           	         t           j                            t          d          d          }t	          j        |d          5 }t          j        |          }|D ]N}|                     |t	          j	        |
                                                                                     O	 ddd           dS # 1 swxY w Y   dS )z8Does PathLineSentences work with a single file argument?r~  r  rh  N)r  r   r  r	   r   r   r   r~  rH   rj  rk  rl  )rM   	test_filern  r'   r  s        r   !test_path_line_sentences_one_filez?TestWord2VecSentenceIterators.test_path_line_sentences_one_file  s   GLL*=!>!>HH	Z	4(( 	SD 29==I" S S  (8(I(I(O(O(Q(QRRRRS	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	S 	Ss   A&B77B;>B;N)	rN  rO  rP  ro  rt  ry  r|  r  r  r   r!   r   re  re  g  s        S S SA A AS S SW W W	& 	& 	&S S S S Sr!   re  c                       e Zd Zd ZdS )TestWord2VecScriptsc                     t           j        dddt          d          ddddd	d
ddddddg}t          |t          j                  }|                     |d           dS )z'Does Word2Vec script launch standalone?z-mz"gensim.scripts.word2vec_standalonez-trainztestcorpus.txtz-outputzvec.txtz-size200z-samplez1e-4z-binary0z-iter3z
-min_count1)argsstderrr!   N)r   
executabler	   r   
subprocessPIPErH   )rM   cmdoutputs      r    test_word2vec_stand_alone_scriptz4TestWord2VecScripts.test_word2vec_stand_alone_script  so     ND"Fh/00y'5)VsGS,	
 3z???%%%%%r!   N)rN  rO  rP  r  r   r!   r   r  r    s#        	& 	& 	& 	& 	&r!   r  r   c                 F    |                      ||k     |d|           d S )Nz is not less than )msg)rl   )rM   abr  s       r   r   r     s-    Aqqq!!#DEEEEEr!   __main__z:%(asctime)s : %(threadName)s : %(levelname)s : %(message)s)r   levelzgensim.test.test_word2vec)moduler  )/__doc__r  rQ  r  rw  r   r=  r  numpyr   testfixturesr   otr   rc  ImportErrorr   gensimr   rr  r   r   gensim.utilsr   gensim.test.utilsr	   r
   r   r   r'   r   r   rn   r    r,   TestCaser.   rT  re  r  hasattrr   setattrrN  basicConfigDEBUGmainr   r!   r   <module>r     s      				 



 



          $ $ $ $ $ $GGZ    GGG       0 0 0 0 0 0 0 0 % % % % % %                /..7n7,,," " "  H H H H H) H H HH  (  (  (  (  (h  (  (  (F0S 0S 0S 0S 0SH$5 0S 0S 0Sl
& 
& 
& 
& 
&(+ 
& 
& 
& w ,// 9F F F F G|Z888 z 6GKm    HM45555556 6s   3 	??