
    cE'                       d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZmZmZmZmZ  d dl!m"Z" d dl#Zd d	l$m%Z%m&Z&m'Z' d dlZ	 d d
l(m)Z) dZ*n# e+e,f$ r dZ*Y nw xY w ej-        e.          Z/ej0        dk    o e	j1        d          dz  dk    Z2dZ3dZ4ej5        6                    d          Z7 ej8        de7          p ej8        d          Z9g dddgddgddgdgg dgZ: G d dej;                  Z<ej=        >                    dddg          d              Z?ej=        >                    dddg          d!             Z@ej=        >                    dddg          d"             ZAej=        >                    dddg          d#             ZB eC ed$                    5 ZDeDE                                F                                G                    d%          gZHddd           n# 1 swxY w Y   d[d(ZId) ZJd* ZKd+ ZLd, ZMd- ZN G d. d/ej;                  ZOd0 ZP G d1 d2ej;                  ZQ G d3 d4ej;                  ZRd5 ZS G d6 d7ej;                  ZTd8 ZU G d9 d:ej;                  ZVd; ZW G d< d=ej;                  ZX G d> d?ej;                  ZYd@ZZ ej[        g dA ej\        dB          C          Z] G dD dEej;                  Z^dF Z_dG Z` G dH dIej;                  ZadJ Zb G dK dLej;                  ZcdM Zd eje        e9 dN           G dO dPej;                              ZfdQ Zg eje        e9 dN           G dR dSej;                              Zh G dT dUej;                  Zi G dV dWej;                  Zje.dXk    r( ejk        dYejl        Z            ejm                     dS dS )\    )divisionN)utils)LineSentence)FastTextFastTextKeyedVectors_unpack)KeyedVectors)datapathget_tmpfiletemporary_filecommon_textslee_corpus_list)TestWord2VecModel)compute_ngramscompute_ngrams_bytesft_hash_bytes)emd2TFntP       g|=i'  FT_HOMEfasttext)path)computer
artificialintelligencer   treeshumanr   graph)r   r   systemc                   H   e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z ej        edu d          d             Zd Zd Zd Zd Zd Z d Z!d Z"d  Z#d! Z$d" Z%d# Z&d$ Z'd% Z(d& Z)d' Z*d( Z+d) Z,d* Z-d+ Z.d, Z/d- Z0d. Z1d/S )0TestFastTextModelc                     t          d          | _        t          j        j                            | j                  | _        t          d          | _        d S )Nzlee_fasttext.binzlee_fasttext_new.bin)r
   test_model_filegensimmodelsr   load_facebook_model
test_modeltest_new_model_fileselfs    9lib/python3.11/site-packages/gensim/test/test_fasttext.pysetUpzTestFastTextModel.setUp?   sG    '(:;; -0DDTEYZZ#+,B#C#C       c           
      t   t          ddddddt                    }|                    t                     |                     |           |                    t          |j        |j                   |j        	                    dd	          }| 
                    |j        j        j        d
           | 
                    t          |j                  d           | 
                    |j        j        j        d         d           | 
                    |j        j        j        d         d           |                     |           |j                            dd          }|j        	                    |gd          }d |D             }| 
                    ||           t          t          ddddddt                    }|                     ||           |j        d         }| 
                    t          |          d           |j        d         }| 
                    t          |          d           d S )N      r   *   vector_size	min_counthsnegativeseedworkersbuckettotal_examplesepochsr    
   topnr1   r1   Tnorm   positiverA   c                 (    g | ]\  }}|d k    ||fS r     .0wsims      r-   
<listcomp>z3TestFastTextModel.test_training.<locals>.<listcomp>U   s)    >>>fag>!S>>>r/   minorsminor)	FT_gensimBUCKETbuild_vocab	sentencesmodel_sanitytraincorpus_countr>   wvmost_similarassertEqualvectorsshapelenvectors_vocabvectors_ngrams
get_vectormodels_equal)r,   modelsimsgraph_vectorsims2model2invocab_vecoov_vecs           r-   test_trainingzTestFastTextModel.test_trainingD   s
   bA!abZ[djkkk)$$$%   Ie.@VVVx$$W2$66)/:::UX+++/5a8"===06q92>>>%    x**7*>>%%~B%GG>>>>>u%%% 9"aRSZ\fgpvwww%((( hx([))2...(7#Wr*****r/   c           	         t          ddddddt                    }|                    t                     |                     t
          |j        ddd           |                     t
          |j        ddd	           |                     t
          |j        t          d
dd           |                     t
          |j        d d dd           |                     t
          |j        t          dd           d S )Nr1   r2   r   r3   r4   )corpus_iterableig+  )corpus_filer=   r>   )rl   r=   r>   test)rl   rm   r=   r>   )rR   rS   rT   rU   assertRaises	TypeErrorrW   r,   rc   s     r-   test_fast_text_train_parametersz1TestFastTextModel.test_fast_text_train_parametersc   s    bA!abZ[djkkk)444)U[eTU^_```)U[%XYbcdddu{I6bclm 	 	o 	o 	o)U[$TXijstuuu)U[iXYbcdddddr/   c           
      x   t          d          5 }t          j        t          |           t	          ddddddt
                    }|                    |           |                     |           |                    ||j	        |j
                   |j                            d	d
          }|                     |j        j        j        d           |                     t!          |j                  d           |                     |j        j        j        d         d           |                     |j        j        j        d         d           |                     |           |j                            d	d          }|j                            |gd          }d |D             }|                     ||           |j        d         }|                     t!          |          d           |j        d         }|                     t!          |          d           d d d            d S # 1 swxY w Y   d S )Ngensim_fasttext.tstr1   r2   r   r3   r4   rm   rm   total_wordsr>   r    r?   r@   rB   TrC   rE   rF   c                 (    g | ]\  }}|d k    ||fS rI   rJ   rK   s      r-   rO   z<TestFastTextModel.test_training_fromfile.<locals>.<listcomp>   s)    BBB&!SQ'\BaXBBBr/   rP   rQ   )r   r   save_as_line_sentencerU   rR   rS   rT   rV   rW   corpus_total_wordsr>   rY   rZ   r[   r\   r]   r^   r_   r`   ra   )r,   rm   rc   rd   re   rf   rh   ri   s           r-   test_training_fromfilez(TestFastTextModel.test_training_fromfileo   sS   122 	/k'	;???"a!RT^_hnoooE+666e$$$KKKU=U^c^jKkkk8((r(::DUX-3X>>>S]]B///UX39!<bAAAUX4:1=rBBBe$$$ !8..wT.BBLH))L>)KKEBBEBBBET5)))  (8,KS--r222hw'GS\\2...7	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/s   HH//H36H3c                    |                      t          j                  t          |j                             |                      j        j        |j        j                   |                     t          j        j        j        |j        j                             |                     t          j        j        j        |j        j                             |                     t          j        j        j	        |j        j	                             j
        r2|                     t          j        j        |j                             j        r2|                     t          j        j        |j                             t          j        j        fd          d         }|                     t          j        j        |         |j        |                              d S )Nc                 :    j                             | d          S )Ncount)rY   get_vecattr)wordrc   s    r-   <lambda>z0TestFastTextModel.models_equal.<locals>.<lambda>   s    uxG[G[\`biGjGj r/   )keyr   )r[   r^   rY   r;   
assertTruenpallcloser_   r`   r\   r7   syn1r8   syn1negmaxkey_to_index)r,   rc   rg   most_common_words    `  r-   rb   zTestFastTextModel.models_equal   sv   UXFI777&)*:;;;EH$:FI<STTUUUEH$;VY=UVVWWWEH$4fi6GHHIII8 	BOOBK
FK@@AAA> 	HOOBKv~FFGGGux4:j:j:j:jkkklmnEH-=$>	JZ@[\\]]]]]r/   c                    t          d          }t          t          dt                    }|                    |           |                     |t          j        |                     |j        }|                    |           t          j        |          }| 	                    t          j        |j        |j                             |                     t          |          t          |                     d S )Nrt   r2   )r6   r;   )r   rR   rU   rS   saverb   loadrY   r   r   r   r   r`   r[   r^   )r,   tmpfrc   rY   	loaded_wvs        r-   test_persistencez"TestFastTextModel.test_persistence   s    011)q@@@

4%!5!5666X
(-d33	B$5y7OPPQQQR#i..11111r/   c                 t   t          d          5 }t          j        t          |           t	          d          }t          |dt                    }|                    |           |                     |t          j	        |                     |j
        }|                    |           t          j	        |          }|                     t          j        |j        |j                             |                     t#          |          t#          |                     d d d            d S # 1 swxY w Y   d S )Ngensim_fasttext1.tstrt   r2   )rm   r6   r;   )r   r   ry   rU   r   rR   rS   r   rb   r   rY   r   r   r   r   r`   r[   r^   )r,   rm   r   rc   rY   r   s         r-   test_persistence_fromfilez+TestFastTextModel.test_persistence_fromfile   sE   233 	6{'	;???455D+6RRREJJteY^D%9%9:::BGGDMMM,1$77IOOBK(99;STTUUUSWWc)nn555	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6s   DD--D14D1c                 0    |                      |           d S N)model_structural_sanityrq   s     r-   rV   zTestFastTextModel.model_sanity   s    $$U+++++r/   c                    |                      |j        j        j        t	          |j                  |j        f           |                      |j        j        j        t	          |j                  |j        f           |                      |j        j        j        |j        j        |j        f           | 	                    t	          |j        j
                  t	          |j        j                             | 	                    t	          |j        j                  t	          |j        j                             |                     t          j        |j        j                                                  d           |                     t          j        |j        j                                                  d           |j        r?|                     t          j        |j                                                  d           |j        rA|                     t          j        |j                                                  d           dS dS )zyCheck a model for basic self-consistency, necessary properties & property
        correspondences, but no semantic tests.zNaN in ngramszNaN in vectors_vocabzNaN in syn1negN)r[   rY   r\   r]   r^   r5   r_   r`   r;   assertLessEqualvectors_ngrams_lockfvectors_vocab_lockfindex_to_keyr   r   isfiniteallr8   r   r7   r   rq   s     r-   r   z)TestFastTextModel.model_structural_sanity   s    	)/#eh--AR1STTT/5EHuGX7YZZZ06%J[8\]]]S!>??UXE\A]A]^^^S!=>>EHDY@Z@Z[[[EH$;<<@@BBOTTTEH$:;;??AACYZZZ> 	POOBK66::<<>NOOO8 	MOOBK
337799;KLLLLL	M 	Mr/   c                 x   	 t           j        j                            | j                  }n7# t
          $ r*}|                     d| j        d|           Y d }~nd }~ww xY wd\  }}|                     |j        j	        j
        ||f           |                     t          |j                  ||           |                     |j        j        j
        |j        j        |f           g d}|j        d         }|                     t          j        ||d                     g d}|j        d	         }|                     t          j        ||d                     |                     |j        d
           |                     |j        d
           |                     |j        d
           |                     |j        d
           |                     |j        d           |                     |j        j        d           |                     |j        j        d           |                     |j        j        d           |                     |j        j	        j
        t          |j                  |j        f           |                     |j        j        j
        |j        j        |j        f           d S )N(Unable to load FastText model from file : )i  r?   )
g(
<IgZPsM܅gM(?glV}gz3KпggTol`g{P?gJ8?g<?hundred-C6?atol)
g+ٱ̿g,g~5gT̿g?ܵڿgi5$?ggNGϿgKqU?gJOc?g5)^?	rejection           )r&   r'   r   r(   r%   	Exceptionfailr[   rY   r\   r]   r^   r`   r;   r   r   r   r6   windowr>   r8   samplemax_nmin_nr5   )	r,   rc   exc
vocab_size
model_sizeexpected_vec
actual_vecexpected_vec_oovactual_vec_oovs	            r-   test_load_fasttext_formatz+TestFastTextModel.test_load_fasttext_format   s   	fM*>>t?STTEE 	f 	f 	fIII$J^J^J^`c`cdeeeeeeee	f!)
J)/*j1IJJJUX
J???06*8UVVV
 
 
 Xi(
J4HHHIII
 
 
 +.N4D4PPPQQQ!,,,q)))q)))+++v...$///++++++)/#eh--AR1STTT06%J[8\]]]]]   ), 
A  AA c                 x   	 t           j        j                            | j                  }n7# t
          $ r*}|                     d| j        d|           Y d }~nd }~ww xY wd\  }}|                     |j        j	        j
        ||f           |                     t          |j                  ||           |                     |j        j        j
        |j        j        |f           g d}|j        d         }|                     t          j        ||d                     g d}|j        d	         }|                     t          j        ||d                     |                     |j        d
           |                     |j        d
           |                     |j        d
           |                     |j        d
           |                     |j        d           |                     |j        j        d           |                     |j        j        d           |                     |j        j        d           |                     |j        j	        j
        t          |j                  |j        f           |                     |j        j        j
        |j        j        |j        f           d S )Nr   r   )i  r?   )
g=gNg89@0?g"g8m4?g g0L
F%?g
ܺ:?gQ|a2ɿgL~r   r   r   )
gD/Xn߿g$bJ$g.񝘕gGgg.nɿg_xZg΍	K<?gY9?g^/ƿg>٬\?r   r   r   r   r   )r&   r'   r   r(   r*   r   r   r[   rY   r\   r]   r^   r`   r;   r   r   r   r6   r   r>   r8   r   r   r   r5   )	r,   	new_modelr   r   r   r   r   r   r   s	            r-   test_load_fasttext_new_formatz/TestFastTextModel.test_load_fasttext_new_format   s   	j.BB4C[\\II 	j 	j 	jIII$JbJbJbdgdghiiiiiiii	j!)
J-3j*5MNNNY\**J
CCC4:Y\=PR\<]^^^
 
 
 \),
J4HHHIII
 
 
 #k2N4D4PPPQQQ,a000)1---)1---+Q///)6222,d333+Q///+Q///-3c),6G6GI^5_```4:Y\=PR[Rg<hiiiiir   c                     |                      t                    5  t          j        j                            t          d                     d d d            d S # 1 swxY w Y   d S )Nzpang_lee_polarity_fasttext.bin)ro   NotImplementedErrorr&   r'   r   r(   r
   r+   s    r-   test_load_model_supervisedz,TestFastTextModel.test_load_model_supervised2  s    233 	c 	cM"66x@`7a7abbb	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	cs   2AA!Ac                    t           j        j                            t	          d                    }|                     d|j        v            	 |j        d          d S # t          $ r |                     d           Y d S w xY w)Nznon_ascii_fasttext.bin   kterýz7Unable to access vector for utf8 encoded non-ascii word)	r&   r'   r   r(   r
   r   rY   UnicodeDecodeErrorr   rq   s     r-   $test_load_model_with_non_ascii_vocabz6TestFastTextModel.test_load_model_with_non_ascii_vocab6  s    &::8D\;];]^^	UX-...	QHY! 	Q 	Q 	QIIOPPPPPP	Qs   A B Bc                    t           j        j                            t	          d          d          }|                     d|j        v            	 |j        d          d S # t          $ r |                     d           Y d S w xY w)Nzcp852_fasttext.bincp852encodingr   z'Unable to access vector for cp-852 word)	r&   r'   r   r(   r
   r   rY   KeyErrorr   rq   s     r-   !test_load_model_non_utf8_encodingz3TestFastTextModel.test_load_model_non_utf8_encoding>  s    &::8DX;Y;Ydk:ll	UX-...	AHY 	A 	A 	AII?@@@@@@	As   A   BBc                 J   d}| j         j                            |          }|d         \  }}| j         j        |         }| j         j        |         }| j         j                            ||                    dd                    d         }|                     ||d           d S )Nsomeoovwordr   r2   r   )places)r)   rY   rZ   cosine_similaritiesreshapeassertAlmostEqual)r,   r   rZ   top_neighbortop_similarityv1v2top_similarity_directs           r-   test_oov_similarityz%TestFastTextModel.test_oov_similarityF  s    )66t<<'3A$n_%_- $ 2 F Fr2::VWY[K\K\ ] ]^_ `~/DQOOOOOr/   c           	         |                      t          j        | j        j                            ddgddg          d                     |                     | j        j                            dgdg          | j        j                            dgdg                     |                      t          j        | j        j                            ddgddg          d                     |                     | j        j                            dgdg          | j        j                            dgdg                     d S )Ntheand      ?nightnights)r   r   r   r)   rY   n_similarityr[   r+   s    r-   test_n_similarityz#TestFastTextModel.test_n_similarityO  s3   DO$6$C$CUENUZ\aTb$c$cehiijjjO++UGeW==t?Q?^?^`e_finho?p?p	r 	r 	r 	DO$6$C$CWhDWZbdkYl$m$morsstttO++WIzBBO++XJ	BB	
 	
 	
 	
 	
r/   c                 f   |                      t          j        | j        j                            dd          d                     |                     | j        j                            dd          | j        j                            dd                     |                      t          j        | j        j                            dd          d                     |                     | j        j                            dd          | j        j                            dd                     d S )Nr   r   r   r   r   )r   r   r   r)   rY   
similarityr[   r+   s    r-   test_similarityz!TestFastTextModel.test_similarity[  s   DO$6$A$A%$O$OQTUUVVV+66ueDDdoFXFcFcdikpFqFqrrrDO$6$A$A(H$U$UWZ[[\\\O))'8<<do>P>[>[\dfm>n>n	p 	p 	p 	p 	pr/   c                 ^   |                      t          | j        j                            ddgd                    d           |                      | j        j                            d          | j        j                            dg                     |                      t          | j        j                            ddgd                    d           |                      | j        j                            d          | j        j                            dg                     d S )	Nr   r   r   rF   rG   r   r   r@   )r[   r^   r)   rY   rZ   r+   s    r-   test_most_similarz#TestFastTextModel.test_most_similard  s   T_/<<uen[\<]]^^`abbb+88??ASA`A`kpjqA`ArArsssT_/<<gx=PWX<YYZZ\]^^^O++H55t7I7V7Vai`j7V7k7k	m 	m 	m 	m 	mr/   c                    |                      t          | j        j                            ddgd                    d           |                      | j        j                            d          | j        j                            dg                     |                      t          | j        j                            ddgd                    d           |                      | j        j                            d          | j        j                            dg                     |                      | j        j                            dd          | j        j                            dgdg	                     d S )
Nr   r   r   rF   r   r   r   r@   )rG   r8   )r[   r^   r)   rY   most_similar_cosmulr+   s    r-   test_most_similar_cosmulz*TestFastTextModel.test_most_similar_cosmulm  sv   T_/CCeUZ^bcCddeeghiiiO22599O22UG2DD	F 	F 	F 	T_/CCWhDW^_C``aacdeeeO228<<O22XJ2GG	I 	I 	I 	O225%@@O22UGug2VV	X 	X 	X 	X 	Xr/   c                    |                      d| j        j        j        v            |                      t	          j        | j        j        d         | j        j        dg                              |                     d| j        j        j        v            |                      t	          j        | j        j        d         | j        j        dg                              d S Nr   r   )r   r)   rY   r   r   r   assertFalser+   s    r-   test_lookupzTestFastTextModel.test_lookup|  s    4?#5#BBCCCDO$6w$?ASU\T]A^__```T_%7%DDEEEDO$6x$@$/BTV^U_B`aabbbbbr/   c                    |                      d| j        j        j        v            |                      d| j        j        v            |                     | j        j                            d                     |                     d| j        j        j        v            |                      d| j        j        v            d S r   )r   r)   rY   r   r   has_index_forr+   s    r-   test_containszTestFastTextModel.test_contains  s    4?#5#BBCCC4?#55666+99(CCDDDT_%7%DDEEEDO$6677777r/   FzPOT not installedc                     ddg}g d}| j         j                            ||          }|                     t	          d          |           d S )Nr   payment)r   forestspaymentsinf)r)   rY   
wmdistanceassertNotEqualfloat)r,   docoov_docdists       r-   test_wm_distancez"TestFastTextModel.test_wm_distance  sS    	"333!,,S'::E%LL$/////r/   c                    t          d&i ddddddddd	d
dddd
dd
dddddddddddddddddddt          }t          t          d                    }|                    |           t          j        |j        j        d                   }|	                    ||j
        |j                   |                     ||j        j        d         k                                               |j                            d d!          }d" |D             }g d#}t          |                              |          }t#          |          }|                     |d$d%|||fz             d S )'Nr5   0   sgr   	cbow_meanr2   alpha皙?r   r   r7   r8   r6   r>   r?   batch_wordsr   word_ngramsr   MbP?r   r   r   r   sorted_vocabr:   	min_alpha        r;   lee_background.corr<   r   r@   c                     g | ]\  }}|S rJ   rJ   rL   r   distances      r-   rO   z<TestFastTextModel.test_cbow_neg_training.<locals>.<listcomp>      FFF&6tXTFFFr/   
night.night,eightfightmonthhearings
Washingtonremains	overnightrunning   *only %i overlap in expected %s & actual %srJ   rR   rS   r   r
   rT   r   copyrY   r\   rW   rX   r>   r   r   rZ   setintersectionr^   assertGreaterEqual	r,   model_gensimlee_dataorig0sims_gensimsims_gensim_wordsexpected_sims_wordsoverlapsoverlap_counts	            r-   test_cbow_neg_trainingz(TestFastTextModel.test_cbow_neg_training  s(     E E EE qE,-AE59TEBC!EHIETUTUEaE "E04EBC!ELPDEXYXYEababE E %&AE 25E >DVE
  )= > >??  ***/2338L4MVbVijjj%<?#:1#==BBDDEEE"o2272DDFF+FFF
 
 
 ())667JKKH18MK^`q;rr	t 	t 	t 	t 	tr/   c           	      d   t          d          5 }t          d(i dddddddd	d
ddddddddddddddddddddddddddt          }t          t	          d                    }t          j        ||           |                    |            t          j	        |j
        j        d                   }|                    ||j        |j        !           |                     ||j
        j        d         k                                               |j
                            d"d#          }d$ |D             }g d%}t%          |                              |          }t)          |          }	|                     |	d&d'|	||fz             d d d            d S # 1 swxY w Y   d S ))Nrt   r5   r   r   r   r   r2   r   r   r   r   r7   r8   r6   r>   r?   r   r   r  r   r  r   r   r   r   r  r:   r  r  r;   r  ru   rv   r   r@   c                     g | ]\  }}|S rJ   rJ   r  s      r-   rO   zETestFastTextModel.test_cbow_neg_training_fromfile.<locals>.<listcomp>       J J J*:4 J J Jr/   r  r  r  rJ   r   rR   rS   r   r
   r   ry   rT   r   r  rY   r\   rW   rz   r>   r   r   rZ   r  r  r^   r  
r,   rm   r  r  r   r!  r"  r#  r$  r%  s
             r-   test_cbow_neg_training_fromfilez1TestFastTextModel.test_cbow_neg_training_fromfile  s   122 !	xk$ I I IBI#$1I01I9=IFGaILMAIXYXYI!I$&BI48DIFGaIPTPTI\]\]IefefI QI )*I 69SI BHIL
 $H-A$B$BCCH'+>>>$$$===GLO3A677E;+7+J&2&9  ; ; ; e|'>q'AAFFHHIII&/66wR6HHK J Jk J J J
# 
# 
# ,--::;NOOHMMM##q<Obdu?vvx x x?!	x !	x !	x !	x !	x !	x !	x !	x !	x !	x !	x !	x !	x !	x !	x !	x !	x !	xs   FF%%F),F)c                    t          d'i dddddddddd	d
ddd	dd	dddddddddddddddddddt          dz  }t          t          d                    }|                    |           t          j        |j        j        d                   }|	                    ||j
        |j                    |                     ||j        j        d         k                                               |j                            d!d"          }d# |D             }g d$}t          |                              |          }t#          |          }|                     |d%d&|||fz             d S )(Nr5   r   r   r2   r   r   皙?r   r   r7   r   r8   r6   r>   r?   r   r   r  r   r  r   r   r   r   r  r:   r  r  r;      r  r<   r   r@   c                     g | ]\  }}|S rJ   rJ   r  s      r-   rO   z:TestFastTextModel.test_sg_neg_training.<locals>.<listcomp>  r
  r/   
r  r  r  r  
overnight.r  landfirmsinglesdeathr  r  rJ   r  r  s	            r-   test_sg_neg_trainingz&TestFastTextModel.test_sg_neg_training  s,     I I II qI,-AI5:UICD1IIJIUVUVIaI "I04IBC!ILPDIXYXYIababI I %&AI 25I >DaZZI
  )= > >??  ***/2338L4MVbVijjj%<?#:1#==BBDDEEE"o2272DDFF+FFF
 
 
 ())667JKKH18MK^`q;rr	t 	t 	t 	t 	tr/   c           	      j   t          d          5 }t          d)i ddddddddd	d
dddd
dd
dddddddddddddddddddt          dz  }t          t	          d                     }t          j        ||           |                    |!           t          j	        |j
        j        d                   }|                    ||j        |j        "           |                     ||j
        j        d         k                                               |j
                            d#d$          }d% |D             }g d&}t%          |                              |          }t)          |          }	|                     |	d'd(|	||fz             d d d            d S # 1 swxY w Y   d S )*Nrt   r5   r   r   r2   r   r   r.  r   r   r7   r   r8   r6   r>   r?   r   r   r  r   r  r   r   r   r   r  r:   r  r  r;   r/  r  ru   rv   r   r@   c                     g | ]\  }}|S rJ   rJ   r  s      r-   rO   zCTestFastTextModel.test_sg_neg_training_fromfile.<locals>.<listcomp>
  r)  r/   r1  r  r  rJ   r*  r+  s
             r-   test_sg_neg_training_fromfilez/TestFastTextModel.test_sg_neg_training_fromfile  s   122 !	xk$ M M MBM#$1M01M9>MGHqMMNQMYZYZM!M$&BM48DMFGaMPTPTM\]\]MefefM QM )*M 69SM BH!ML
 $H-A$B$BCCH'+>>>$$$===GLO3A677E;+7+J&2&9  ; ; ; e|'>q'AAFFHHIII&/66wR6HHK J Jk J J J
# 
# 
# ,--::;NOOHMMM##q<Obdu?vvx x x?!	x !	x !	x !	x !	x !	x !	x !	x !	x !	x !	x !	x !	x !	x !	x !	x !	x !	xs   FF((F,/F,c           	      8   t          t          dddddt                    }|                     t	          |j                  d           |                     |j                            dd          d           |                    t          d	
           |                     t	          |j                  d           |                     |j                            dd          d           |                     |j                            dd          d           d S )Nr1   r2   r3   r   r5   r6   r9   r7   r8   r;   r    r~   r   Tupdate   r/  r   )	rR   rU   rS   r[   r^   rY   r   rT   new_sentences)r,   model_hss     r-   test_online_learningz&TestFastTextModel.test_online_learning  s    YB!"QR]^gmnnnX[))2...00'BBAFFF]4888X[))2...00'BBAFFF00wGGKKKKKr/   c                 *   t          d          5 }t          d          5 }t          j        t          |           t          j        t          |           t          |dddddt                    }|                     t          |j	                  d           |                     |j	        
                    dd	          d
           |                    |d           |                     t          |j	                  d           |                     |j	        
                    dd	          d           |                     |j	        
                    dd	          d           d d d            n# 1 swxY w Y   d d d            d S # 1 swxY w Y   d S )Nr   gensim_fasttext2.tstr1   r2   r3   r   rm   r5   r6   r9   r7   r8   r;   r    r~   r   Trm   r>  r?  r/  r   )r   r   ry   rU   r@  rR   rS   r   r^   rY   r   rT   r[   )r,   rm   new_corpus_filerA  s       r-   test_online_learning_fromfilez/TestFastTextModel.test_online_learning_fromfile%  s   233 	O{566	O:I'	;???'GGG 'R12RS^_hnp p pHOOC,,b111OOHK33GWEEqIII  _T JJJS--r222OOHK33GWEEqIIIOOHK33L'JJANNN	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	Os5   FEE0$F0E4	4F7E4	8FFFc           	         t          d          }t          t          dddddt                    }|                    |           t          j        |          }|                     t          |j                  d           |	                    t          d           |                    t          |j        |j        	           |                     t          |j                  d
           d S )Nrt   r1   r   r3   r   r<  Tr=  r<   r?  )r   rR   rU   rS   r   r   r   r^   rY   rT   r@  rW   rX   r>   r[   )r,   r   	model_negs      r-   test_online_learning_after_savez1TestFastTextModel.test_online_learning_after_save4  s    011iR12RS^_hnooo	tN4((	IL))2...mD999i6LU^UefffY\**B/////r/   c           	      |   t          d          }t          t          dddddt                    }t          j        j                            ||           t          j        j                            |          }| 	                    t          |j                  d           |                     t          |j                  t          |j        j                             |                     t          |j                  t          |j        j                             |                    t           d           |                    t           |j        |j        	           |                     t          |j                  d
           |                     t          |j                  t          |j        j                             |                     t          |j                  t          |j        j                             t          d          }t          j        j                            ||           d S )Nzgensim_ft_format.tstr1   r   r3   r   r<  Tr=  r<   r?  zgensim_ft_format2.tst)r   rR   rU   rS   r&   r'   r   save_facebook_modelr(   r   r^   rY   r[   r\   r_   rT   r@  rW   rX   r>   )r,   r   rc   model_reloadtmpf2s        r-   ,test_online_learning_through_ft_format_savesz>TestFastTextModel.test_online_learning_through_ft_format_saves>  s   122)qraZ[djkkk225$???}-AA$GGLO,,b111\_--s<?3J/K/KLLL\_--s<?3P/Q/QRRR  t <<<=9R[g[nooo\_--r222\_--s<?3J/K/KLLL\_--s<?3P/Q/QRRR34422<GGGGGr/   c                    t          d          5 }t          d          5 }t          j        t          |           t          j        t          |           t          d          }t          |dddddt                    }|                    |           t          j	        |          }| 
                    t          |j                  d           |                    |d	
           |                    ||j        |j                   |                     t          |j                  d           d d d            n# 1 swxY w Y   d d d            d S # 1 swxY w Y   d S )Nr   rD  rt   r1   r   r3   r   rE  TrF  rv   r?  )r   r   ry   rU   r@  r   rR   rS   r   r   r   r^   rY   rT   rW   rz   r>   r[   )r,   rm   rG  r   rJ  s        r-   (test_online_learning_after_save_fromfilez:TestFastTextModel.test_online_learning_after_save_fromfileN  s   233 	4{566	4:I'	;???'GGG455D!'R12RS^_hnp p pINN4   !t,,IOOC	--r222!!od!KKKOOYEa#,#3  5 5 5S..333	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4s5   EDD=1E=E	EE	EEEc                    g g }}t           D ]1}d|v r|                    |           |                    |           2|                     t          d |D                                  |                    |           |j        j                                        }|                    ||j	        |j
                   |                     t          j        t          j        ||j        j                                       |                     t          j        t          j        |j        j        |j        j                                       |                     d|j        j        v            |                    |d           |                     |j        j        j        dk               |                     d|j        j        v            t          j        |j        j                  }|                    |t%          |          |j
                   |                     t          j        |j        j        |                     |j                            dgdg          }|                     d|           d S )	N	terrorismc              3      K   | ]}d |vV  	dS )rT  NrJ   )rL   lines     r-   	<genexpr>z2TestFastTextModel.online_sanity.<locals>.<genexpr>f  s'      GGKt3GGGGGGr/   r<   Tr=  float32warr  )list_corpusappendr   r   rT   rY   r_   r  rW   rX   r>   r   r   equalr\   r   r`   dtyper^   r   r   
assertLess)r,   rc   terroothersrV  
start_vecs	orig0_allrN   s           r-   online_sanityzTestFastTextModel.online_sanity_  s$   Bv 	$ 	$Dd" $T""""d####GGGGGGGHHH&!!!X+0022
F5+=elSSSUX5K L LMMNNN)958;Q R RSSTTT(==>>>%---/5BCCCux'<<===GEH344	E#e**U\JJJUX%<iHHIIIh##UGk];;C     r/   c                 j    t          ddddddddt          	  	        }|                     |           d S )Nr2   r  r   r   r3   	r   r   r7   r8   r6   r>   r9   r:   r;   rR   rS   rc  rq   s     r-   test_sg_hs_onlinez#TestFastTextModel.test_sg_hs_onlinex  @    QqQaPQXZdentuuu5!!!!!r/   c                 j    t          ddddddddt          	  	        }|                     |           d S )Nr2   r  r   r   r   r3   re  rf  rq   s     r-   test_sg_neg_onlinez$TestFastTextModel.test_sg_neg_online|  rh  r/   c                 n    t          ddddddddddt                    }|                     |           d S )Nr   r2   r   r  r   r3   )r   r   r   r   r7   r8   r6   r>   r9   r:   r;   rf  rq   s     r-   test_cbow_hs_onlinez%TestFastTextModel.test_cbow_hs_online  sN    AT!AQR[\ceop
 
 
 	5!!!!!r/   c                 p    t          dddddddddddt                    }|                     |           d S )Nr   r2   r   r  r   r3   )r   r   r   r   r7   r8   r6   r>   r9   r:   r   r;   rf  rq   s     r-   test_cbow_neg_onlinez&TestFastTextModel.test_cbow_neg_online  sL    AT!AAa
 
 
 	5!!!!!r/   c                 `   t          dddt                    }|                    t                     t	          j        |j        j                  }|j                                         | 	                    t	          j
        t	          j        |j        j        |                               d S )Nr1   r2   r3   )r5   r6   r9   r;   )rR   rS   rT   rU   r   r  rY   r_   adjust_vectorsr   r   r\  )r,   rc   original_syn0_vocabs      r-   test_get_vocab_word_vecsz*TestFastTextModel.test_get_vocab_word_vecs  s    bABvNNN)$$$ geh&<==!!!rx(>@STTUUVVVVVr/   c                    t          d          }t          t          ddt                    }|j                            |d           t          j        |d          }|                     t          |j                  t          |                     | 
                    t          j        |j        d         |d                              dS )	z2Test storing/loading the model in word2vec format.zgensim_fasttext_w2v_format.tstr2   r1   )r6   r5   r;   T)binaryr   N)r   rR   rU   rS   rY   save_word2vec_formatr	   load_word2vec_formatr[   r^   r   r   r   )r,   r   rc   loaded_model_kvs       r-    test_persistence_word2vec_formatz2TestFastTextModel.test_persistence_word2vec_format  s    ;<<)qbPPP%%d4%888&;DNNNUXO(<(<===EHW$5w7OPPQQQQQr/   c                 *   t          ddd          }|                    t                     |                     |j        j        j        d           |                    t          d           |                     |j        j        j        d           d S )Nr1   r2      )r5   r6   r;   )rz  r1   Tr=  )rR   rT   rU   r[   rY   r`   r]   r@  rq   s     r-   test_bucket_ngramsz$TestFastTextModel.test_bucket_ngrams  s    bAbAAA)$$$06AAA-55506AAAAAr/   c                    t          dddddt                    }|                    t                     |                                }|                     |d         d           |                     |d         d	           |                     |d
         d	           |                     |d         d	           |                     |d         |j        t          j        t          j	                  j
        z  t          z             |                     |d         d           |                     |d         d           d S )Nr2   r1   r   r   )r   r7   r5   r8   r6   r;   vocabi
  
syn0_vocab   r   r   syn0_ngramsbuckets_wordi  totalib )rR   rS   rT   rU   estimate_memoryr[   r5   r   r]  rX  itemsize)r,   rc   reports      r-   test_estimate_memoryz&TestFastTextModel.test_estimate_memory  s!   Q1"qAV\]]])$$$&&(($///-s333---	*C000 	.0ABHRZDXDXDa0adj0jkkk/555&11111r/   c                 J   d}t          j        t          |                    }|                     |j        j        j        dk               |                     t          |j                  dk               |                     t          |j        j                  dk               | 	                    |j
                   |                     |j        j        t          |j                  |j        fk               |                     |j        j        j        dk               |                     |j        j        dk               |                     |j        j        j        d           |                     |j        j        j        d           d}t          j        t          |                    }|                     |j        j        j        dk               |                     t          |j                  dk               |                     t          |j        j                  dk               | 	                    |j
                   |                     |j        j        t          |j                  |j        fk               |                     |j        j        j        dk               |                     |j        j        dk               |                     |j        j        j        d           |                     |j        j        j        d           dS )z2Test loading fasttext models from previous versionfasttext_old)r1   d   r1   )r1   )逄 r  fasttext_old_sepN)rR   r   r
   r   rY   r\   r]   r^   r   assertIsNonerz   r   r5   vectors_lockf	cum_tabler[   r_   r`   )r,   
model_filerc   s      r-   obsolete_testLoadOldModelz+TestFastTextModel.obsolete_testLoadOldModel  s|    $
x
3344(.);<<<EH+,,,EH122b8999%2333+EHu?P/QQRRR.4>???-7888/5yAAA06GGG (
x
3344(.);<<<EH+,,,EH122b8999%2333+EHu?P/QQRRR.4>???-7888/5yAAA06GGGGGr/   c                    g d}| j         j                            |          }d}t          |          }||k    sJ | j         j        d         }|d         }t	          j        ||          sJ t          j                            |d         |d         z
            }t          j                            |d         |d         z
            }||k    sJ dS )z+Test vectors_for_all can infer new vectors.
responding
approachedchairmanan out-of-vocabulary wordanother out-of-vocabulary wordr   r  r  r  N)r)   rY   vectors_for_allr^   r   r   linalgrD   )r,   wordsr  expected	predictedsmaller_distancegreater_distances          r-   #test_vectors_for_all_with_inferencez5TestFastTextModel.test_vectors_for_all_with_inference  s    
 
 
 /,<<UCC((	9$$$$?%l3#L1	{8Y/////9>>78>?@
 
 9>>78l+,
 
  "2222222r/   c                     g d}| j         j                            |d          }d}t          |          }||k    sJ | j         j        d         }|d         }t	          j        ||          sJ dS )z@Test vectors_for_all does not infer new vectors when prohibited.r  F)allow_inferencer   r  N)r)   rY   r  r^   r   r   )r,   r  r  r  r  s        r-   &test_vectors_for_all_without_inferencez8TestFastTextModel.test_vectors_for_all_without_inference  s    
 
 
 /,<<UTY<ZZ((	9$$$$?%l3#L1	{8Y///////r/   c                    t          t          ddd          }t          d          }|                    |           t          j        |          }|                    t          |j        d           |j        dk    sJ |j                    dS )z@The model should accept a negative ns_exponent as a valid value.r   r2   )ns_exponentr6   r:   zfasttext_negative_exp.tstr<   N)rR   rU   r   r   r   rW   rX   r  )r,   rc   r   loaded_models       r-   test_negative_ns_expz&TestFastTextModel.test_negative_ns_exp  s    )q!LLL677

4 ~d++9U5GPQRRR'2-GG|/GGGGGGr/   N)2__name__
__module____qualname__r.   rj   rr   r{   rb   r   r   rV   r   r   r   r   r   r   r   r   r   r   r   r   r   unittestskipIfPOT_EXTr   r&  r,  r7  r:  rB  rH  rK  rP  rR  rc  rg  rj  rl  rn  rr  rx  r{  r  r  r  r  r  rJ   r/   r-   r#   r#   =   s       D D D
+ + +>
e 
e 
e/ / /<^ ^ ^
2 
2 
26 6 6, , ,M M M3^ 3^ 3^j3j 3j 3jjc c cQ Q QA A AP P P

 

 

p p pm m mX X Xc c c8 8 8 X_W%':;;0 0 <;0t t t>"x "x "xHt t t@"x "x "xHL L LO O O0 0 0H H H 4 4 4"! ! !2" " "" " "" " "" " "W W WR R RB B B2 2 2H H H<3 3 3:0 0 0&H H H H Hr/   r#   shrink_windowsc                    t          d)i ddddddddd	d
dddddd
dddddddddddddddddddt          d| }t          t          d                    }|                    |           t          j        |j        j        d                   }|	                    ||j
        |j                    ||j        j        d         k                                    rJ |j                            d!d"          }d# |D             }g d$}t          |                              |          }t!          |          }d%| d&| d'| }	|d(k    s
J |	            d S )*Nr5   r   r   r   r   r2   r   r   r   r   r7   r8   r6   r>   r?   r   r   r  r   r  r   r   r   r   r  r:   r  r  r;   r  r  r<   r   r@   c                     g | ]\  }}|S rJ   rJ   r  s      r-   rO   z)test_cbow_hs_training.<locals>.<listcomp>      BBB"24BBBr/   
r  r  rights
kilometresinr  	accordingflightsduringcomesonly  overlap in expected 
 & actual r  rJ   rR   rS   r   r
   rT   r   r  rY   r\   rW   rX   r>   r   rZ   r  r  r^   
r  r  r  r   r!  r"  r#  r$  r%  messages
             r-   test_cbow_hs_trainingr    s1    ` ` `B`1`()`15`>?a`DEA`PQPQ`!`B`,0D`>?a`HL`TUTU`]^]^` Q` !"` .1S` :@` Q_P^`L
 H%9::;;HX&&&GLO+A.//Ex0IR^Refff03388:::::/..wR.@@KBBkBBB
 
 
 $%%223FGGHMMMlmll:MllYjllGA&&w&&&&&r/   c                 L   t          d          5 }t          d,i dddddddd	d
ddddddddddddddddddddddddddt          dz  d | }t          t	          d!                    }t          j        ||           |                    |"           t          j	        |j
        j        d                   }|                    ||j        |j        #           ||j
        j        d         k                                    rJ |j
                            d$d%          }d& |D             }g d'}t#          |                              |          }t'          |          }	d(|	 d)| d*| }
|	d+k    s
J |
            	 d d d            d S # 1 swxY w Y   d S )-Nrt   r5   r   r   r   r   r2   r   r   r   r   r7   r8   r6   r>   r?   r   r   r  r   r  r   r   r   r   r  r:   r  r  r;   r/  r  r  ru   rv   r   r@   c                     g | ]\  }}|S rJ   rJ   r  s      r-   rO   z2test_cbow_hs_training_fromfile.<locals>.<listcomp>?  r
  r/   r  r  r  r  r  rJ   r   rR   rS   r   r
   r   ry   rT   r   r  rY   r\   rW   rz   r>   r   rZ   r  r  r^   r  rm   r  r  r   r!  r"  r#  r$  r%  r  s              r-   test_cbow_hs_training_fromfiler  ,  s   	-	.	.  ++  h h hh qh,-Ah59ThBC!hHIhTUTUhah "h04hBC!hLPDhXYXYhababh h %&Ah 25h >DaZZh YgXfh
  )= > >??#Hk:::  [ 999/233{'3'F"."5 	 	7 	7 	7 \_4Q77<<>>>>>"o2272DDFF+FFF
 
 
 ())667JKKHp-pp>Qpp]npp!**7****A +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +s   E;FF Fc                    t          d)i dddddddddd	d
ddddd	dddddddddddddddddddt          d| }t          t          d                    }|                    |           t          j        |j        j        d                   }|	                    ||j
        |j                    ||j        j        d         k                                    rJ |j                            d!d"          }d# |D             }g d$}t          |                              |          }t!          |          }d%| d&| d'| }	|d(k    s
J |	            d S )*Nr5   r   r   r2   r   r   r.  r   r   r7   r8   r   r6   r>   r?   r   r   r  r   r  r   r   r   r   r  r:   r  r  r;   r  r  r<   r   r@   c                     g | ]\  }}|S rJ   rJ   r  s      r-   rO   z'test_sg_hs_training.<locals>.<listcomp>_  r  r/   
r  r  r  niner  crewr2  manslaughternorthflightr  r  r  r  rJ   r  r  s
             r-   test_sg_hs_trainingr  Q  s1    ` ` `B`1`()`16`?@q`EFQ`QRQR`!`B`,0D`>?a`HL`TUTU`]^]^` Q` !"` .1S` :@` Q_P^`L
 H%9::;;HX&&&GLO+A.//Ex0IR^Refff03388:::::/..wR.@@KBBkBBB
 
 
 $%%223FGGHMMMlmll:MllYjllGA&&w&&&&&r/   c                 F   t          d          5 }t          d+i ddddddddd	d
dddddd
dddddddddddddddddddt          d| }t          t	          d                     }t          j        ||           |                    |!           t          j	        |j
        j        d                   }|                    ||j        |j        "           ||j
        j        d         k                                    rJ |j
                            d#d$          }d% |D             }g d&}t#          |                              |          }t'          |          }	d'|	 d(| d)| }
|	d*k    s
J |
            	 d d d            d S # 1 swxY w Y   d S ),Nrt   r5   r   r   r2   r   r   r.  r   r   r7   r8   r   r6   r>   r?   r   r   r  r   r  r   r   r   r   r  r:   r  r  r;   r  r  ru   rv   r   r@   c                     g | ]\  }}|S rJ   rJ   r  s      r-   rO   z0test_sg_hs_training_fromfile.<locals>.<listcomp>  r
  r/   r  r  r  r  r  rJ   r  r  s              r-   test_sg_hs_training_fromfiler  r  s   	-	.	.  ++  d d dd qd,-Ad5:UdCD1dIJdUVUVdad "d04dBC!dLPDdXYXYdababd d %&Ad 25d >DVd UcTbd
  )= > >??#Hk:::  [ 999/233{'3'F"."5 	 	7 	7 	7 \_4Q77<<>>>>>"o2272DDFF+FFF
 
 
 ())667JKKHp-pp>Qpp]npp!**7****A +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +  +s   E8FFFtoy-data.txt r  r   c                     t          | dddd|          }|                    t                     |                    t          t	          t                    |j                   |S )Nr   r   r2   r   )r;   r5   r   r:   r   r6   r<   )rR   rT   TOY_SENTENCESrW   r^   r>   )r;   r6   rc   s      r-   train_gensimr    s[     V$RXdmnnnE	m$$$	KKc-.@.@KVVVLr/   c                  l    t          d          } t          j        j                            |           }|S )Nztoy-model.bin)r
   r&   r'   r   r(   )r   rc   s     r-   load_nativer    s.     O$$DM"66t<<ELr/   c              #     K   |                                   | D ]n}|                                                    d          }|                    d          }d |D             }|t	          j        |t          j                  fV  od S )Nr  r   c                 ,    g | ]}t          |          S rJ   r   )rL   cs     r-   rO   zload_vec.<locals>.<listcomp>  s    ,,,q%((,,,r/   r]  )readlinestripsplitpopr   arrayrX  )finrV  columnsr   vectors        r-   load_vecr    s      LLNNN 7 7**,,$$T**{{1~~,,G,,,BHV2:66666666	7 7r/   c                 *     fd j         D             }fdj         D             }|                    ||           |                     j        j        j        j                   |                     j        j        j        j                   d S )Nc                 >    i | ]}|                     |d           S r~   r   )rL   r   as     r-   
<dictcomp>zcompare_wv.<locals>.<dictcomp>  )    JJJCsAMM#w//JJJr/   c                 >    i | ]}|                     |d           S r  r  )rL   r   bs     r-   r  zcompare_wv.<locals>.<dictcomp>  r  r/   )r   r[   r\   r]   r_   )r  r  ta_countb_counts   ``   r-   
compare_wvr    s    JJJJ1>JJJGJJJJ1>JJJGMM'7### MM!)/19?333 MM!/')>?????r/   c                 Z    |                     | j        j        |j        j                   d S r   )r[   r   r]   r  r  r  s      r-   
compare_nnr    s&     MM!)/19?33333r/   c                 j   |                     | j        |j                   |                     | j        |j                   |                     | j        |j                   |                     | j        |j                   |                     | j        |j                   |                    t          j        | j	        |j	                             |                     | j
        |j
                   |                     | j        |j                   |                     | j        |j                   d S r   )r[   max_vocab_sizer6   r   r  	null_wordr   r   r   r  	raw_vocabmax_final_vocabr  r  s      r-   compare_vocabularyr    s    MM!"A$4555MM!+q{+++MM!(AH%%%MM!.!.111MM!+q{+++LLQ[!+66777MM!+q{+++MM!#Q%6777MM!-/////r/   c                   b    e Zd ZdZej        Zd Zd Zd Zd Z	d Z
d Zd Zd	 Zd
 Zd Zd ZdS )NativeTrainingContinuationTestNc                 |    g dg dg dg dg dg dg dd}d	 |                                 D             | _        d S )
N)gAҧU?gU]o?gY9?g+H3gF?)g+PO?g,Ω?g6o1gɍ"gZ{,}?)g?gUka9?gz?g() gr&"?)g}8H?gڴ?g`#I?g\p?gbt?)g<+?gv?gr&"?g0_^}g\4?)g8+gz2蛔g$5gjTgE=0v?)gI%r?g9̗?g`X|[?gvݰmgI2?)quickbrownfoxjumpsoverlazydogc                 V    i | ]&\  }}|t          j        |t           j                   'S )r  )r   r  rX  )rL   r   arrs      r-   r  z8NativeTrainingContinuationTest.setUp.<locals>.<dictcomp>  s?     
 
 
c "(3bj111
 
 
r/   )itemsoov_expected)r,   r  s     r-   r.   z$NativeTrainingContinuationTest.setUp  s    
 GFFIIIFFFIIIEEELLLDDD
 

 
%^^--
 
 
r/   c                    t                      }t          j        t          d          dd          5 }t	          t          |                    }ddd           n# 1 swxY w Y   |                                D ]I\  }}|j                            |          }| 	                    t          j        ||d                     J|                     |           dS )z2Test for correct representation of in-vocab words.ztoy-model.vecrutf-8r   Nh㈵>r   )r  r   openr
   dictr  r  rY   ra   r   r   r   r   )r,   nativer  r  r   expected_vectoractual_vectors          r-   test_in_vocabz,NativeTrainingContinuationTest.test_in_vocab  s   Z113III 	+SHSMM**H	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ &.^^%5%5 	T 	T!D/"I0066MOOBKTRRRSSSS$$V,,,,,s   AA #A c                    t                      }| j                                        D ]I\  }}|j                            |          }|                     t          j        ||d                     J|                     |           dS )z6Test for correct representation of out-of-vocab words.r  r   N)	r  r	  r  rY   ra   r   r   r   r   )r,   r  r   r  r  s        r-   test_out_of_vocabz0NativeTrainingContinuationTest.test_out_of_vocab  s    %)%6%<%<%>%> 	T 	T!D/"I0066MOOBKTRRRSSSS$$V,,,,,r/   c                 `   t                      }t                      }|                     |j        j        |j        j                   t          |j        |j        |            t          |||            t          |||            |                     |           |                     |           dS )z:Compare models trained on toy data.  They should be equal.N)	r  r  r[   rY   r;   r  r  r  r   )r,   trainedr  s      r-   test_sanityz*NativeTrainingContinuationTest.test_sanity  s    ..*FI,<=== 	7:vy$///7FD1117FD)))$$W---$$V,,,,,r/   c                    t                      }|                     |           d}|j                            |                                          }|                    t          t          t                    |j                   |j                            |                                          }| 	                    ||           |                     |           dS )z1Ensure that training has had a measurable effect.societyr<   N)
r  r   rY   ra   tolistrW   rZ  r^   r>   r   )r,   r  r   
old_vector
new_vectors        r-   test_continuation_nativez7NativeTrainingContinuationTest.test_continuation_native#  s    $$V,,, Y))$//6688
[[1A1A&-XXXY))$//6688
J
333$$V,,,,,r/   c                    t          d          }|                     |           t          j        |j        j                  }d}|j                            |                                          }|                    t          t          t                    |j                   t          j        |j        j                  }|                     t          j        ||                     |j                            |                                          }|                     ||           |                     |           dS )z;Ensure that continued training has had a measurable effect.r   )r6   r   r<   N)r  r   r   r  rY   r`   ra   r  rW   rZ  r^   r>   r   r   r   )r,   rc   vectors_ngrams_beforer   r  vectors_ngrams_afterr  s          r-   test_continuation_gensimz7NativeTrainingContinuationTest.test_continuation_gensim5  s   q)))$$U+++ "(? @ @X((..5577
KK0@0@VVV!wux'>??%:<PQQRRRX((..5577
J
333$$U+++++r/   c                    d}t          |          5  t                                          |           t          j        |          }|                     |           |                    t          t          t                    |j	                   |                    |           |                     |           ddd           dS # 1 swxY w Y   dS )ETest that serialization works end-to-end.  Not crashing is a success.ztest_ft_saveload_native.modelr<   N)
r   r  r   rR   r   r   rW   rZ  r^   r>   r,   
model_namerc   s      r-   test_save_load_gensimz4NativeTrainingContinuationTest.test_save_load_gensimG  s     5
J'' 	0 	0NN
+++N:..E((///KKC4D4DU\KZZZJJz"""((///	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0   B)CCCc                    d}t          |          5  t                                          |           t          j        |          }|                     |           |                    t          t          t                    |j	                   |                    |           |                     |           ddd           dS # 1 swxY w Y   dS )r$  ztest_ft_saveload_fb.modelr<   N)
r   r  r   rR   r   r   rW   rZ  r^   r>   r%  s      r-   test_save_load_nativez4NativeTrainingContinuationTest.test_save_load_native[  s     1
J'' 	0 	0MMz***N:..E((///KKC4D4DU\KZZZJJz"""((///	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0r(  c                 ,   t           j        j                            t	          d                    }|j        d         }t          j        g d          }|                     t          j	        ||d                     | 
                    |           d S )Ntoy-model-pretrained.bin
monarchist)gm2d?gn?g~jt?g'Ig:̗`r  r   )r&   r'   r   r(   r
   rY   r   r  r   r   r   )r,   rc   actualr  s       r-   test_load_native_pretrainedz:NativeTrainingContinuationTest.test_load_native_pretrainedj  s    &::8D^;_;_``,'8JJJKKHf5AAABBB$$U+++++r/   c                 L   t          d          }t          j        j                            |          }|                     d|j        v            |                     d|j        v            |d         }|d         }|                     t          j	        ||                     d S )Ncrime-and-punishment.binlandlordlandlady)
r
   r&   r'   r   load_facebook_vectorsr   r   r   r   r   )r,   cap_pathfbkv
oov_vector	iv_vectors        r-   test_load_native_vectorsz7NativeTrainingContinuationTest.test_load_native_vectorsq  s    677}%;;HEEt'88999
d&77888*%
$	Z;;<<<<<r/   c                 :   t           j        j                            t	          d                    }|j        d         }t          j        |j        |j	                  }| 
                    t          j        ||                     |                     |           d S )Nr1   )r&   r'   r   r(   r
   rY   r   zerosr]   r]  r   r   r   )r,   rc   r   origins       r-   test_no_ngramsz-NativeTrainingContinuationTest.test_no_ngramsz  s{    &::8D^;_;_``Xb\"(BH--B//000$$U+++++r/   )r  r  r  maxDiffr#   r   r.   r  r  r  r  r"  r'  r*  r/  r9  r>  rJ   r/   r-   r  r    s        G/G
 
 
$
- 
- 
-- - -- - -$- - -$, , ,$0 0 0(0 0 0, , ,= = =, , , , ,r/   r  c                      ddl } t          d          }t          d          }|                     |ddd|          }|                    t          d	                     dS )
zGenerate toy-model-pretrained.bin for use in test_load_native_pretrained.

    Requires https://github.com/facebookresearch/fastText/tree/master/python to be installed.

    r   Nr  zpretrained.vecr  skipgramr   )r;   rc   dimpretrainedVectorsr,  )fastTextr
   train_unsupervised
save_model)rD  training_textpretrained_filerc   s       r-   $_train_model_with_pretrained_vectorsrI    ss     OOO^,,M/00O''*! (  E 
X899:::::r/   c                       e Zd Zd Zd ZdS )HashCompatibilityTestc                     t          j        t          d                    }|                     |j        j                   d S )Nzcompatible-hash-true.model)rR   r   r
   r   rY   compatible_hashr,   ms     r-   test_compatibility_truez-HashCompatibilityTest.test_compatibility_true  s7    N8$@AABB,-----r/   c                 `    t                      }|                     |j        j                   d S r   )r  r   rY   rM  rN  s     r-   test_hash_nativez&HashCompatibilityTest.test_hash_native  s(    MM,-----r/   N)r  r  r  rP  rR  rJ   r/   r-   rK  rK    s2        . . .. . . . .r/   rK  c                   *    e Zd ZdZd Zd Zd Zd ZdS )FTHashResultsTestzLoosely based on the test described here:

    https://github.com/RaRe-Technologies/gensim/issues/2059#issuecomment-432300777

    With a broken hash, vectors for non-ASCII keywords don't match when loaded
    from a native model.
    c                 .   t           j        j                            t	          d                    | _        t          j        t	          d          dd          5 }t          t          |                    | _
        d d d            d S # 1 swxY w Y   d S )Nr1  zcrime-and-punishment.vecr  r  r   )r&   r'   r   r(   r
   rc   r   r  r  r  r  r,   r  s     r-   r.   zFTHashResultsTest.setUp  s     ]+??Ic@d@dee
Z!;<<cGTTT 	0X[ #//DM	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0s   "B

BBc                     d}| j         |         }| j        j        |         }|                     t	          j        ||d                     d S )Nr3  r  r   r  rc   rY   r   r   r   r,   r   r  r.  s       r-   
test_asciizFTHashResultsTest.test_ascii  sJ    =&t$Hf4@@@AAAAAr/   c                     d}| j         |         }| j        j        |         }|                     t	          j        ||d                     d S )Nu   хозяйкаr  r   rX  rY  s       r-   test_unicodezFTHashResultsTest.test_unicode  sJ     =&t$Hf4@@@AAAAAr/   c           	          d}dt          j        g d          dt          j        g d          |t          j        g d          i} fd|D             }                     t          j        |d         |d         d	                                          t          j        |d         |d         d	                                          t          j        ||         ||         d	                     d S )
N'rechtsschutzversicherungsgesellschaften
steamtrain)g+`?g+f?gUGt?geVp;4?g() ?u   паровоз)g6P*+kg8	ʬ?g)x?gY?gy-K?)g)=Kegip[[x?gs?g):]?ggRy?c                 6    i | ]}|j         j        |         S rJ   )rc   rY   )rL   rM   r,   s     r-   r  z7FTHashResultsTest.test_out_of_vocab.<locals>.<dictcomp>  s$    888!!TZ]1%888r/   r  r   )r   r  r   r   )r,   longwordr  r.  s   `   r-   r  z#FTHashResultsTest.test_out_of_vocab  s    =28$V$V$VWWrx(^(^(^__bhRRRSS

 9888x888H]$;VM=RY]^^^___H->$?HYAZaefffgggHX$6x8HtTTTUUUUUr/   N)r  r  r  __doc__r.   rZ  r\  r  rJ   r/   r-   rT  rT    se         0 0 0B B BB B B
V 
V 
V 
V 
Vr/   rT  c           
         dt           i}	 ||          }n0# t          $ r# t          d| dt          |                    w xY wt          j        D ]}d| v r;|                    d                                                              d          }n'|                                                    d          }|D ]!}t          d|d	 ||          d
           "dS )z2Generate hash values for test from standard input.cy_bytesinvalid alg:  expected one of bytesr      r  ur   ,N)	r   r   sortedsysstdinencoderstripr  print)alghashmapfunrV  r  r   s         r-   	hash_mainrt    s    	MGVcl V V VhsssF7OOOTUUUV 	 2 2c> 	-KK((//1177==EEKKMM'',,E 	2 	2DEcc$iiii01111	22 2s	    -Ac                       e Zd Zd Zd ZdS )FTHashFunctionsTestc                 2    ddddddddd	d
ddddd| _         d S )Ni:'fl   (f iI+l   Waw iMMi*iB=RiSpqiviA6}l   S l   1Ih l   4R l   
3 )u   командаu   маленькихu   друзейu
   возитu
   грузыu   всехu   быстрей
mysteriousasteroidodysseyintroductionu	   北海道u   札幌u   西区)r  r+   s    r-   r.   zFTHashFunctionsTest.setUp  s@      *#-&%%")%#!'$!!
 
r/   c                 ^    d | j         D             }|                     | j         |           d S )Nc                 T    i | ]%}|t          |                    d                     &S r  )r   rn  )rL   ks     r-   r  z3FTHashFunctionsTest.test_cython.<locals>.<dictcomp>  s.    MMM!!]188G#4#455MMMr/   )r  r[   )r,   r.  s     r-   test_cythonzFTHashFunctionsTest.test_cython  s5    MMt}MMM/////r/   N)r  r  r  r.   r  rJ   r/   r-   rv  rv    s2        
 
 
40 0 0 0 0r/   rv  c            	         t           j        d         } t          t           j        d                   }t          t           j        d                   }||k    s
J d            t          t          d}	 ||          }n0# t
          $ r# t          d| dt          |                    w xY wt           j        D ]9}|                    d          } ||||          }t          |d	|d
           :dS )z.Generate ngrams for tests from standard input.r2   r  r   z%expected sane command-line parameters)cy_textrd  re  rf  
r   rj  N)
rl  argvintr   r   r   rk  rm  ro  rp  )rq  minnmaxnrr  rs  rV  r   ngramss           r-   
ngram_mainr    s    (1+Csx{Dsx{D4<@@@@@@ "( GVcl V V VhsssF7OOOTUUUV 	 * *{{4  T4&&444())))* *s   1A: :-B'c                   l    e Zd Zd Zd Z ej        ej        dk    d          d             Z	d Z
d ZdS )	
NgramsTestc                     g dg dg dg dg dg dd| _         g dg d	g d
g dg dd| _        dg di| _        dg di| _        d S )N)	z<tetesestzst>z<tesrn   zest>z<testztest>)<atzat zt tz thr   he>z<at zat tzt thz thethe>z<at tzat thzt thez the>)r  zat
zt
tz
thr   r  z<at
zat
tzt
thz
ther  z<at
tzat
thzt
thez
the>)	u   <теu   тесu   естu   ст>u   <тес   тестu   ест>u	   <тестu	   тест>)u   <テス	   テストu   スト>u
   <テストu
   テスト>u   <テスト>)u   <試しu   試し>u   <試し>)rn   at thezat
ther  r     試し)	s   <tes   <tess   <tests   tess   tests   test>s   ests   est>s   st>)s   <ats   <at s   <at ts   at s   at ts   at ths   t ts   t ths   t thes    ths    thes    the>s   thes   the>s   he>)	s   <теs   <тесs	   <тестs   тесs   тестs	   тест>s   естs   ест>s   ст>)s   <テスs
   <テストs   <テスト>s	   テストs
   テスト>s   スト>)s   <試しs   <試し>s   試し>)rn   r  r  r  r     🚑🚒🚓🚕)	u	   <🚑🚒u   🚑🚒🚓u   🚒🚓🚕u	   🚓🚕>u   <🚑🚒🚓r  u   🚒🚓🚕>u   <🚑🚒🚓🚕u   🚑🚒🚓🚕>)	s	   <🚑🚒s   <🚑🚒🚓s   <🚑🚒🚓🚕s   🚑🚒🚓s   🚑🚒🚓🚕s   🚑🚒🚓🚕>s   🚒🚓🚕s   🚒🚓🚕>s	   🚓🚕>)expected_textexpected_bytesexpected_text_wide_unicodeexpected_bytes_wide_unicoder+   s    r-   r.   zNgramsTest.setUp  s    ZZZ     {zzggg888
 
 dcc    
  
 rqq!
 
(  ! ! !+
'  
! 
! 
!,
(((r/   c                     | j         D ]6}| j         |         }t          |dd          }|                     ||           7d S Nr   r   )r  r   r[   rY  s       r-   test_text_cyzNgramsTest.test_text_cyN  sU    & 	/ 	/D)$/H#D!Q//FXv....	/ 	/r/   i  z7Python interpreter doesn't support UCS-4 (wide unicode)c                     | j         D ]6}| j         |         }t          |dd          }|                     ||           7d S r  )r  r   r[   rY  s       r-   test_text_cy_wide_unicodez$NgramsTest.test_text_cy_wide_unicodeT  sU    3 	/ 	/D6t<H#D!Q//FXv....	/ 	/r/   c                 "   | j         D ]}| j         |         }t          |dd          }|                     ||           | j        |         }d |D             }|                     t	          |          t	          |                     | j        D ]}| j        |         }t          |dd          }|                     ||           | j        |         }d |D             }|                     t	          |          t	          |                     d S )Nr   r   c                 8    g | ]}|                     d           S r~  decoderL   ns     r-   rO   z,NgramsTest.test_bytes_cy.<locals>.<listcomp>b  $    ===188G,,===r/   c                 8    g | ]}|                     d           S r~  r  r  s     r-   rO   z,NgramsTest.test_bytes_cy.<locals>.<listcomp>k  r  r/   )r  r   r[   r  rk  r  r  )r,   r   r  r.  r  actual_texts         r-   test_bytes_cyzNgramsTest.test_bytes_cy[  s0   ' 	I 	ID*40H)$155FXv... .t4M==f===KVM22F;4G4GHHHH4 	I 	ID7=H)$155FXv... ;DAM==f===KVM22F;4G4GHHHH	I 	Ir/   c                 l   t          j        t          d          dd          5 }t          t	          |                    }ddd           n# 1 swxY w Y   |                                D ]F\  }}t          |dd          }|                     t          |          t          |                     GdS )z4Test against results from Facebook's implementation.zfb-ngrams.txtr  r  r   Nr   r   )	r   r  r
   r  _read_fbr  r   r[   rk  )r,   r  fbr   r  r.  s         r-   test_fbzNgramsTest.test_fbn  s    Z113III 	%Shsmm$$B	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% !hhjj 	? 	?ND( $D!Q//FVH--vf~~>>>>	? 	?s   AAAN)r  r  r  r.   r  r  r  rl  
maxunicoder  r  r  rJ   r/   r-   r  r    s        4
 4
 4
l/ / / X_S^v-/hii/ / ji/I I I&
? 
? 
? 
? 
?r/   r  c              #     K   | r|                                                                  }|sdS |dk    sJ |                                                                  }|                                   g }	 |                                                                  }|dk    rnH|                    d          }d                    |dd                   }|                    |           u||fV  | dS dS )z*Read ngrams from output of the FB utility.z<start>Tz<end>r  N)r  ro  r  joinr[  )r  rV  r   r  r  terms         r-   r  r  {  s     &  ||~~$$&& 	Ey    ||~~$$&&	 <<>>((**Dw jjooG88GCRCL))DMM$	  Fl)      r/   c                   $    e Zd ZdZd Zd Zd ZdS )ZeroBucketTestzLTest FastText with no buckets / no-ngrams: essentially FastText-as-Word2Vec.c                 f    t          d          }|                     |j        d                    d S )Nr   r;   	anarchist)r  assertIsNotNonerY   rq   s     r-   r  zZeroBucketTest.test_in_vocab  s3    A&&&UXk233333r/   c                     t          d          }|                     t                    5  |j                            d           d d d            d S # 1 swxY w Y   d S )Nr   r  streamtrain)r  ro   r   rY   ra   rq   s     r-   r  z ZeroBucketTest.test_out_of_vocab  s    A&&&x(( 	/ 	/H...	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/ 	/s   AAAc                 d    t          ddddddddddd          }t          j        | |           d	S )
z@See `gensim.test.test_word2vec.TestWord2VecModel.test_cbow_neg`.r   r2   r   r      r?   r  )r   r   r   r   r7   r8   r6   r>   r:   r   r   N)rR   r   rV   rq   s     r-   test_cbow_negzZeroBucketTest.test_cbow_neg  sJ    AT!BAa
 
 

 	&tU33333r/   N)r  r  r  rb  r  r  r  rJ   r/   r-   r  r    sG        VV4 4 4/ / /
4 4 4 4 4r/   r  c                       e Zd Zd Zd ZdS )UnicodeVocabTestc                 |   t          j                    }d|_        |                    t	          j        dddd                     |                    t	          j        dd                     |                    d           |                    d           |                    t	          j        d	d
d                     |                    d           |                    d           |                    t	          j        d	dd                     |                    d           t          j        j	        
                    |d          \  }}}}d
dd}|                     |t          |                     |                     |d           |                     |d           |                     |d           d S )N!dummy name to keep fasttext happy@3ir  r   @1qr?   s   hello    @qbr2   s   worldr   F)helloworldioBytesIOnamewritestructpackseekr&   r'   _fasttext_bin_load_vocabr[   r  r,   bufr  r   nlabelsntokensr  s          r-   rZ  zUnicodeVocabTest.test_ascii  s   jll6		&+eQB//000		&+eR(()))		(		'		&+eQ++,,,		(		'		&+eQ++,,,28-2M2Y2YZ]_d2e2e/	:w++4	??333Q'''"%%%"%%%%%r/   c                 |   t          j                    }d|_        |                    t	          j        dddd                     |                    t	          j        dd                     |                    d           |                    d           |                    t	          j        d	d
d                     |                    d           |                    d           |                    t	          j        d	dd                     |                    d           t          j        j	        
                    |d          \  }}}}d
dd}|                     |t          |                     |                     |d           |                     |d           |                     |d           d S )Nr  r  r  r   r  r?   s:   英語版ウィキペディアへの投稿はいつでもr  r  r2   s:   административно-территориальнr   F)u=   英語版ウィキペディアへの投稿はいつでも\xe6u=   административно-территориальн\xd1r  r  s          r-   test_bad_unicodez!UnicodeVocabTest.test_bad_unicode  s   jll6		&+eQB//000		&+eR(())) 			8	
 	
 	
 			'		&+eQ++,,,		4	
 	
 	
 			'		&+eQ++,,,28-2M2Y2YZ]_d2e2e/	:w PQOP
 

 	4	??333Q'''"%%%"%%%%%r/   N)r  r  r  rZ  r  rJ   r/   r-   r  r    s2        & & &,*& *& *& *& *&r/   r  s+   the quick brown fox jumps over the lazy dog)	r  r   g       @g      @g      @g      @g      @g      @g       @rX  r  c                        e Zd Zd Zd Zd ZdS )TestFromfilec                     t          t          d          d          5 }|                     |           d d d            d S # 1 swxY w Y   d S )Nzreproduce.datrb)r  r
   _runrV  s     r-   test_decompressedzTestFromfile.test_decompressed  s    (?++T22 	cIIcNNN	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   AAAc                     t          j        t          d          d          5 }|                     |           d d d            d S # 1 swxY w Y   d S )Nzreproduce.dat.gzr  )gzipGzipFiler
   r  rV  s     r-   test_compressedzTestFromfile.test_compressed  s    ]8$677>> 	#IIcNNN	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   AA
A
c                    |                     t          t                              }|                     t          |           t          j        j                            |t          j	        t          j
        d                   }t                              d|           |                     t          j        t          |                     d S )Nr   z	array: %r)readr^   _BYTESr[   r&   r'   r  	_fromfile_ARRAYr]  r]   loggererrorr   r   r   )r,   r  r.  r  s       r-   r  zTestFromfile._run  s    #f++&&(((+55c6<VWYY[%(((FE2233333r/   N)r  r  r  r  r  r  rJ   r/   r-   r  r    sA            4 4 4 4 4r/   r  c                    t          di |}t          t          d                    }|                    |           |                    ||j        |j                   t          j        j	        
                    ||            |S )Nr  r<   rJ   )rR   r   r
   rT   rW   rX   r>   r&   r'   r   rM  )fnamemodel_paramsrc   r  s       r-   _create_and_save_fb_modelr    s|    %%%%EH%9::;;H	h	KK);ELKQQQ
M..ue<<<Lr/   c                 T    t          j        t          j        | |z
                      S r   )r   r   abs)r   r   s     r-   calc_max_diffr    s    6"&b//"""r/   c                        e Zd Zd Zd Zd ZdS )SaveFacebookFormatModelTestc           	         |dddddt           dd}t          d          5 }t          ||          }t          j        j                            |          }d d d            n# 1 swxY w Y   |                     |j        |j                   |                     |j	        |j	                   |                     |j
        |j
                   |                     |j        |j                   |                     |j        |j                   |                     |j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   |                     |j        |j                   |                     t'          |j        j                  t'          |j        j                             |j        j        D ]E}|j        |         }|j        |         }|                     t-          ||          t.                     Fd S )Nr?   r2   r   r3   )r   r5   r6   r7   r8   r9   r;   r:   zroundtrip_model_to_model.bin)rS   r   r  r&   r'   r   r(   r[   r5   r   r>   r8   r7   r   rY   r;   r   r   r   r  r   r^  r   MAX_WORDVEC_COMPONENT_DIFFERENCE)	r,   r   r  fpathmodel_trainedmodel_loadedrM   v_origv_loadeds	            r-   _check_roundtripz,SaveFacebookFormatModelTest._check_roundtrip#  sv     :;; 	Mu5e\JJM!=1EEeLLL	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	2L4LMMM-|/BCCC-|/BCCC/1FGGG)<?;;;)<?;;;)0,/2HIII)/1FGGG)/1FGGG-|/BCCC]-:;;SA]=^=^___!. 	_ 	_A"%a(F#q)HOOM&(;;=]^^^^	_ 	_s   5A!!A%(A%c                 2    |                      d           d S Nr2   r   r  r+   s    r-   test_skipgramz)SaveFacebookFormatModelTest.test_skipgramC      #####r/   c                 2    |                      d           d S Nr   r  r  r+   s    r-   	test_cbowz%SaveFacebookFormatModelTest.test_cbowF  r  r/   N)r  r  r  r  r  r  rJ   r/   r-   r  r  !  sE        _ _ _@$ $ $$ $ $ $ $r/   r  c                 ~    t          | d          5 }|                                }d d d            n# 1 swxY w Y   |S )Nr  )r  r  )r  fdatas      r-   _read_binary_filer  J  sv    	eT		 avvxx              Ks   266c                   $    e Zd ZdZd Zd Zd ZdS )SaveGensimByteIdentityTesta  
    This class containts tests that check the following scenario:

    + create binary fastText file model1.bin using gensim
    + load file model1.bin to variable `model`
    + save `model` to model2.bin
    + check if files model1.bin and model2.bin are byte identical
    c           	         |ddddt           ddd}t          d          5 }t          d          5 }t          ||           t          j        j                            |          }t          j        j                            ||           t          |          }t          |          }d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   | 	                    ||           d S )Nr?   r2   r   r3   r   r5   r6   r7   r8   r;   r9   r:   zroundtrip_file_to_file1.binzroundtrip_file_to_file2.bin)
rS   r   r  r&   r'   r   r(   rM  r  r[   )r,   r   r  fpath1fpath2rc   bin1bin2s           r-   _check_roundtrip_file_filez5SaveGensimByteIdentityTest._check_roundtrip_file_fileZ  sf     9:: 	-f899	-=C%fl;;;M*>>vFFEM"66ufEEE$V,,D$V,,D	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	t$$$$$s5   CA8B4(C4B8	8C;B8	<CCCc                 2    |                      d           d S r  r  r+   s    r-   r  z(SaveGensimByteIdentityTest.test_skipgramo      ''1'-----r/   c                 2    |                      d           d S r  r  r+   s    r-   r  z$SaveGensimByteIdentityTest.test_cbowr  r  r/   Nr  r  r  rb  r  r  r  rJ   r/   r-   r  r  P  sK         % % %*. . .. . . . .r/   r  c           
          t          d          }|d         dk    rdnd}t          |d                   }t          |d                   }t          |d|d	| d
|d|g
}t          j        |           d S )Nr  r   r   cbowrA  r5   r9   z-inputz-outputz-dimz-seed)r
   strFT_CMD
subprocess
check_call)out_base_fnamer  	inp_fname
model_typesizer9   cmds          r-   _save_test_modelr   v  s    -..I'-2B
J|M*++D|F#$$D 	
Higt5C #r/   z.fasttext not in FT_HOME or PATH, skipping testc                   $    e Zd ZdZd Zd Zd ZdS )SaveFacebookByteIdentityTesta/  
    This class containts tests that check the following scenario:

    + create binary fastText file model1.bin using facebook_binary (FT)
    + load file model1.bin to variable `model`
    + save `model` to model2.bin using gensim
    + check if files model1.bin and model2.bin are byte-identical
    c                 *   d|dd}t          d          5 }t          d          5 }t          d          5  |d d         }t          ||           t          j        j                            |          }t          j        j                            ||           t          |          }t          |          }d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   |                     ||           d S )Nr?   r3   )r5   r   r9   zm1.binzm2.binzm1.vec)	r   r   r&   r'   r   r(   rM  r  r[   )	r,   r   r  m1m2m1_basenamerc   r  r  s	            r-   r  z7SaveFacebookByteIdentityTest._check_roundtrip_file_file  s   ')R@@ H%% 	)^H-E-E 	)^\dMeMe 	) 	)SbS'K[,777M*>>rBBEM"66ubAAA$R((D$R((D	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	t$$$$$sX   C2CBC8CCCCCC2C	C2"C	#C22C69C6c                 2    |                      d           d S r  r  r+   s    r-   r  z*SaveFacebookByteIdentityTest.test_skipgram  r  r/   c                 2    |                      d           d S r  r  r+   s    r-   r  z&SaveFacebookByteIdentityTest.test_cbow  r  r/   Nr  rJ   r/   r-   r"  r"    sK         % % % . . .. . . . .r/   r"  c                 r   d t           d| g}t          j        |t          j        t          j                  }d                    |          }|                    |                    d                    \  }}t          j        fd|	                                D             t          j
                  S )	Nc                     t          j        d |                                 dd          D             t           j                  S )Nc                 ,    g | ]}t          |          S rJ   r  )rL   ss     r-   rO   zK_read_wordvectors_using_fasttext.<locals>.line_to_array.<locals>.<listcomp>  s    <<<aq<<<r/   r2   r  )r   r  r  rX  )rV  s    r-   line_to_arrayz7_read_wordvectors_using_fasttext.<locals>.line_to_array  s9    x<<4::<<+;<<<BJOOOOr/   zprint-word-vectors)rm  stdoutr  r  )inputc                 &    g | ]} |          S rJ   rJ   )rL   rV  r.  s     r-   rO   z4_read_wordvectors_using_fasttext.<locals>.<listcomp>  s#    FFFT]]4((FFFr/   r  )r  r  PopenPIPEr  communicatern  r   r  
splitlinesrX  )fasttext_fnamer  r  process	words_strout_r.  s          @r-    _read_wordvectors_using_fasttextr;    s    P P P '
8C:?     G 		%  I  y'7'7'@'@ AAFC8FFFFS^^5E5EFFFbjYYYYr/   c                   $    e Zd ZdZd Zd Zd ZdS )SaveFacebookFormatReadingTesta<  
    This class containts tests that check the following scenario:

    + create fastText model using gensim
    + save file to model.bin
    + retrieve word vectors from model.bin using fasttext Facebook utility
    + compare vectors retrieved by Facebook utility with those obtained directly from gensim model
    c           	      |   |ddddt           ddd}t          d          5 }t          ||          }t          ||j        j                  }d d d            n# 1 swxY w Y   t          |j        j                  D ]@\  }}t          ||d d f         |j        |                   }|                     |d           Ad S )Nr?   r2   r   r3   r
  zload_fasttext.binr   )	rS   r   r  r;  rY   r   	enumerater  r^  )	r,   r   r  r  rc   rY   irM   diffs	            r-   _check_load_fasttext_formatz9SaveFacebookFormatReadingTest._check_load_fasttext_format  s      /00 	PE-e\BBE1%9NOOB	P 	P 	P 	P 	P 	P 	P 	P 	P 	P 	P 	P 	P 	P 	P eh344 	* 	*DAq AqqqD58A;77DOOD&))))	* 	*s   +AAAc                 2    |                      d           d S r  rB  r+   s    r-   r  z+SaveFacebookFormatReadingTest.test_skipgram      ((A(.....r/   c                 2    |                      d           d S r  rD  r+   s    r-   r  z'SaveFacebookFormatReadingTest.test_cbow  rE  r/   N)r  r  r  rb  rB  r  r  rJ   r/   r-   r=  r=    sK         * * *(/ / // / / / /r/   r=  c                        e Zd Zd Zd Zd ZdS )
UnpackTestc                 "   t          j        t          d                    }d|_        dddd}t	          |d|          }|                     t          j        t          j        g d          |d	         k                         |                     t          j        t          j        g d
          |d         k                         |                     t          j        t          j        g d          |d         k                         d S )N	   r   r   r   r2   r  )r?   rE   r1      r   r2   r  r?   r   r/  r   rE   r      r   r1   r   r  ranger]   r   r   r   r,   rO  
hash2indexr  s       r-   r  zUnpackTest.test_sanity  s    HU1XX**
Ar:&&rx			22ae;<<===rx			22ae;<<===rx			22ae;<<=====r/   c                 "   t          j        t          d                    }d|_        dddd}t	          |d|          }|                     t          j        t          j        g d          |d         k                         |                     t          j        t          j        g d	          |d         k                         |                     t          j        t          j        g d
          |d         k                         d S )NrJ  rK  r   r2   r  )r2   r   r1   rL  rN  rM  rO  r1   rQ  rS  s       r-   test_trickyzUnpackTest.test_tricky  s    HU1XXqa((
Ar:&&rx			22ad:;;<<<rx			22ad:;;<<<rx			22ae;<<=====r/   c                 "   t          j        t          d                    }d|_        dddd}t	          |d|          }|                     t          j        t          j        g d          |d         k                         |                     t          j        t          j        g d          |d         k                         |                     t          j        t          j        g d	          |d         k                         d S )
NrJ  rK  r   r2   r  rM  rL  rN  rO  rQ  rS  s       r-   test_identityzUnpackTest.test_identity  s    HU1XXqQ''
Ar:&&rx			22ad:;;<<<rx			22ad:;;<<<rx			22ad:;;<<<<<r/   N)r  r  r  r  rV  rX  rJ   r/   r-   rH  rH    sA        > > >> > >= = = = =r/   rH  c                       e Zd Zd Zd ZdS )FastTextKeyedVectorsTestc           	         t          dddd          }|                    dt          j        ddg                     |                     |j        d         d           |                     |j        d         d           |                     t          j        |j	        d         t          j        ddg          k                         d S )Nr  r   r   r  r5   r   r   r;   test_keyr   )
r   
add_vectorr   r  r[   r   r   r   r   r\   r,   rY   s     r-   test_add_vectorz(FastTextKeyedVectorsTest.test_add_vector  s    !aq'RRR
j"(Aq6"2"23334a888+Z888rz!}!Q0@0@@AABBBBBr/   c           	         t          dddd          }|                    ddgt          j        ddgd	d	gg                     |                     |j        d         d           |                     |j        d         d           |                     t          j        |j	        d         t          j        ddg          k                         |                     |j        d         d	           |                     |j        d	         d           |                     t          j        |j	        d	         t          j        d	d	g          k                         d S )
Nr  r   r   r  r\  	test_key1	test_key2r   r2   )
r   add_vectorsr   r  r[   r   r   r   r   r\   r_  s     r-   test_add_vectorsz)FastTextKeyedVectorsTest.test_add_vectors  s,   !aq'RRR
[128aVaV<L3M3MNNN5q999+[999rz!}!Q0@0@@AABBB5q999+[999rz!}!Q0@0@@AABBBBBr/   N)r  r  r  r`  re  rJ   r/   r-   rZ  rZ    s:        C C C
C 
C 
C 
C 
Cr/   rZ  __main__z)%(asctime)s : %(levelname)s : %(message)s)formatlevel)r  r   )n
__future__r   r  r  loggingr  osshutilr  r  rl  numpyr   pytestr&   r   gensim.models.word2vecr   gensim.models.fasttextr   rR   r   r   gensim.models.keyedvectorsr	   gensim.test.utilsr
   r   r   r   rU   r   rZ  gensim.test.test_word2vecr   gensim.models._fasttext_bingensim.models.fasttext_innerr   r   r   otr   r  ImportError
ValueError	getLoggerr  r  r  calcsizeIS_WIN32r  rS   environgetr   whichr  r@  TestCaser#   markparametrizer  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rI  rK  rT  rt  rv  r  r  r  r  r  r  r  r]  r  r  r  r  r  r  r  r   r  r"  r;  r=  rH  rZ  basicConfigDEBUGmainrJ   r/   r-   <module>r     sW           				   				       



            / / / / / / W W W W W W W W W W 3 3 3 3 3 3              8 7 7 7 7 7 " " " " \ \ \ \ \ \ \ \ \ \    GGZ    GGG 
	8	$	$GtOA/&/#"6"6":b"@#*   

*..
#
#	jw	/	/	/	K<6<
3K3K /..7n7,,,KH KH KH KH KH) KH KH KH\ )D%=99' ' :9'@ )D%=99!+ !+ :9!+H )D%=99' ' :9'@ )D%=99!+ !+ :9!+H 
T((>
"
"## 4sXXZZ%%''--c223M4 4 4 4 4 4 4 4 4 4 4 4 4 4 4     7 7 7@ @ @ 4 4 4 	0 	0 	0[, [, [, [, [,X%6 [, [, [,|; ; ;". . . . .H- . . .&V &V &V &V &V) &V &V &VR2 2 2&0 0 0 0 0(+ 0 0 0J* * *0a? a? a? a? a?" a? a? a?H' ' 'T4 4 4 4 4X& 4 4 4*A& A& A& A& A&x( A& A& A&H 
8	666hbhy>Q>Q	R	R	R4 4 4 4 48$ 4 4 4$  # # #&$ &$ &$ &$ &$("3 &$ &$ &$R  #. #. #. #. #.!2 #. #. #.L   VMNN. . . . .8#4 . . ON.B
Z 
Z 
Z VMNN"/ "/ "/ "/ "/H$5 "/ "/ ON"/J= = = = =" = = =@C C C C Cx0 C C C, z GJRYR_````HMOOOOO s$   6A? ?	B
B;HH H