
    cWb                        d Z ddlZddlZddlZddlmZmZmZ ddlm	Z	 ddl
mZmZmZ  G d dej                  Z G d d	          Z G d
 de          Zd Z G d deej                  Z G d deej                  Z G d deej                  Z G d deej                  Z G d d          Z G d dee          Z G d dej                  Zedk    r( ej        dej                    ej                     dS dS )z2
Automated tests for the phrase detection module.
    N)PhrasesFrozenPhrases_PhrasesTransformation)original_scorer)common_textstemporary_filedatapathc                   B    e Zd Z G d de          Zd Zd Zd Zd ZdS )TestPhraseAnalysisc                   $     e Zd Z fdZd Z xZS )!TestPhraseAnalysis.AnalysisTesterc                 l    t                                          h d           || _        || _        d S )N>   aofthewithconnector_words)super__init__scores	threshold)selfr   r   	__class__s      8lib/python3.11/site-packages/gensim/test/test_phrases.pyr   z*TestPhraseAnalysis.AnalysisTester.__init__   s7    GG-G-G-GHHH DK&DNNN    c                     d                     |g|z   |gz             }| j                            |d          }|| j        k    r||fS dS )N_)NN)joinr   getr   )r   word_aword_b
in_betweenphrasescores         r   score_candidatez1TestPhraseAnalysis.AnalysisTester.score_candidate   sT    XXvh3vh>??FKOOFB//Et~% %u}$:r   )__name__
__module____qualname__r   r'   __classcell__)r   s   @r   AnalysisTesterr      sG        	' 	' 	' 	' 	'
	 	 	 	 	 	 	r   r,   c                     g d}|                      i d          |         }|                     ||           g d}|                      i d          |         }|                     ||           dS )z$Test transformation with no phrases.)simplesentenceshouldpass   r   )	r   r.   r/   r   nobigrambutcommontermsNr,   assertEqual)r   r/   results      r   test_simple_analysisz'TestPhraseAnalysis.test_simple_analysis$   s    ;;;$$R1$55h?***```$$R1$55h?*****r   c                 f   ddddd}g d}|                      |d          |         }|                     |g d           g d}|                      |d          |         }|                     |g d           g d	}|                      |d          |         }|                     ||           d S )
N   simple_sentencesentence_manymany_possiblepossible_bigrams)r.   r/   manypossiblebigramsr2   r3   )r@   rB   rF   )somer.   r/   rD   rF   )rG   r@   rD   rF   )rG   	unrelatedr.   wordsr9   r   r   r/   r;   s       r   test_analysis_bigramsz(TestPhraseAnalysis.test_analysis_bigrams-   s     1A
 
 IHH$$Vq$99(C!P!P!PQQQDDD$$Vq$99(C!O!O!OPPP;;;$$Vq$99(C*****r   c                     ddddd}g d}|                      |d          |         }|                     |g d           g d}|                      |d          |         }|                     |g d           d S )	Nr>   r?   )r   r.   r/   rD   r   rE   rF   r2   r3   )r   r@   rD   r   rC   )	r.   r   r/   andrD   rE   rF   r   r   )r.   r   r/   rM   rB   rF   r   r   r9   rJ   s       r   test_analysis_connector_wordsz0TestPhraseAnalysis.test_analysis_connector_words>   s     1A
 
 UTT$$Vq$99(C!\!\!\]]]ccc$$Vq$99(CYYY	
 	
 	
 	
 	
r   c                     dddddd}g d}|                      |d          |         }|                     |ddg           g d}|                      |d          |         }|                     |g d	           d S )
Nr>   )r@   sentence_with_manyrB   many_of_the_possiblerC   )r/   r   rD   rE   rF   r2   r3   rP   rC   )
r   r.   r/   r   rD   r   r   rE   rF   r   )r   r@   r   rQ   rF   r   r9   rJ   s       r   (test_analysis_connector_words_in_betweenz;TestPhraseAnalysis.test_analysis_connector_words_in_betweenN   s     q
 
 GFF$$Vq$99(C"68J!KLLLjjj$$Vq$99(C___	a 	a 	a 	a 	ar   N)	r(   r)   r*   r   r,   r<   rK   rN   rR    r   r   r   r      s            /   + + ++ + +"
 
 
 a a a a ar   r   c                   D    e Zd Zeg dgz   Z e            ZdZdZdZ	d Z
dS )PhrasesDatagraphminorssurveyhuman	interfaceresponse_timegraph_minorshuman_interfacec                 $    d | j         D             S )Nc              3   .   K   | ]}d  |D             V  dS )c              3      K   | ]}|V  d S NrS   .0ws     r   	<genexpr>z6PhrasesData.gen_sentences.<locals>.<genexpr>.<genexpr>i   "      %%q%%%%%%r   NrS   rd   r/   s     r   rf   z,PhrasesData.gen_sentences.<locals>.<genexpr>i   1      EE(%%H%%%EEEEEEr   	sentencesr   s    r   gen_sentenceszPhrasesData.gen_sentencesh       EEdnEEEEr   N)r(   r)   r*   r   rk   	frozensetr   bigram1bigram2bigram3rm   rS   r   r   rU   rU   ]   s\        ;;;  I  ikkOGG GF F F F Fr   rU   c                   B    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
S )PhrasesCommonz1Tests for both Phrases and FrozenPhrases classes.c                     t          | j        dd| j                  | _        t          | j        | j                  | _        d S )Nr2   	min_countr   r   r   )r   rk   r   r5   bigram_defaultrl   s    r   setUpzPhrasesCommon.setUpo   s@    dnQX\Xlmmm%dndFZ[[[r   c                 X   t          | j                  }t          |          }t          || j                           }t          |          }||| j                          }t          |          t          |          }}|                     ||           |                     |g            d S rb   )r   rk   r   listr:   assertNotEqual)r   bigram_phrasesbigram_phrasertrigram_phrasestrigram_phrasertrigramsfstsnds           r   (test_empty_phrasified_sentences_iteratorz6PhrasesCommon.test_empty_phrasified_sentences_iterators   s     00&~66!."@AA'88">$.#AB>>4>>Sc"""C$$$$$r   c           	      H   |                      t          | j        g                    g            |                      t          | j        t          d                             g            |                      t          | j        g g g                   g g g           |                      t          | j        t          g g g                             g g g           |                      t          | j        d t	          d          D                                g g g           dS )zITest that empty inputs don't throw errors and return the expected result.rS   c              3   4   K   | ]}t          d           V  dS )rS   N)iter)rd   is     r   rf   zIPhrasesCommon.test_empty_inputs_on_bigram_construction.<locals>.<genexpr>   s(      2N2N4882N2N2N2N2N2Nr   r>   N)r:   r{   rx   r   rangerl   s    r   (test_empty_inputs_on_bigram_constructionz6PhrasesCommon.test_empty_inputs_on_bigram_construction}   s     	d1"566;;;d1$r((;<<bAAAd12r(;<<r2hGGGd1$Bx..ABBRHMMMd12N2NU1XX2N2N2NOPPSUWYRZ[[[[[r   c           	          |                      t          | j                  t          t          | j        | j                                                dS )z'Test basic bigram using a dummy corpus.N)r:   lenrk   r{   rx   rl   s    r   test_sentence_generationz&PhrasesCommon.test_sentence_generation   sP     	T(899::	
 	
 	
 	
 	
r   c           
          |                      t          t          |                                                     t          t          | j        |                                                                         dS )z8Test basic bigram production when corpus is a generator.N)r:   r   r{   rm   rx   rl   s    r   'test_sentence_generation_with_generatorz5PhrasesCommon.test_sentence_generation_with_generator   sj    T''))**++T(););)=)=>??@@	
 	
 	
 	
 	
r   c                    d}d}| j         | j                 D ]"}|s| j        |v rd}|s| j        |v rd}|r|r n#|                     |o|           |                     | j        | j         | j        d                  v            |                     | j        | j         | j        d                  v            |                     | j        | j         | j        d                  v            |                     | j        | j         | j        d                  v            |                     | j        | j         | j        d                  v            dS )z!Test Phrases bigram construction.FTr2      r   N)r5   rk   rp   rq   
assertTruerr   )r   bigram1_seenbigram2_seenr/   s       r   test_bigram_constructionz&PhrasesCommon.test_bigram_construction   sS    DN3 	 	H $DLH$< $# $DLH$< $#  5666 	DN14E(FFGGGDN14E(FFGGGDN24F(GGHHHDN24F(GGHHHDN24F(GGHHHHHr   c                     d}d}| j         |                                          D ]"}|s| j        |v rd}|s| j        |v rd}|r|r n#|                     |o|           dS )zETest Phrases bigram construction building when corpus is a generator.FTN)r5   rm   rp   rq   r   r   r   r   ss       r   'test_bigram_construction_from_generatorz5PhrasesCommon.test_bigram_construction_from_generator   s    T//112 	 	A $DLA$5 $# $DLA$5 $#  566666r   c                     d}d}| j         t          j        | j        t                             D ]"}|s| j        |v rd}|s| j        |v rd}|r|r n#|                     |o|           dS )zGTest Phrases bigram construction building when corpus is a numpy array.F)dtypeTN)r5   nparrayrk   objectrp   rq   r   r   s       r   #test_bigram_construction_from_arrayz1PhrasesCommon.test_bigram_construction_from_array   s    RXdnFCCCD 	 	A $DLA$5 $# $DLA$5 $#  566666r   N)r(   r)   r*   __doc__ry   r   r   r   r   r   r   r   rS   r   r   rt   rt   l   s        ;;\ \ \% % %\ \ \
 
 

 
 
I I I.7 7 77 7 7 7 7r   rt   c                     dS )Nr2   rS   )worda_countwordb_countbigram_count	len_vocabrw   corpus_word_counts         r   dumb_scorerr      s    1r   c                   D    e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
S )TestPhrasesModelc                    t          | j        ddd          }t          || j                 ddd          }t          |                                                                          }t          |                                                                          }|t          g d          k    sJ |t          ddg          k    sJ dS )z/Test Phrases bigram and trigram export phrases.r2    rw   r   	delimiter)human interfaceresponse timegraph minorszminors surveyr   zgraph minors surveyNr   rk   setexport_phraseskeys)r   r5   trigramseen_bigramsseen_trigramss        r   test_export_phrasesz$TestPhrasesModel.test_export_phrases   s    1SQQQ&0AVYZZZ600227799::G224499;;<<s $
 $
 $
      	 	 	 !%
 ! !  	 	 	 	 	r   c                     t          | j        ddd          }t          |                    | j                                                            }|t          g d          k    sJ dS )z!Test Phrases bigram find phrases.r2   r   r   )r   r   r   Nr   rk   r   find_phrasesr   r   r5   r   s      r   test_find_phrasesz"TestPhrasesModel.test_find_phrases   s    1SQQQ6..t~>>CCEEFFs $
 $
 $
      	 	 	 	 	r   c                     t          | j        ddd          }g dg}t          |                    |                                                    }|ddhk    sJ dS ).Test a single entry produces multiple bigrams.r2   r   r   rV   r   r   Nr   r   r5   test_sentencesr   s       r   "test_multiple_bigrams_single_entryz3TestPhrasesModel.test_multiple_bigrams_single_entry   sl    1SQQQMMMN6..~>>CCEEFF0ABBBBBBBr   c                     t          | j        ddd          }g dg}t          d |                    |                                          D                       }|ddhk    sJ dS )	z:Test the default scoring, from the mikolov word2vec paper.r2   r   r   rV   c              3   6   K   | ]}t          |d           V  dS    Nroundrd   r&   s     r   rf   z8TestPhrasesModel.test_scoring_default.<locals>.<genexpr>  *      dde%q//ddddddr   ^I@Zd;O@Nr   rk   r   r   valuesr   r5   r   seen_scoress       r   test_scoring_defaultz%TestPhrasesModel.test_scoring_default   s    1SQQQMMMNddv7J7J>7Z7Z7a7a7c7cddddd
 
 	
 	
 	
 	
 	
r   c                     t          | j        dd          }g dg}t          ||                                                   }|g dk    sJ dS )/Test Phrases[sentences] with a single sentence.r2   rw   r   rV   )r]   rY   r^   N)r   rk   next__iter__r   r5   r   phrased_sentences       r   test__getitem__z TestPhrasesModel.test__getitem__  sc    1BBBMMMN~ 6 ? ? A ABB#P#P#PPPPPPPr   c                     t          | j        ddd          }g dg}t          d |                    |                                          D                       }|ddhk    sJ d	S )
5Test normalized pointwise mutual information scoring.r2         ?npmirw   r   scoringrV   c              3   6   K   | ]}t          |d           V  dS r   r   r   s     r   rf   z5TestPhrasesModel.test_scoring_npmi.<locals>.<genexpr>  r   r   gMbX9?g+?Nr   r   s       r   test_scoring_npmiz"TestPhrasesModel.test_scoring_npmi  s    1FSSSMMMNddv7J7J>7Z7Z7a7a7c7cddddd
 
 	
 	
 	
 	
 	
r   c                    t          | j        ddt                    }g dg}t          |                    |                                                    }t          d |D                       sJ t          |          dk    sJ dS )%Test using a custom scoring function.r2   MbP?r   rW   rX   rY   rZ   r[   systemc              3   "   K   | ]
}|d k    V  dS r2   NrS   r   s     r   rf   z6TestPhrasesModel.test_custom_scorer.<locals>.<genexpr>!  &      77%5A:777777r   r   N)r   rk   r   r{   r   r   allr   r   s       r   test_custom_scorerz#TestPhrasesModel.test_custom_scorer  s    1kZZZWWWX6..~>>EEGGHH77;77777777;1$$$$$$r   c                     |                      t          t          | j        d           |                      t          t          | j        d           dS )z,Test the phrases module with bad parameters.r   )rw   r   r3   N)assertRaises
ValueErrorr   rk   rl   s    r   test_bad_parametersz$TestPhrasesModel.test_bad_parameters$  sJ     	*gt~KKK 	*gt~LLLLLr   c                     t          | j        d          }|                     t          |j                  dk               dS )z0Test that max_vocab_size parameter is respected.   )max_vocab_sizeN)r   rk   r   r   vocab)r   r5   s     r   test_pruningzTestPhrasesModel.test_pruning,  s=    :::FL))Q./////r   N)r(   r)   r*   r   r   r   r   r   r   r   r   r   rS   r   r   r   r      s          &	 	 	C C C	
 	
 	
Q Q Q	
 	
 	
% % %M M M0 0 0 0 0r   r   c                   2    e Zd Zd Zd Zd Zd Zd Zd ZdS )TestPhrasesPersistencec                    t          | j        ddt                    }t          d          5 }|                    |           t          j        |          }ddd           n# 1 swxY w Y   g dg}t          |                    |                                                    }t          d |D                       sJ t          |          dk    sJ dS )	z>Test saving and loading a Phrases object with a custom scorer.r2   r   r   test.pklNr   c              3   "   K   | ]
}|d k    V  dS r   rS   r   s     r   rf   zFTestPhrasesPersistence.test_save_load_custom_scorer.<locals>.<genexpr>>  r   r   r   )r   rk   r   r   saveloadr{   r   r   r   r   r   r5   fpathbigram_loadedr   r   s         r   test_save_load_custom_scorerz3TestPhrasesPersistence.test_save_load_custom_scorer4  s   1kZZZJ'' 	05KK#L//M	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 XWWX=55nEELLNNOO77;77777777;1$$$$$$s   *A##A'*A'c                    t          | j        dd          }t          d          5 }|                    |           t          j        |          }ddd           n# 1 swxY w Y   g dg}t          d |                    |                                          D                       }|t          ddg          k    sJ dS )	)Test saving and loading a Phrases object.r2   r   r   Nr   c              3   6   K   | ]}t          |d           V  dS r   r   r   s     r   rf   z8TestPhrasesPersistence.test_save_load.<locals>.<genexpr>I  *      kke%q//kkkkkkr   r   r   )r   rk   r   r   r   r   r   r   r   s         r   test_save_loadz%TestPhrasesPersistence.test_save_loadA  s   1BBBJ'' 	05KK#L//M	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 XWWXkk}7Q7QR`7a7a7h7h7j7jkkkkkc#
    	 	 	 	 	s   *AA!$A!c                    t          dh          }t          | j        dd|          }t          d          5 }|                    |           t          j        |          }ddd           n# 1 swxY w Y   |j        |k    sJ dS )r  r   r2   rv   r   N)ro   r   rk   r   r   r   r   )r   r   r5   r   r   s        r   #test_save_load_with_connector_wordsz:TestPhrasesPersistence.test_save_load_with_connector_wordsO  s    #TF++1SbcccJ'' 	05KK#L//M	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 ,??????s   *A..A25A2c                     t          j        t          d                    }g dg}t          d |                    |                                          D                       }|t          ddg          k    sJ dS )zTTest backwards compatibility with a previous version of Phrases with custom scoring.zphrases-scoring-str.pklr   c              3   6   K   | ]}t          |d           V  dS r   r   r   s     r   rf   zGTestPhrasesPersistence.test_save_load_string_scoring.<locals>.<genexpr>]  r  r   r   r   Nr   r   r	   r   r   r   r   r   r   r   s       r   test_save_load_string_scoringz4TestPhrasesPersistence.test_save_load_string_scoringY  s    X.G%H%HIIWWWXkk}7Q7QR`7a7a7h7h7j7jkkkkkc#
    	 	 	 	 	r   c                     t          j        t          d                    }g dg}t          d |                    |                                          D                       }|t          ddg          k    sJ dS )zTTest backwards compatibility with old versions of Phrases with no scoring parameter.zphrases-no-scoring.pklr   c              3   6   K   | ]}t          |d           V  dS r   r   r   s     r   rf   zCTestPhrasesPersistence.test_save_load_no_scoring.<locals>.<genexpr>h  r  r   r   r   Nr
  r  s       r   test_save_load_no_scoringz0TestPhrasesPersistence.test_save_load_no_scoringd  s    X.F%G%GHHWWWXkk}7Q7QR`7a7a7h7h7j7jkkkkkc#
    	 	 	 	 	r   c                     t          j        t          d                    }|                     |j        t                                 t          |          }|g d          dS )zTEnsure backwards compatibility with old versions of Phrases, before connector_words.zphrases-no-common-terms.pkl)rZ   r[   rY   N)r   r   r	   r:   r   ro   r   )r   r   phrasers      r   test_save_load_no_common_termsz5TestPhrasesPersistence.test_save_load_no_common_termso  s\    X.K%L%LMM6	DDD..0001111r   N	r(   r)   r*   r   r  r  r  r  r  rS   r   r   r   r   3  sq        % % %  @ @ @	 	 		 	 	2 2 2 2 2r   r   c                   2    e Zd Zd Zd Zd Zd Zd Zd ZdS )TestFrozenPhrasesPersistencec           	      <   t          d          5 }t          t          | j        ddt                              }|                    |           t          j        |          }|                     |j        t                     ddd           dS # 1 swxY w Y   dS )zDTest saving and loading a FrozenPhrases object with a custom scorer.r   r2   r   r   N)	r   r   r   rk   r   r   r   r:   r   r   r   r5   r   s       r   r   z9TestFrozenPhrasesPersistence.test_save_load_custom_scorerz  s     J'' 	A5"!t[YYY[ [FKK).u55M]2K@@@	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	As   A4BBBc                 0   t          d          5 }t          t          | j        dd                    }|                    |           t          j        |          }|                     |g d         g d           ddd           dS # 1 swxY w Y   dS )/Test saving and loading a FrozenPhrases object.r   r2   r   r   )r]   rY   r^   r   N)r   r   r   rk   r   r   r:   r  s       r   r  z+TestFrozenPhrasesPersistence.test_save_load  s    J'' 	I5"74>QRS#T#T#TUUFKK).u55M[[[\GGGI I I		I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	I 	Is   A.BBBc           	      H   t          dh          }t          d          5 }t          t          | j        dd|                    }|                    |           t          j        |          }|                     |j        |           ddd           dS # 1 swxY w Y   dS )r  r   r   r2   rv   N)	ro   r   r   r   rk   r   r   r:   r   )r   r   r   r5   r   s        r   r  z@TestFrozenPhrasesPersistence.test_save_load_with_connector_words  s    #TF++J'' 	M5"74>QRSet#u#u#uvvFKK).u55M]:OLLL		M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	M 	Ms   A*BBBc                     t          j        t          d                    }|                     |j        t
                     dS )zTest saving and loading a FrozenPhrases object with a string scoring parameter.
        This should ensure backwards compatibility with the previous version of FrozenPhraseszphraser-scoring-str.pklNr   r   r	   r:   r   r   r   r   s     r   r  z:TestFrozenPhrasesPersistence.test_save_load_string_scoring  s<     &*84M+N+NOO.@@@@@r   c                     t          j        t          d                    }|                     |j        t
                     dS )zTest saving and loading a FrozenPhrases object with no scoring parameter.
        This should ensure backwards compatibility with old versions of FrozenPhraseszphraser-no-scoring.pklNr  r  s     r   r  z6TestFrozenPhrasesPersistence.test_save_load_no_scoring  s<     &*84L+M+MNN.@@@@@r   c                     t          j        t          d                    }|                     |j        t                                 dS )zZEnsure backwards compatibility with old versions of FrozenPhrases, before connector_words.zphraser-no-common-terms.pklN)r   r   r	   r:   r   ro   r  s     r   r  z;TestFrozenPhrasesPersistence.test_save_load_no_common_terms  s>    %*84Q+R+RSS6	DDDDDr   Nr  rS   r   r   r  r  x  s        A A AI I IM M MA A AA A AE E E E Er   r  c                       e Zd ZdZd ZdS )TestFrozenPhrasesModelzTest FrozenPhrases models.c                     t          | j        dd| j                  }t          |          | _        t          | j        | j                  }t          |          | _        dS )z*Set up FrozenPhrases models for the tests.r2   rv   r   N)r   rk   r   r   r5   rx   )r   r}   bigram_default_phrasess      r   ry   zTestFrozenPhrasesModel.setUp  se     Na1dFZ\ \ \#N33!(I]!^!^!^+,BCCr   N)r(   r)   r*   r   ry   rS   r   r   r!  r!    s.        $$D D D D Dr   r!  c                   v    e Zd ZdZg dg dg dg dg ddgg dg d	g d
g dg
Zg dZdZdZdZdZ	dZ
dZd ZdS )CommonTermsPhrasesDatazBThis mixin permits to reuse tests with the connector_words option.)rZ   r[   r   computer)rY   r   userr&  r   lackr   interest)epsr'  r[   r   )r   rM   rZ   r   r*  )r'  r(  r   r)  trees)rW   r   r+  )datarM   rW   r   r+  )r,  rM   rW   rY   r,  rM   rW   rY   forrZ   r[   )r   rM   r.  lack_of_interestdata_and_graphr^   lack of interestdata and graphr   c                 $    d | j         D             S )Nc              3   .   K   | ]}d  |D             V  dS )c              3      K   | ]}|V  d S rb   rS   rc   s     r   rf   zACommonTermsPhrasesData.gen_sentences.<locals>.<genexpr>.<genexpr>  rg   r   NrS   rh   s     r   rf   z7CommonTermsPhrasesData.gen_sentences.<locals>.<genexpr>  ri   r   rj   rl   s    r   rm   z$CommonTermsPhrasesData.gen_sentences  rn   r   N)r(   r)   r*   r   rk   r   rp   rq   rr   expression1expression2expression3rm   rS   r   r   r%  r%    s        LL 	322PPP...333***		   ///***GGGI +**O!GG G%K#K$KF F F F Fr   r%  c                   <    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	S )
TestPhrasesModelCommonTermsz)Test Phrases models with connector words.c                     t          | j        dd| j        d          }g dg}t          |                    |                                                    }|t          ddg          k    sJ dS )r   r2   r   rw   r   r   r   r-  r2  r   Nr   rk   r   r   r   r   r   s       r   r   z>TestPhrasesModelCommonTerms.test_multiple_bigrams_single_entry  s    1SWSgsvwwwYYYZ6..~>>CCEEFFs$
      	 	 	 	 	r   c                     t          | j        dd| j        d          }t          |                    | j                                                            }|t          g d          k    sJ dS )#Test Phrases bigram export phrases.r2   r   r<  )r   zgraph of treesr2  r1  Nr=  r   s      r   r   z-TestPhrasesModelCommonTerms.test_find_phrases  s    1SWSgsvwww6..t~>>CCEEFFs $
 $
 $
      	 	 	 	 	r   c                     t          | j        ddd          }t          |                                                                          }|t          g d          k    sJ dS )r?  r2   r   r   )z	and graphzdata andzgraph ofzgraph surveyr   zlack ofzof interestzof treesNr   r   s      r   r   z/TestPhrasesModelCommonTerms.test_export_phrases  s{    1SQQQ600227799::s 	$
 	$
 	$
 	  	  	 		 		 		 		 		r   c                    t          | j        dd| j                  }g dg}t          d |                    |                                          D                       }t          |j                  }t          t          |j	                            }t          |j	        d                   }t          |j	        d                   }t          |j	        d                   }t          |j	        d                   }	t          |j	        d	                   }
t          |j	        d
                   }|t          t          ||z
  |z  |z  |z  d          t          ||z
  |	z  |
z  |z  d          g          k    sJ dS )z; test the default scoring, from the mikolov word2vec paper r2   rv   r-  c              3   6   K   | ]}t          |d           V  dS r   r   r   s     r   rf   zCTestPhrasesModelCommonTerms.test_scoring_default.<locals>.<genexpr>  r   r   rW   r,  r0  rZ   r[   r^   r   N)r   rk   r   r   r   r   floatrw   r   r   r   )r   r5   r   r   rw   r   rW   r,  r0  rZ   r[   r^   s               r   r   z0TestPhrasesModelCommonTerms.test_scoring_default  sx   1SWSghhhYYYZddv7J7J>7Z7Z7a7a7c7cddddd&*++	#fl++,,	fl7+,,V\&)**v|,<=>>fl7+,,&,{344	-> ?@@c>I-5=	I1MM?Y.%7)CiOQRSS	#
    	 	 	 	 	r   c                     t          | j        ddd| j                  }g dg}t          d |                    |                                          D                       }|t          ddg          k    sJ d	S )
r   r2   r   r   rw   r   r   r   r-  c              3   6   K   | ]}t          |d           V  dS r   r   r   s     r   rf   z@TestPhrasesModelCommonTerms.test_scoring_npmi.<locals>.<genexpr>  r   r   gGz?gS㥛?N)r   rk   r   r   r   r   r   s       r   r   z-TestPhrasesModelCommonTerms.test_scoring_npmi  s    Na2D,@
 
 
 ZYYZddv7J7J>7Z7Z7a7a7c7cdddddc#
    	 	 	 	 	r   c                 
   t          | j        ddt          | j                  }g dg}t	          |                    |                                                    }t          |          sJ t          |          dk    sJ dS )r   r2   r   rE  r-  r>   N)	r   rk   r   r   r{   r   r   r   r   r   s       r   r   z.TestPhrasesModelCommonTerms.test_custom_scorer!  s    Na41E
 
 
 ZYYZ6..~>>EEGGHH;;1$$$$$$r   c                     t          | j        dd| j                  }g dg}t          ||                                                   }|g dk    sJ dS )r   r2   rv   r-  )r0  rY   r.  r^   N)r   rk   r   r   r   r   s       r   r   z+TestPhrasesModelCommonTerms.test__getitem__-  si    1SWSghhhYYYZ~ 6 ? ? A ABB#Y#Y#YYYYYYYr   N)r(   r)   r*   r   r   r   r   r   r   r   r   rS   r   r   r:  r:    s        33	 	 	
 
 
    ,  
% 
% 
%Z Z Z Z Zr   r:  c                       e Zd Zd ZdS )#TestFrozenPhrasesModelCompatibilityc                 
   t          j        t          d                    }t          j        t          d                    }g d}|                     ||         ddg           |                     ||         ddg           d S )Nzphrases-3.6.0.modelzphraser-3.6.0.model)r+  rW   rX   r+  r]   )r   r   r	   r   r:   )r   phrasesr  r   s       r   test_compatibilityz6TestFrozenPhrasesModelCompatibility.test_compatibility8  s    ,x(=>>??$X.C%D%DEE55507N2KLLL07N2KLLLLLr   N)r(   r)   r*   rM  rS   r   r   rJ  rJ  6  s(        M M M M Mr   rJ  __main__z)%(asctime)s : %(levelname)s : %(message)s)formatlevel)r   loggingunittestnumpyr   gensim.models.phrasesr   r   r   r   gensim.test.utilsr   r   r	   TestCaser   rU   rt   r   r   r   r  r!  r%  r:  rJ  r(   basicConfigDEBUGmainrS   r   r   <module>rZ     s          P P P P P P P P P P 1 1 1 1 1 1 D D D D D D D D D DFa Fa Fa Fa Fa* Fa Fa FaRF F F F F F F F^7 ^7 ^7 ^7 ^7K ^7 ^7 ^7J  Z0 Z0 Z0 Z0 Z0}h&7 Z0 Z0 Z0|B2 B2 B2 B2 B2[(*; B2 B2 B2J0E 0E 0E 0E 0E;0A 0E 0E 0Ef
D 
D 
D 
D 
D]H,= 
D 
D 
DF F F F F F F F8_Z _Z _Z _Z _Z"8:J _Z _Z _ZDM M M M M(*; M M M z GJRYR_````HMOOOOO r   