
    cJ                     d   d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	m
Z
  G d d          Z G d dej                  Z G d	 d
ej                  Z G d dej                  Z G d dej                  Zedk    r0e j                            e j                    ej                     dS dS )    N)
Dictionary)InvertedIndexAccumulatorWordOccurrenceAccumulator!ParallelWordOccurrenceAccumulatorCorpusAccumulator)common_textsc                   4    e Zd Z G d dej                  ZdS )BaseTestCasesc                      e Zd Zg dddgg dg dgZdddd	d
dZ ee          Zee_        d e                                D             e_         e	e
                                          Zeddggz   Z ee          Zd ej                                        D             e_         e	ej        
                                          ZdZd Zd Zd Zd Zd ZdS )"BaseTestCases.TextAnalyzerTestBase)thisisatestdocument)r   r   r   )r   r   r   
               )r   r   r   r   r   c                     i | ]\  }}||	S  r   .0kvs      >lib/python3.11/site-packages/gensim/test/test_text_analysis.py
<dictcomp>z-BaseTestCases.TextAnalyzerTestBase.<dictcomp>   s    AAA1q!AAA    userc                     i | ]\  }}||	S r   r   r   s      r   r   z-BaseTestCases.TextAnalyzerTestBase.<dictcomp>"   s    NNNA1NNNr   Nc                 B    |                      | j        | j                  S Naccumulator_clstop_ids
dictionaryselfs    r   init_accumulatorz3BaseTestCases.TextAnalyzerTestBase.init_accumulator'   s    ''doFFFr   c                 B    |                      | j        | j                  S r#   r%   top_ids2dictionary2r(   s    r   init_accumulator2z4BaseTestCases.TextAnalyzerTestBase.init_accumulator2*   s    ''t7GHHHr   c                 R   |                                                      | j        d          }|                     d|                    d                     |                     d|                    d                     |                     d|                    d                     |                     d|                    dd                     |                     d|                    dd                     |                     d|                    dd                     d S )	N   r      r   r      r   r   )r*   
accumulatetextsassertEqualget_occurrencesget_co_occurrencesr)   accumulators     r   test_occurrence_countingz;BaseTestCases.TextAnalyzerTestBase.test_occurrence_counting-   s	   //11<<TZKKKQ ; ;F C CDDDQ ; ;D A ABBBQ ; ;C @ @AAAQ > >vz R RSSSQ > >vv N NOOOQ > >tS I IJJJJJr   c                 ^   |                                                      | j        d          }|                     d|                    d                     |                     d|                    d                     |                     d|                    d                     |                     d|                    d                     g d	}|D ]\  }\  }}|                     ||                    ||                     |                     ||                    ||                     | j        j        |         }| j        j        |         }|                     ||                    ||                     |                     ||                    ||                     d S )
Nn   r3   human   r    r1   graphtrees))r2   )r>   	interface)r3   )systemr    )r3   )r@   minors)r3   )r@   rA   )r?   )r    r    )r1   )r@   r@   )r   )timeeps)r/   r4   texts2r6   r7   r8   r.   token2id)r)   r:   casesexpected_countword1word2word_id1word_id2s           r   test_occurrence_counting2z<BaseTestCases.TextAnalyzerTestBase.test_occurrence_counting27   s   0022==dk3OOKQ ; ;G D DEEEQ ; ;F C CDDDQ ; ;G D DEEEQ ; ;G D DEEE  E 38 	e 	e.  1O1OPUW\1]1]^^^  1O1OPUW\1]1]^^^  +4U;+4U;  1O1OPXZb1c1cddd  1O1OPXZb1c1cdddd	e 	er   c                    |                                                      | j        d          }|                     t                    5  |                    d           d d d            n# 1 swxY w Y   |                     t                    5  |                    dd           d d d            d S # 1 swxY w Y   d S )Nr3   
irrelevantr   )r*   r4   r5   assertRaisesKeyErrorr7   r8   r9   s     r   $test_occurences_for_irrelevant_wordszGBaseTestCases.TextAnalyzerTestBase.test_occurences_for_irrelevant_wordsR   sD   //11<<TZKKK""8,, : :++L999: : : : : : : : : : : : : : :""8,, E E..v|DDDE E E E E E E E E E E E E E E E E Es$   A**A.1A.B33B7:B7)__name__
__module____qualname__r5   rH   r   r'   itemsid2tokensetvaluesr&   r   rG   r.   r-   r%   r*   r/   r;   rO   rT   r   r   r   TextAnalyzerTestBaser      sr       Z ((($$$	
 
 
  Z&&
&
AA0@0@AAA
#hoo''((&&!1 22 j((NN1E1K1K1M1MNNN3{+224455	G 	G 	G	I 	I 	I	K 	K 	K	e 	e 	e6	E 	E 	E 	E 	Er   r\   N)rU   rV   rW   unittestTestCaser\   r   r   r   r
   r
      sU        JE JE JE JE JEx0 JE JE JE JE JEr   r
   c                       e Zd ZeZd Zd ZdS )TestInvertedIndexAccumulatorc                     t          | j        | j                                      | j        d          }|                                }h dddhdhh dddhd}|                     ||           d S )Nr3   >   r   r1      r   r2   >   r3   r1   r?      rb   r?   r   r   r   r   r   r   r&   r'   r4   r5   index_to_dictassertDictEqualr)   r:   inverted_indexexpecteds       r   test_accumulate1z-TestInvertedIndexAccumulator.test_accumulate1]   s    .t|T_MMZ
A&& 	 %2244		AA
 
 	X~66666r   c                     t          | j        | j                                      | j        d          }|                                }h ddhdhh dddhd}|                     ||           d S )Nr1      r   r3   r1   r      r2   r3   r1   r2   r3   rd   re   rh   s       r   test_accumulate2z-TestInvertedIndexAccumulator.test_accumulate2l   s    .t|T_MMZ
A&& 	 %2244				A
 
 	X~66666r   N)rU   rV   rW   r   r%   rk   ro   r   r   r   r`   r`   Z   s7        .O7 7 77 7 7 7 7r   r`   c                       e Zd ZeZdS )TestWordOccurrenceAccumulatorN)rU   rV   rW   r   r%   r   r   r   rq   rq   |   s        /OOOr   rq   c                       e Zd ZeZd Zd ZdS )%TestParallelWordOccurrenceAccumulatorc                 D    |                      d| j        | j                  S Nr3   r$   r(   s    r   r*   z6TestParallelWordOccurrenceAccumulator.init_accumulator   s    ##At|T_EEEr   c                 D    |                      d| j        | j                  S ru   r,   r(   s    r   r/   z7TestParallelWordOccurrenceAccumulator.init_accumulator2   s    ##At}d6FGGGr   N)rU   rV   rW   r   r%   r*   r/   r   r   r   rs   rs      s?        7OF F FH H H H Hr   rs   c                       e Zd Zd Zd ZdS )TestCorpusAnalyzerc                      t           j        j         _        t           j        j         _         fdt           j        j        D              _        d S )Nc                 D    g | ]}j                             |          S r   )r'   doc2bow)r   docr)   s     r   
<listcomp>z,TestCorpusAnalyzer.setUp.<locals>.<listcomp>   s)    ^^^cT_$$S))^^^r   )r
   r\   r'   r&   r5   corpusr(   s   `r   setUpzTestCorpusAnalyzer.setUp   sE    '<G$9A^^^^]5W5]^^^ 	r   c                    t          | j                                      | j                  }|                                }h ddhdhh dddhd}|                     ||           |                     d|                    d                     |                     d|                    d	                     |                     d|                    dd
                     |                     d|                    dd	                     d S )Nrm   r   rn   r2   r3   rd   r1   r   r   r   )	r   r&   r4   r~   rf   rg   r6   r7   r8   rh   s       r   test_index_accumulationz*TestCorpusAnalyzer.test_index_accumulation   s   '55@@MM$2244				A
 
 	X~666K77;;<<<K77;;<<<K::2rBBCCCK::2rBBCCCCCr   N)rU   rV   rW   r   r   r   r   r   rx   rx      s:        _ _ _D D D D Dr   rx   __main__)loggingr]   gensim.corpora.dictionaryr   $gensim.topic_coherence.text_analysisr   r   r   r   gensim.test.utilsr   r
   r\   r`   rq   rs   r^   rx   rU   rootsetLevelWARNINGmainr   r   r   <module>r      s     0 0 0 0 0 0            + * * * * *LE LE LE LE LE LE LE LE^7 7 7 7 7=#E 7 7 7D0 0 0 0 0M$F 0 0 0H H H H HM,N H H HD D D D D* D D D4 z L'/***HMOOOOO r   