
    c                     (   d Z ddlmZ ddlZddlZddlZddlZddlZ	ddl
mZ ddlmZmZ ddlmZmZ ddlmZ ddlmZ  G d	 d
ej                  Z G d dej                  Zedk    r' ej        ej                    ej                     dS dS )z<
Automated tests for checking the output of gensim.scripts.
    )unicode_literalsN)utils)segment_all_articlessegment_and_write_all_articles)datapathget_tmpfile)word2vec2tensor)KeyedVectorsc                   2    e Zd Zd Zd Zd Zd Zd Zd ZdS )TestSegmentWikic                 N    t          d          | _        d| _        g d| _        d S )NzDenwiki-latest-pages-articles1.xml-p000000010p000030302-shortened.bz2	Anarchism)
IntroductionzEtymology and terminologyHistoryzAnarchist schools of thoughtzInternal issues and debateszTopics of interest
Criticisms
ReferenceszFurther readingzExternal links)r   fnameexpected_titleexpected_section_titlesselfs    8lib/python3.11/site-packages/gensim/test/test_scripts.pysetUpzTestSegmentWiki.setUp   s4    dee
)(
 (
 (
$$$    c                     t          d          }ddg}|D ]*}	 t          j        ||z              # t          $ r Y 'w xY wd S )Nz
script.tst z.json)r   osremoveOSError)r   r   
extensionsexts       r   tearDownzTestSegmentWiki.tearDown/   sm    L))']
 	 	C	%#+&&&&   	 	s   1
>>c                 &   t          t          | j        d                    \  }}}|                     || j                   d |D             }|                     || j                   |d         d         }d}|                     ||v            |                     t          |          d           |                     |d         dk               |                     |d         d	k               |                     |d
         dk               d S )NT)include_interlinksc                     g | ]
}|d          S )r    ).0ss     r   
<listcomp>z=TestSegmentWiki.test_segment_all_articles.<locals>.<listcomp>@   s    1111!A$111r   r      zP'''Anarchism''' is a political philosophy that advocates self-governed societies  political philosophyr-   zself-governancezself-governed   zstateless societyzstateless societies)nextr   r   assertEqualr   r   
assertTruelen)r   titlesections
interlinkssection_titlesfirst_section_textfirst_sentences          r   test_segment_all_articlesz)TestSegmentWiki.test_segment_all_articles9   s   &*+?
_c+d+d+d&e&e#x 	 3444 21111)EFFF &a[^k*<<=== 	Z#...
1)YYZZZ
1)MMNNN
1)UUVVVVVr   c                     d}t          d t          | j                  D                       }|                     ||           d S )Nj   c              3      K   | ]}d V  dS r*   Nr&   )r'   xs     r   	<genexpr>z5TestSegmentWiki.test_generator_len.<locals>.<genexpr>P   s"      GG1GGGGGGr   )sumr   r   r2   )r   expected_num_articlesnum_articless      r   test_generator_lenz"TestSegmentWiki.test_generator_lenN   sL     #GG&:4:&F&FGGGGG'<=====r   c                    t          d          }t          | j        |d           d}t          j        |d          5 }t          d |D                       }d d d            n# 1 swxY w Y   |                     ||           d S )Nscript.tst.jsonr*   )workersr=   rbc              3      K   | ]}d V  dS r?   r&   )r'   lines     r   rA   z0TestSegmentWiki.test_json_len.<locals>.<genexpr>Z   s"      //Tq//////r   )r   r   r   r   openrB   r2   )r   tmpfrC   frD   s        r   test_json_lenzTestSegmentWiki.test_json_lenT   s    ,--&tz4CCCC #Zd## 	0q//Q/////L	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0'<=====s   A$$A(+A(c                    t          d          }t          | j        |dd           t          |          5 }t	          |          }d d d            n# 1 swxY w Y   t          j        |          }|d         |d         |d         }}}|                     || j                   |                     || j	                   |                     t          |          d           |                     t          |d	                   d
           |                     t          |d                   d           |                     t          |d                   d           d S )NrG   r*   T)rH   r$   r5   r8   r7   r+   r   r,   r.   r/   r0   )r   r   r   rL   r1   jsonloadsr2   r   r   r4   tuple)r   rM   rN   firstarticler5   r8   r7   s           r   #test_segment_and_write_all_articlesz3TestSegmentWiki.test_segment_and_write_all_articles]   s   ,--&tz4W[\\\\ $ZZ 	1GGE	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 *U##,3G,<gFV>WY`amYnz~ 3444)EFFF 	Z#...z!}--/_```z!}--/STTTz!}--/[\\\\\s   AAAN)	__name__
__module____qualname__r   r"   r;   rE   rO   rV   r&   r   r   r   r      sv        
 
 
   W W W*> > >> > >] ] ] ] ]r   r   c                       e Zd Zd Zd ZdS )TestWord2Vec2Tensorc                     t          d          | _         t          d          | _        | j        dz   | _        | j        dz   | _        | j        dz   | _        d S )Nword2vec_pre_kv_c
w2v2t_testz_metadata.tsvz_tensor.tsvz_vector.tsv)r   r   output_foldermetadata_filetensor_filevector_filer   s    r   r   zTestWord2Vec2Tensor.setUpu   sX     !455(66!//A-=-=r   c           
         t          | j        | j                   t          j        | j        d          5 }|                                }d d d            n# 1 swxY w Y   t          j        | j        d          5 }|                                }d d d            n# 1 swxY w Y   t          j        | j        d          5 }|                                	                                }d d d            n# 1 swxY w Y   t          t          |                    d                    \  }}|                     t          |          t          |          cxk    o|k    nc d| j        d| j        d           d |D             }d |D             }t          j        | j        d	
          }t#          ||          D ]\  }}	|                    d          }
|	                    d          }t'          j        t+          t          t,          |                                                              }t&          j                            ||
         |d           d S )N)word2vec_model_pathtensor_filenamerI       zMetadata file z and tensor file z  imply different number of rows.c                 6    g | ]}|                                 S r&   )strip)r'   words     r   r)   z7TestWord2Vec2Tensor.test_conversion.<locals>.<listcomp>   s     666TDJJLL666r   c                 :    g | ]}|                     d d          S )   	rf   )replace)r'   vectors     r   r)   z7TestWord2Vec2Tensor.test_conversion.<locals>.<listcomp>   s&    EEE66>>%..EEEr   F)binaryutf8   )decimal)r	   r   r_   r   rL   r`   	readlinesra   readlinerh   mapintsplitr3   r4   r
   load_word2vec_formatzipdecodenparraylistfloattestingassert_almost_equal)r   rN   metadatavectors
first_linenumber_wordsvector_size
orig_modelri   rm   word_stringvector_stringvector_arrays                r   test_conversionz#TestWord2Vec2Tensor.test_conversion|   s   DM4K]^^^^Z*D11 	%Q{{}}H	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% Z($// 	$1kkmmG	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ Zt,, 	.++--J	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. %(Z-=-=d-C-C$D$D!kHWEEEEEEEE%%%t'7'7'79	; 	; 	;
 76X666EEWEEE "6t}USSS
  '22 	] 	]LD&++f--K"MM&11M8DUM4G4G4I4I)J)J$K$KLLLJ**:k+BLZ[*\\\\		] 	]s5   AAA<BB!$B!'C55C9<C9N)rW   rX   rY   r   r   r&   r   r   r[   r[   t   s7        > > >] ] ] ] ]r   r[   __main__)level)__doc__
__future__r   rQ   loggingos.pathr   unittestnumpyrz   gensimr   gensim.scripts.segment_wikir   r   gensim.test.utilsr   r   gensim.scripts.word2vec2tensorr	   gensim.modelsr
   TestCaser   r[   rW   basicConfigDEBUGmainr&   r   r   <module>r      so    ( ' ' ' ' '               \ \ \ \ \ \ \ \ 3 3 3 3 3 3 3 3 : : : : : : & & & & & &T] T] T] T] T]h' T] T] T]n&] &] &] &] &](+ &] &] &]R z Ggm,,,,HMOOOOO r   