
    cX                     >   d Z ddlZddlZddlZddlZddlmZ ddlm	Z	m
Z
  ej        e          Zedk    re ej        dej                   e                    dd	                    ej                              ed
            ej                    Ze                    ddd           e                    dd           e                    dded           e                    dded           e                    dded           e                    ddedddg            e                    d!d"ed           e                    d#d$ed%           e                    d&d'ed           e                    d(d)ed           e                    d*d+e,           e                    d-d.edddg            e                    d/d0edddg            e                    d1d2           e                                Zej        dk    rdZej        sd3e_        ndZej        sd4e_         e
ej                  Z e	eej        ej         ej!        ej"        ej#        ej        eej$        ej%        dej&        5          Z'ej(        r)ej(        Z)e'j*        +                    e)ej,        6           nej        -                    d7          d         Z)e'.                    e)d8z              ej,        dk    r e'j*        +                    e)d9z   d6           ne'j*        +                    e)d:z   d;6           ej/        rej/        Z0e'/                    e0           e                    d<ej1        2                    ej        d                              dS dS )=a  
USAGE: %(program)s -train CORPUS -output VECTORS -size SIZE -window WINDOW
-cbow CBOW -sample SAMPLE -hs HS -negative NEGATIVE -threads THREADS -iter ITER
-min_count MIN-COUNT -alpha ALPHA -binary BINARY -accuracy FILE

Trains a neural embedding model on text file CORPUS.
Parameters essentially reproduce those used by the original C tool
(see https://code.google.com/archive/p/word2vec/).

Parameters for training:
        -train <file>
                Use text data from <file> to train the model
        -output <file>
                Use <file> to save the resulting word vectors / word clusters
        -size <int>
                Set size of word vectors; default is 100
        -window <int>
                Set max skip length between words; default is 5
        -sample <float>
                Set threshold for occurrence of words. Those that appear with higher frequency in the training data
                will be randomly down-sampled; default is 1e-3, useful range is (0, 1e-5)
        -hs <int>
                Use Hierarchical Softmax; default is 0 (not used)
        -negative <int>
                Number of negative examples; default is 5, common values are 3 - 10 (0 = not used)
        -threads <int>
                Use <int> threads (default 3)
        -iter <int>
                Run more training iterations (default 5)
        -min_count <int>
                This will discard words that appear less than <int> times; default is 5
        -alpha <float>
                Set the starting learning rate; default is 0.025 for skip-gram and 0.05 for CBOW
        -binary <int>
                Save the resulting vectors in binary moded; default is 0 (off)
        -cbow <int>
                Use the continuous bag of words model; default is 1 (use 0 for skip-gram model)
        -accuracy <file>
                Compute accuracy of the resulting model analogical inference power on questions file <file>
                See an example of questions file
                at https://code.google.com/p/word2vec/source/browse/trunk/questions-words.txt

Example: python -m gensim.scripts.word2vec_standalone -train data.txt          -output vec.txt -size 200 -sample 1e-4 -binary 0 -iter 3
    N)seterr)Word2VecLineSentence__main__z:%(asctime)s : %(threadName)s : %(levelname)s : %(message)s)formatlevelz
running %s raise)allz-trainz0Use text data from file TRAIN to train the modelT)helprequiredz-outputz2Use file OUTPUT to save the resulting word vectors)r   z-windowz6Set max skip length WINDOW between words; default is 5   )r   typedefaultz-sizez(Set size of word vectors; default is 100d   z-samplezSet threshold for occurrence of words. Those that appear with higher frequency in the training data will be randomly down-sampled; default is 1e-3, useful range is (0, 1e-5)gMbP?z-hsz1Use Hierarchical Softmax; default is 0 (not used)   )r   r   r   choicesz	-negativezRNumber of negative examples; default is 5, common values are 3 - 10 (0 = not used)z-threadszUse THREADS threads (default 3)   z-iterz(Run more training iterations (default 5)z
-min_countzKThis will discard words that appear less than MIN_COUNT times; default is 5z-alphazPSet the starting learning rate; default is 0.025 for skip-gram and 0.05 for CBOW)r   r   z-cbowzOUse the continuous bag of words model; default is 1 (use 0 for skip-gram model)z-binaryz=Save the resulting vectors in binary mode; default is 0 (off)z	-accuracyz6Use questions from file ACCURACY to evaluate the modelg?g?)vector_size	min_countworkerswindowsamplealphasghsnegative	cbow_meanepochs)binary.z.modelz
.model.binz
.model.txtFzfinished running %s)3__doc__loggingos.pathossysargparsenumpyr   gensim.models.word2vecr   r   	getLogger__name__loggerbasicConfigINFOinfojoinargvArgumentParserparseradd_argumentintfloat
parse_argsargscbowskipgramr   traincorpussizer   threadsr   r   r   r   itermodeloutputoutfilewvsave_word2vec_formatr    splitsaveaccuracyquestions_filepathbasename     Blib/python3.11/site-packages/gensim/scripts/word2vec_standalone.py<module>rN      s  , ,^   



        9 9 9 9 9 9 9 9		8	$	$ z NFG[cjcopppp
KKchhsx00111
Fw$X$&&F
'Ydhiii
	(\]]]
	(`gjtuvvv
&PWZdghhh
: D  " " " G!aV     n!     
)JQT^_```
&PWZdefff
h!     i     g!aV     W!aV     *bcccDyA~ z 	DJz 	DJ\$*%%FHDI{4;djX7T]a	  E { 	P+%%gdk%BBBB*""3''*

7X%&&&;! 	PH))'L*@)NNNNH))'L*@)OOO} '~&&&
KK%rw'7'7'D'DEEEEE]NF NFrL   