
    cC              
       J   d Z ddlZddlZddlZddlZddlmZ ddlmZ ej	        
                    e          Zd Zd Zej        dd            Zg d	g d
g dg dg ddgddgg dg dg	Z ee          Zd eD             Z G d d          Z e e                      ZdS )a  Module contains common utilities used in automated code tests for Gensim modules.

Attributes:

module_path : str
    Full path to this module directory.

common_texts : list of list of str
    Toy dataset.

common_dictionary : :class:`~gensim.corpora.dictionary.Dictionary`
    Dictionary of toy dataset.

common_corpus : list of list of (int, int)
    Corpus of toy dataset.


Examples:

It's easy to keep objects in temporary folder and reuse'em if needed:

.. sourcecode:: pycon

    >>> from gensim.models import word2vec
    >>> from gensim.test.utils import get_tmpfile, common_texts
    >>>
    >>> model = word2vec.Word2Vec(common_texts, min_count=1)
    >>> temp_path = get_tmpfile('toy_w2v')
    >>> model.save(temp_path)
    >>>
    >>> new_model = word2vec.Word2Vec.load(temp_path)
    >>> result = new_model.wv.most_similar("human", topn=1)

Let's print first document in toy dataset and then recreate it using its corpus and dictionary.

.. sourcecode:: pycon

    >>> from gensim.test.utils import common_texts, common_dictionary, common_corpus
    >>> print(common_texts[0])
    ['human', 'interface', 'computer']
    >>> assert common_dictionary.doc2bow(common_texts[0]) == common_corpus[0]

We can find our toy set in test data directory.

.. sourcecode:: pycon

    >>> from gensim.test.utils import datapath
    >>>
    >>> with open(datapath("testcorpus.txt")) as f:
    ...     texts = [line.strip().split() for line in f]
    >>> print(texts[0])
    ['computer', 'human', 'interface']

If you don't need to keep temporary objects on disk use :func:`~gensim.test.utils.temporary_file`:

.. sourcecode:: pycon

    >>> from gensim.test.utils import temporary_file, common_corpus, common_dictionary
    >>> from gensim.models import LdaModel
    >>>
    >>> with temporary_file("temp.txt") as tf:
    ...     lda = LdaModel(common_corpus, id2word=common_dictionary, num_topics=3)
    ...     lda.save(tf)

    N)
Dictionary)simple_preprocessc                 N    t           j                            t          d|           S )a  Get full path for file `fname` in test data directory placed in this module directory.
    Usually used to place corpus to test_data directory.

    Parameters
    ----------
    fname : str
        Name of file.

    Returns
    -------
    str
        Full path to `fname` in test_data folder.

    Example
    -------
    Let's get path of test GloVe data file and check if it exits.

    .. sourcecode:: pycon

        >>> from gensim.corpora import MmCorpus
        >>> from gensim.test.utils import datapath
        >>>
        >>> corpus = MmCorpus(datapath("testcorpus.mm"))
        >>> for document in corpus:
        ...     pass


    	test_data)ospathjoinmodule_path)fnames    1lib/python3.11/site-packages/gensim/test/utils.pydatapathr   Q   s    : 7<<[%888    c                 d    t           j                            t          j                    |           S )an  Get full path to file `suffix` in temporary folder.
    This function doesn't creates file (only generate unique name).
    Also, it may return different paths in consecutive calling.

    Parameters
    ----------
    suffix : str
        Suffix of file.

    Returns
    -------
    str
        Path to `suffix` file in temporary folder.

    Examples
    --------
    Using this function we may get path to temporary file and use it, for example, to store temporary model.

    .. sourcecode:: pycon

        >>> from gensim.models import LsiModel
        >>> from gensim.test.utils import get_tmpfile, common_dictionary, common_corpus
        >>>
        >>> tmp_f = get_tmpfile("toy_lsi_model")
        >>>
        >>> model = LsiModel(common_corpus, id2word=common_dictionary)
        >>> model.save(tmp_f)
        >>>
        >>> loaded_model = LsiModel.load(tmp_f)

    )r   r   r	   tempfilemkdtemp)suffixs    r   get_tmpfiler   q   s$    @ 7<<(**F333r    c              #      K   t          j                    }	 t          j                            ||           V  t          j        |d           dS # t          j        |d           w xY w)a  This context manager creates file `name` in temporary directory and returns its full path.
    Temporary directory with included files will deleted at the end of context. Note, it won't create file.

    Parameters
    ----------
    name : str
        Filename.

    Yields
    ------
    str
        Path to file `name` in temporary directory.

    Examples
    --------
    This example demonstrates that created temporary directory (and included
    files) will deleted at the end of context.

    .. sourcecode:: pycon

        >>> import os
        >>> from gensim.test.utils import temporary_file
        >>> with temporary_file("temp.txt") as tf, open(tf, 'w') as outfile:
        ...     outfile.write("my extremely useful information")
        ...     print("Is this file exists? {}".format(os.path.exists(tf)))
        ...     print("Is this folder exists? {}".format(os.path.exists(os.path.dirname(tf))))
        Is this file exists? True
        Is this folder exists? True
        >>>
        >>> print("Is this file exists? {}".format(os.path.exists(tf)))
        Is this file exists? False
        >>> print("Is this folder exists? {}".format(os.path.exists(os.path.dirname(tf))))
        Is this folder exists? False

    T)ignore_errorsN)r   r   r   r   r	   shutilrmtree)nametmps     r   temporary_filer      sn      N 


C/gll3%%%%%c......c.....s   "A A))human	interfacecomputer)surveyuserr   systemresponsetime)epsr    r   r!   )r!   r   r!   r$   )r    r"   r#   treesgraph)r&   minorsr%   )r&   r'   r   c                 B    g | ]}t                               |          S  )common_dictionarydoc2bow).0texts     r   
<listcomp>r.      s'    JJJT"**400JJJr   c                       e Zd Zd ZdS )	LeeCorpusc              #      K   t          t          d                    5 }|D ]}t          |          V  	 d d d            d S # 1 swxY w Y   d S )Nzlee_background.cor)openr   r   )selfflines      r   __iter__zLeeCorpus.__iter__   s      (/0011 	.Q . .'------.	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	.s   AAAN)__name__
__module____qualname__r6   r)   r   r   r0   r0      s#        . . . . .r   r0   )r   )__doc__
contextlibr   r   r   gensim.corporar   gensim.utilsr   r   dirname__file__r
   r   r   contextmanagerr   common_textsr*   common_corpusr0   listlee_corpus_listr)   r   r   <module>rE      st  @ @D      				  % % % % % % * * * * * *gooh''9 9 9@ 4  4  4F */ */ */ */^ '&&@@@***(((   Ig   !!!
 J|,, JJ\JJJ. . . . . . . . $yy{{##r   