
    c#                         d Z ddlmZ ddlZddlmZ 	 ddlZn# e$ r  ed          w xY wddl	m
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZ  G d d          ZdS )a
  
This module integrates `NMSLIB <https://github.com/nmslib/nmslib>`_ fast similarity
search with Gensim's :class:`~gensim.models.word2vec.Word2Vec`, :class:`~gensim.models.doc2vec.Doc2Vec`,
:class:`~gensim.models.fasttext.FastText` and :class:`~gensim.models.keyedvectors.KeyedVectors`
vector embeddings.

.. Important::
    To use this module, you must have the external ``nmslib`` library installed.
    To install it, run ``pip install nmslib``.

To use the integration, instantiate a :class:`~gensim.similarities.nmslib.NmslibIndexer` class
and pass the instance as the `indexer` parameter to your model's `model.most_similar()` method.

Example usage
-------------

.. sourcecode:: pycon

    >>> from gensim.similarities.nmslib import NmslibIndexer
    >>> from gensim.models import Word2Vec
    >>>
    >>> sentences = [['cute', 'cat', 'say', 'meow'], ['cute', 'dog', 'say', 'woof']]
    >>> model = Word2Vec(sentences, min_count=1, epochs=10, seed=2)
    >>>
    >>> indexer = NmslibIndexer(model)
    >>> model.wv.most_similar("cat", topn=2, indexer=indexer)
    [('cat', 1.0), ('meow', 0.16398882865905762)]

Load and save example
---------------------

.. sourcecode:: pycon

    >>> from gensim.similarities.nmslib import NmslibIndexer
    >>> from gensim.models import Word2Vec
    >>> from tempfile import mkstemp
    >>>
    >>> sentences = [['cute', 'cat', 'say', 'meow'], ['cute', 'dog', 'say', 'woof']]
    >>> model = Word2Vec(sentences, min_count=1, seed=2, epochs=10)
    >>>
    >>> indexer = NmslibIndexer(model)
    >>> _, temp_fn = mkstemp()
    >>> indexer.save(temp_fn)
    >>>
    >>> new_indexer = NmslibIndexer.load(temp_fn)
    >>> model.wv.most_similar("cat", topn=2, indexer=new_indexer)
    [('cat', 1.0), ('meow', 0.5595494508743286)]

What is NMSLIB
--------------

Non-Metric Space Library (NMSLIB) is an efficient cross-platform similarity search library and a toolkit
for evaluation of similarity search methods. The core-library does not have any third-party dependencies.
More information about NMSLIB: `github repository <https://github.com/nmslib/nmslib>`_.

Why use NMSIB?
--------------

Gensim's native :py:class:`~gensim.similarities.Similarity` for finding the `k` nearest neighbors to a vector
uses brute force and has linear complexity, albeit with extremely low constant factors.

The retrieved results are exact, which is an overkill in many applications:
approximate results retrieved in sub-linear time may be enough.

NMSLIB can find approximate nearest neighbors much faster, similar to Spotify's Annoy library.
Compared to :py:class:`~gensim.similarities.annoy.Annoy`, NMSLIB has more parameters to
control the build and query time and accuracy. NMSLIB often achieves faster and more accurate
nearest neighbors search than Annoy.

    )absolute_importN)openzQNMSLIB not installed. To use the NMSLIB indexer, please run `pip install nmslib`.)utils)Doc2Vec)Word2Vec)FastText)KeyedVectorsc                   b    e Zd ZdZddZej        fdZed             Z	d Z
d Zd Zd	 Zd
 ZdS )NmslibIndexeraB  This class allows to use `NMSLIB <https://github.com/nmslib/nmslib>`_ as indexer for `most_similar` method
    from :class:`~gensim.models.word2vec.Word2Vec`, :class:`~gensim.models.doc2vec.Doc2Vec`,
    :class:`~gensim.models.fasttext.FastText` and :class:`~gensim.models.keyedvectors.Word2VecKeyedVectors` classes.

    Nc                    |ddddd}|ddi}d| _         d| _        || _        || _        || _        |rt          | j        t                    r|                                  dS t          | j        t          t          f          r| 
                                 dS t          | j        t          f          r|                                  dS t          d          dS )a  
        Parameters
        ----------
        model : :class:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel`
            Model, that will be used as source for index.
        index_params : dict, optional
            Indexing parameters passed through to NMSLIB:
            https://github.com/nmslib/nmslib/blob/master/manual/methods.md#graph-based-search-methods-sw-graph-and-hnsw

            If not specified, defaults to `{'M': 100, 'indexThreadQty': 1, 'efConstruction': 100, 'post': 0}`.
        query_time_params : dict, optional
            query_time_params for NMSLIB indexer.
            If not specified, defaults to `{'efSearch': 100}`.

        Nd      r   )MindexThreadQtyefConstructionpostefSearchzDmodel must be a Word2Vec, Doc2Vec, FastText or KeyedVectors instance)indexlabelsmodelindex_paramsquery_time_params
isinstancer   _build_from_doc2vecr   r   _build_from_word2vecr	   _build_from_keyedvectors
ValueError)selfr   r   r   s       :lib/python3.11/site-packages/gensim/similarities/nmslib.py__init__zNmslibIndexer.__init__f   s      	]!$SZ[\\L 	2!+S 1

(!2  	i$*g.. i((*****DJ8(<== i))+++++DJ88 i--///// !ghhh	i 	i    c                     |dz   }| j                             |           | j        | j        | j        d}t          |d          5 }t          j        |||           ddd           dS # 1 swxY w Y   dS )a  Save this NmslibIndexer instance to a file.

        Parameters
        ----------
        fname : str
            Path to the output file,
            will produce 2 files: `fname` - parameters and `fname`.d - :class:`~nmslib.NmslibIndex`.
        protocol : int, optional
            Protocol for pickle.

        Notes
        -----
        This method saves **only** the index (**the model isn't preserved**).

        .d)r   r   r   wb)protocolN)r   	saveIndexr   r   r   r   _pickledump)r   fnamer%   
fname_dictdfouts         r   savezNmslibIndexer.save   s      T\

U###!.TE[gkgrss*d## 	5tLD84444	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5s   A**A.1A.c                 B   |dz   }t          |d          5 }t          j        |          }ddd           n# 1 swxY w Y   |d         }|d         } | d||          }t          j        dd	          }|                    |           ||_        |d
         |_        |S )zLoad a NmslibIndexer instance from a file.

        Parameters
        ----------
        fname : str
            Path previously used in `save()`.

        r#   rbNr   r   )r   r   r   hnswcosinesimilmethodspacer   )r   r'   loadnmslibinit	loadIndexr   r   )	clsr)   r*   fr+   r   r   nmslib_instancer   s	            r   r5   zNmslibIndexer.load   s     T\
*d## 	 qQA	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 (12#D|Whiii6??? %!"8s   7;;c                     |                      | j        j                                        | j        j        j                   dS )z?Build an NMSLIB index using word vectors from a Word2Vec model.N)_build_from_modelr   wvget_normed_vectorsindex_to_keyr   s    r   r   z"NmslibIndexer._build_from_word2vec   s5    tz}??AA4:=C]^^^^^r!   c                 |    | j         j        }|j        }|                     |                                |           dS )zBBuild an NMSLIB index using document vectors from a Doc2Vec model.N)r   dvr@   r=   r?   )r   docvecsr   s      r   r   z!NmslibIndexer._build_from_doc2vec   s;    *-%w99;;VDDDDDr!   c                 t    |                      | j                                        | j        j                   dS )zCBuild an NMSLIB index using word vectors from a KeyedVectors model.N)r=   r   r?   r@   rA   s    r   r   z&NmslibIndexer._build_from_keyedvectors   s1    tz<<>>
@WXXXXXr!   c                     t          j        dd          }|                    |           |                    | j        d           t          j        || j                   || _        || _        d S )Nr0   r1   r2   T)print_progress)	r6   r7   addDataPointBatchcreateIndexr   setQueryTimeParamsr   r   r   )r   vectorsr   r   s       r   r=   zNmslibIndexer._build_from_model   sn    6???((($+DAAA!%)?@@@
r!   c                       j                             |                    dd          |          d         \  }} fdt          ||          D             S )a  Find the approximate `num_neighbors` most similar items.

        Parameters
        ----------
        vector : numpy.array
            Vector for a word or document.
        num_neighbors : int
            How many most similar items to look for?

        Returns
        -------
        list of (str, float)
            List of most similar items in the format `[(item, cosine_similarity), ... ]`.

        r   )kr   c                 :    g | ]\  }}j         |         d |z
  fS )g      ?)r   ).0id_distancer   s      r   
<listcomp>z.NmslibIndexer.most_similar.<locals>.<listcomp>   s-    \\\}sHS!3>2\\\r!   )r   knnQueryBatchreshapezip)r   vectornum_neighborsids	distancess   `    r   most_similarzNmslibIndexer.most_similar   s_      11&..B2G2G=1YYZ[\Y ]\\\CQZH[H[\\\\r!   )NN)__name__
__module____qualname____doc__r    r   PICKLE_PROTOCOLr-   classmethodr5   r   r   r   r=   r[    r!   r   r   r   _   s         )i )i )i )iV $)#8 5 5 5 5,   [*_ _ _E E EY Y Y  ] ] ] ] ]r!   r   )r_   
__future__r   pickler'   
smart_openr   r6   ImportErrorgensimr   gensim.models.doc2vecr   gensim.models.word2vecr   gensim.models.fasttextr   gensim.modelsr	   r   rb   r!   r   <module>rl      s'  E EP ' & & & & &          kMMMM k k k
+i
j
jjk       ) ) ) ) ) ) + + + + + + + + + + + + & & & & & &J] J] J] J] J] J] J] J] J] J]s    *