
    c                         d Z ddlZddlmZ ddlmZ 	 ddlmZmZ n# e	$ r ej
        w xY w ej        e          Z G d de          ZdS )zf
This module allows fast fuzzy search between strings, using kNN queries with Levenshtein similarity.
    N)TermSimilarityIndex)utils)FastSSeditdistc                   2     e Zd ZdZd	 fd	Zd Zd
dZ xZS )LevenshteinSimilarityIndexa|  
    Retrieve the most similar terms from a static set of terms ("dictionary")
    given a query term, using Levenshtein similarity.

    "Levenshtein similarity" is a modification of the Levenshtein (edit) distance,
    defined in [charletetal17]_.

    This implementation uses the :class:`~gensim.similarities.fastss.FastSS` algorithm
    for fast kNN nearest-neighbor retrieval.

    Parameters
    ----------
    dictionary : :class:`~gensim.corpora.dictionary.Dictionary`
        A dictionary that specifies the considered terms.
    alpha : float, optional
        Multiplicative factor `alpha` for the Levenshtein similarity. See [charletetal17]_.
    beta : float, optional
        The exponential factor `beta` for the Levenshtein similarity. See [charletetal17]_.
    max_distance : int, optional
        Do not consider terms with Levenshtein distance larger than this as
        "similar". This is done for performance reasons: keep this value below 3
        for reasonable retrieval performance. Default is 1.

    See Also
    --------
    :class:`~gensim.similarities.termsim.WordEmbeddingSimilarityIndex`
        Retrieve most similar terms for a given term using the cosine
        similarity over word embeddings.
    :class:`~gensim.similarities.termsim.SparseTermSimilarityMatrix`
        Build a term similarity matrix and compute the Soft Cosine Measure.

    References
    ----------

    .. [charletetal17] Delphine Charlet and Geraldine Damnati, "SimBow at SemEval-2017 Task 3:
       Soft-Cosine Semantic Similarity between Questions for Community Question Answering", 2017,
       https://www.aclweb.org/anthology/S17-2051/.

    ?      @   c                    || _         || _        || _        || _        t                              d|           t          | j                                         |          | _        t          t          |                                            d S )Nzcreating FastSS index from %s)wordsmax_dist)
dictionaryalphabetamax_distanceloggerinfor   valuesindexsuperr   __init__)selfr   r   r   r   	__class__s        ?lib/python3.11/site-packages/gensim/similarities/levenshtein.pyr   z#LevenshteinSimilarityIndex.__init__@   sz    $
	(3Z@@@$/"8"8":":\RRR
($//88:::::    c                     t          t          |          t          |                    pd}| j        d|dz  |z  z
  | j        z  z  S )zXCalculate the Levenshtein similarity between two terms given their Levenshtein distance.   g      ?)maxlenr   r   )r   t1t2distancemax_lengthss        r   levsimz!LevenshteinSimilarityIndex.levsimI   sD    #b''3r77++0qzS8c>K#??$)KKKr   
   c                    i }| j         dk    r|| j        j        v r|dz   n|}t          t	          | j                  |          }t          d| j         dz             D ]r}| j                            ||                              |g           D ]+}||k    r	| 	                    |||          }|dk    r|||<   ,t	          |          |k    r nst          |                                d           d|         S )zTkNN fuzzy search: find the `topn` most similar terms from `self.dictionary` to `t1`.r   r   c                 $    | d          | d         fS )Nr   r    )xs    r   <lambda>z9LevenshteinSimilarityIndex.most_similar.<locals>.<lambda>i   s    adUAaDM r   )keyN)r   r   token2idminr    ranger   querygetr%   sorteditems)r   r!   topnresulteffective_topnr#   r"   
similaritys           r   most_similarz'LevenshteinSimilarityIndex.most_similarN   s!   q  	)+t/G)GQTAXXTN T_!5!5~FFN "!T%6%:;;  ***2x88<<XrJJ 0 0BRx ! !%RX!>!>J!A~ 0%/r
v;;.0 E fllnn*A*ABBB5D5IIr   )r	   r
   r   )r&   )__name__
__module____qualname____doc__r   r%   r8   __classcell__)r   s   @r   r   r      sv        & &N; ; ; ; ; ;L L L
J J J J J J J Jr   r   )r<   logginggensim.similarities.termsimr   gensimr   gensim.similarities.fastssr   r   ImportError	NO_CYTHON	getLoggerr9   r   r   r)   r   r   <module>rE      s      ; ; ; ; ; ;      ;;;;;;;;;   
/ 
	8	$	$QJ QJ QJ QJ QJ!4 QJ QJ QJ QJ QJs    *