
    v5`!                        d dl mZ d dlmZmZmZ d dlmZ ddlm	Z
mZ ddlmZ g dZ G d d	e          Z G d
 de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de
          Z e            Z e            Z e            Z e            Z e            Z e            Z e            Z e            Z e            Z  e            Z!dS )    )reduce)islicepermutationsrepeat)log   )BaseBaseSimilarity)DamerauLevenshtein)JaccardSorensenTverskyOverlapCosineTanimoto
MongeElkanBagjaccardsorensentverskysorensen_diceoverlapcosinetanimotomonge_elkanbagc                   &    e Zd ZdZd	dZd Zd ZdS )
r   aY  
    Compute the Jaccard similarity between the two sequences.
    They should contain hashable items.
    The return value is a float between 0 and 1, where 1 means equal,
    and 0 totally different.

    https://en.wikipedia.org/wiki/Jaccard_index
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/distance/jaccard.js
    r   FTc                 0    || _         || _        || _        d S Nqvalas_setexternalselfr!   r"   r#   s       Clib/python3.11/site-packages/textdistance/algorithms/token_based.py__init__zJaccard.__init__       	     c                     dS Nr    r%   	sequencess     r&   maximumzJaccard.maximum#       qr)   c                      | j         | }||S  | j        | } | j        | }|                     |          } | j        | }|                     |          }||z  S r   )quick_answer_get_counters_intersect_counters_count_counters_union_counters)r%   r.   resultintersectionunions        r&   __call__zJaccard.__call__&   sz    ""I.M&D&	2	/t/;++L99$$i0$$U++e##r)   Nr   FT__name__
__module____qualname____doc__r'   r/   r:   r,   r)   r&   r   r      sP         ! ! ! !
  
$ 
$ 
$ 
$ 
$r)   r   c                   &    e Zd ZdZd	dZd Zd ZdS )
r   an  
    Compute the Sorensen distance between the two sequences.
    They should contain hashable items.
    The return value is a float between 0 and 1, where 0 means equal,
    and 1 totally different.

    https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/distance/dice.js
    r   FTc                 0    || _         || _        || _        d S r   r    r$   s       r&   r'   zSorensen.__init__=   r(   r)   c                     dS r+   r,   r-   s     r&   r/   zSorensen.maximumB   r0   r)   c                        j         | }||S   j        | }t           fd|D                       }  j        | }                     |          }d|z  |z  S )Nc              3   B   K   | ]}                     |          V  d S r   r5   .0sr%   s     r&   	<genexpr>z$Sorensen.__call__.<locals>.<genexpr>K   s1      ??D((++??????r)   g       @)r2   r3   sumr4   r5   )r%   r.   r7   countr8   s   `    r&   r:   zSorensen.__call__E   s    ""I.M&D&	2	????Y?????/t/;++L99\!E))r)   Nr;   r<   r,   r)   r&   r   r   3   sP         ! ! ! !
  	* 	* 	* 	* 	*r)   r   c                   &    e Zd ZdZd	dZd Zd ZdS )
r   zTversky index

    https://en.wikipedia.org/wiki/Tversky_index
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/distance/tversky.js
    r   NFTc                 j    || _         |pt          d          | _        || _        || _        || _        d S r+   )r!   r   ksbiasr"   r#   )r%   r!   rO   rP   r"   r#   s         r&   r'   zTversky.__init__W   s3    	/q			 r)   c                     dS r+   r,   r-   s     r&   r/   zTversky.maximum^   r0   r)   c                       j         | }||S   j        | }  j        | }                     |          } fd|D             }t	          t           j        t          |                              }t          |          dk    s j        (|}t          ||          D ]\  }}||||z
  z  z  }||z  S |\  }}|\  }	}
t          ||g          }t          ||g          }| j        z   }|	|
z  ||z
  z  ||
z  z   }|||z   z  S )Nc                 :    g | ]}                     |          S r,   rF   rG   s     r&   
<listcomp>z$Tversky.__call__.<locals>.<listcomp>i   '    @@@T))!,,@@@r)      )r2   r3   r4   r5   listr   rO   lenrP   zipminmax)r%   r.   r7   r8   rO   krI   s1s2alphabetaa_valb_valc_vals   `             r&   r:   zTversky.__call__a   sG   ""I.M&D&	2	/t/;++L99@@@@i@@@	&#i..1122y>>Q$)"3!FB	** 1 11!q</00&((BtRHRHty(/%$,>''r)   )r   NNFTr<   r,   r)   r&   r   r   Q   sP         
! ! ! !  ( ( ( ( (r)   r   c                   &    e Zd ZdZd	dZd Zd ZdS )
r   zoverlap coefficient

    https://en.wikipedia.org/wiki/Overlap_coefficient
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/distance/overlap.js
    r   FTc                 0    || _         || _        || _        d S r   r    r$   s       r&   r'   zOverlap.__init__   r(   r)   c                     dS r+   r,   r-   s     r&   r/   zOverlap.maximum   r0   r)   c                        j         | }||S   j        | }  j        | }                     |          } fd|D             }|t	          |          z  S )Nc                 :    g | ]}                     |          S r,   rF   rG   s     r&   rT   z$Overlap.__call__.<locals>.<listcomp>   rU   r)   )r2   r3   r4   r5   rZ   )r%   r.   r7   r8   s   `   r&   r:   zOverlap.__call__   sx    ""I.M&D&	2	/t/;++L99@@@@i@@@	c)nn,,r)   Nr;   r<   r,   r)   r&   r   r   {   sP         
! ! ! !
  
- 
- 
- 
- 
-r)   r   c                   &    e Zd ZdZd	dZd Zd ZdS )
r   zcosine similarity (Ochiai coefficient)

    https://en.wikipedia.org/wiki/Cosine_similarity
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/distance/cosine.js
    r   FTc                 0    || _         || _        || _        d S r   r    r$   s       r&   r'   zCosine.__init__   r(   r)   c                     dS r+   r,   r-   s     r&   r/   zCosine.maximum   r0   r)   c                        j         | }||S   j        | }  j        | }                     |          } fd|D             }t	          d |          }|t          |dt          |          z            z  S )Nc                 :    g | ]}                     |          S r,   rF   rG   s     r&   rT   z#Cosine.__call__.<locals>.<listcomp>   rU   r)   c                     | |z  S r   r,   )xys     r&   <lambda>z!Cosine.__call__.<locals>.<lambda>   s
    1q5 r)   g      ?)r2   r3   r4   r5   r   powrX   )r%   r.   r7   r8   prods   `    r&   r:   zCosine.__call__   s    ""I.M&D&	2	/t/;++L99@@@@i@@@	(()44c$c)nn(<====r)   Nr;   r<   r,   r)   r&   r   r      sP         
! ! ! !
  > > > > >r)   r   c                   "     e Zd ZdZ fdZ xZS )r   zTanimoto distance
    This is identical to the Jaccard similarity coefficient
    and the Tversky index for alpha=1 and beta=1.
    c                 z     t                      j        | }|dk    rt          d          S t          |d          S )Nr   -infrV   )superr:   floatr   )r%   r.   r7   	__class__s      r&   r:   zTanimoto.__call__   s9    !!9-Q;;== vq>>!r)   )r=   r>   r?   r@   r:   __classcell__)ry   s   @r&   r   r      sB         " " " " " " " " "r)   r   c                   H    e Zd ZdZ e            ZedddfdZd Zd Zd Z	d	S )
r   a  
    https://www.academia.edu/200314/Generalized_Monge-Elkan_Method_for_Approximate_Text_String_Comparison
    http://www.cs.cmu.edu/~wcohen/postscript/kdd-2003-match-ws.pdf
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/distance/monge-elkan.js
    Fr   Tc                 >    || _         || _        || _        || _        d S r   )	algorithm	symmetricr!   r#   )r%   r}   r~   r!   r#   s        r&   r'   zMongeElkan.__init__   s"    ""	 r)   c                     | j                             |          }|D ]!}|rt          | | j         j        |           }"|S r   )r}   r/   r[   )r%   r.   r7   seqs       r&   r/   zMongeElkan.maximum   sS    ''	22 	C 	CC CV%;T^%;S%ABBr)   c           
      $   |sdS g }|D ]Y}|D ]T}t          d          }|D ]+}t          || j                            ||                    },|                    |           UZt          |          t          |          z  t          |          z  S )Nr   rv   )rx   r[   r}   
similarityappendrK   rX   )r%   r   r.   maxesc1rI   max_simc2s           r&   _calczMongeElkan._calc   s     	1 	& 	&B & &-- N NB!'4>+D+DR+L+LMMGGW%%%%	&
 5zzCHH$s5zz11r)   c                      | j         | }||S  | j        | }| j        rPg }t          |          D ]}|                     | j        |             t          |          t          |          z  S  | j        | S r   )r2   _get_sequencesr~   r   r   r   rK   rX   )r%   r.   r7   seqss       r&   r:   zMongeElkan.__call__   s    ""I.M'D'3	> 	*F$Y// 1 1jdj$/0000v;;V,,4:y))r)   N)
r=   r>   r?   r@   r   _damerau_levenshteinr'   r/   r   r:   r,   r)   r&   r   r      sz         
 .-//!5QY] ! ! ! !  
2 
2 
2* * * * *r)   r   c                       e Zd ZdZd ZdS )r   zgBag distance
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/distance/bag.js
    c                 j       j         | }  j        |  fd|D             }t          |          S )Nc              3   H   K   | ]}                     |z
            V  d S r   rF   )rH   sequencer8   r%   s     r&   rJ   zBag.__call__.<locals>.<genexpr>   s6      ]]xT))(\*ABB]]]]]]r)   )r3   r4   r[   )r%   r.   r8   s   ` @r&   r:   zBag.__call__   sK    &D&	2	/t/;]]]]]S\]]]	9~~r)   N)r=   r>   r?   r@   r:   r,   r)   r&   r   r      s-             r)   r   N)"	functoolsr   	itertoolsr   r   r   mathr   baser	   _Baser
   _BaseSimilarity
edit_basedr   __all__r   r   r   r   r   r   r   r   r   r   dicer   r   r   r   r   r   r   r,   r)   r&   <module>r      sK         2 2 2 2 2 2 2 2 2 2       C B B B B B B B * * * * * *  $ $ $ $ $o $ $ $>* * * * * * * *<'( '( '( '( '(o '( '( '(T- - - - -o - - -6> > > > >_ > > >8
" 
" 
" 
" 
"w 
" 
" 
"-* -* -* -* -* -* -* -*`	 	 	 	 	% 	 	 	 
cee	xzz
'))jll
'))8::

8::
'))r)   