
    c0                        d dl mZ d dlZd dlZd dlmZ d dlmZmZm	Z	 d dl
mZ  G d de          Z G d d	ej                  Z G d
 dej                  Z G d dej                  Z G d dej                  ZdS )    )defaultdictN)BM25ABC)OkapiBM25ModelLuceneBM25ModelAtireBM25Model)
Dictionaryc                   *     e Zd Z fdZd Zd Z xZS )BM25Stubc                 :     t                      j        |i | d S N)super__init__)selfargskwargs	__class__s      :lib/python3.11/site-packages/gensim/test/test_bm25model.pyr   zBM25Stub.__init__   s%    $)&)))))    c                     t                      S r   )dict)r   dfsnum_docss      r   precompute_idfszBM25Stub.precompute_idfs   s    vvr   c                     |S r    )r   
num_tokensterm_frequenciesidfss       r   get_term_weightszBM25Stub.get_term_weights   s    r   )__name__
__module____qualname__r   r   r   __classcell__)r   s   @r   r
   r
      sV        * * * * *               r   r
   c                        e Zd Zd Zd Zd ZdS )BM25ABCTestc                     g dddgddgg| _         t          | j                   | _        t          t	          t
          | j                             t          | j                   z  | _        d S )Ncatdogmouser(   lion)	documentsr   
dictionarysummaplenexpected_avgdl)r   s    r   setUpzBM25ABCTest.setUp   s`    111E6?UFOT$T^44!#c4>":":;;c$.>Q>QQr   c                     t          t          | j        j        | j                            }t          |          }|j        }|                     | j        |           d S )N)corpus)	listr/   r-   doc2bowr,   r
   avgdlassertAlmostEqualr1   )r   r4   modelactual_avgdls       r   test_avgdl_from_corpusz"BM25ABCTest.test_avgdl_from_corpus    sV    c$/14>BBCC'''{t2LAAAAAr   c                 t    t          | j                  }|j        }|                     | j        |           d S )N)r-   )r
   r-   r7   r8   r1   )r   r9   r:   s      r   test_avgdl_from_dictionaryz&BM25ABCTest.test_avgdl_from_dictionary&   s:    DO444{t2LAAAAAr   N)r    r!   r"   r2   r;   r=   r   r   r   r%   r%      sL        R R RB B BB B B B Br   r%   c                   &    e Zd Zd Zd Zd Zd ZdS )OkapiBM25ModelTestc                     g dddgddgg _         t           j                    _        d\   _         _         _         fd} |d          } |d          } |d          } |d          }||z   |z   |z   t           j                  z  } j        |z  }|dk    r|n| _        |dk    r|n| _        |dk    r|n| _	        |dk    r|n| _
        d S )	Nr'   r(   r+         ?      ?g      ?c                      t          t           fdj                            }t          j        t          j                  |z
  dz   |dz   z            S )Nc                     | v S r   r   documentwords    r   <lambda>z;OkapiBM25ModelTest.setUp.<locals>.get_idf.<locals>.<lambda>3       1A r         ?r.   r/   r,   mathlogr0   rH   	frequencyr   s   ` r   get_idfz)OkapiBM25ModelTest.setUp.<locals>.get_idf2   sU    C A A A A4>RRSSI8S009<sBySVWXXXr   r)   r*   r   )r,   r   r-   k1bepsilonr0   expected_dog_idfexpected_cat_idfexpected_mouse_idfexpected_lion_idf)r   rQ   dog_idfcat_idf	mouse_idflion_idfaverage_idfepss   `       r   r2   zOkapiBM25ModelTest.setUp-   s!   111E6?UFOT$T^44(7%	Y 	Y 	Y 	Y 	Y '%..'%..GG$$	76??(94x?3tCWCWWl[(+2Q; ?C+2Q; ?C/81}"E))#-5\!Bsr   c                 b   t          t          | j        j        | j                            }t          || j        | j        | j                  }|j	        | j        j
        d                  }|j	        | j        j
        d                  }|j	        | j        j
        d                  }|j	        | j        j
        d                  }|                     | j        |           |                     | j        |           |                     | j        |           |                     | j        |           d S )N)r4   rR   rS   rT   r)   r(   r*   r+   )r5   r/   r-   r6   r,   r   rR   rS   rT   r   token2idr8   rU   rV   rW   rX   r   r4   r9   actual_dog_idfactual_cat_idfactual_mouse_idfactual_lion_idfs          r   test_idfs_from_corpusz(OkapiBM25ModelTest.test_idfs_from_corpusC   s    c$/14>BBCCfDFDLYYYDO$<U$CDDO$<U$CD :do&>w&GH*T_%=f%EFt4nEEEt4nEEEt68HIIIt5GGGGGr   c                    t          | j        | j        | j        | j                  }|j        | j        j        d                  }|j        | j        j        d                  }|j        | j        j        d                  }|j        | j        j        d                  }|                     | j        |           |                     | j	        |           |                     | j
        |           |                     | j        |           d S )Nr-   rR   rS   rT   r)   r(   r*   r+   )r   r-   rR   rS   rT   r   r`   r8   rU   rV   rW   rX   r   r9   rb   rc   rd   re   s         r   test_idfs_from_dictionaryz,OkapiBM25ModelTest.test_idfs_from_dictionaryQ   s    $/dgY]YefffDO$<U$CDDO$<U$CD :do&>w&GH*T_%=f%EFt4nEEEt4nEEEt68HIIIt5GGGGGr   c                     t           j         j         j         j                   j        d          j                                      }t          d           }|                    |                    | j        j	        d                  }| j        j	        d                  }| j        j	        d                  }| j        j	        d                  } fd}dv r |d          nd	}dv r |d          nd	}	dv r |d          nd	}
dv r |d          nd	} 
                    ||            
                    |	|            
                    |
|            
                    ||           d S )
Nrh   r   c                      dS N        r   r   r   r   rI   z/OkapiBM25ModelTest.test_score.<locals>.<lambda>c       c r   r)   r(   r*   r+   c                     j         j        j        |                   }j        dz   }dj        dj        z
  j        t                    z  j        z  z   z  z   }||z  |z  S N   r   r-   r`   rR   rS   r0   r7   rH   idf	numeratordenominatorfirst_documentr9   r   s       r   get_expected_weightz:OkapiBM25ModelTest.test_score.<locals>.get_expected_weightk   g    *T_5d;<C!IdgTVdfs>?R?R6RUZU`6`)`aaK?[00r   rn   )r   r-   rR   rS   rT   r,   r6   r   updater`   r8   r   	first_bowweightsactual_dog_weightactual_cat_weightactual_mouse_weightactual_lion_weightry   expected_dog_weightexpected_cat_weightexpected_mouse_weightexpected_lion_weightrx   r9   s   `           @@r   
test_scorezOkapiBM25ModelTest.test_score^   s   $/dgY]Yefff*O++N;;	kk**uY'(((#DO$<U$CD#DO$<U$CD%do&>w&GH$T_%=f%EF	1 	1 	1 	1 	1 	1 	1 =B^<S\11%888Y\<A^<S\11%888Y\@G>@Y b 3 3G < < <_b>D>V_226:::\_24EFFF24EFFF46IJJJ35GHHHHHr   Nr    r!   r"   r2   rf   rj   r   r   r   r   r?   r?   ,   s^        C C C,H H HH H HI I I I Ir   r?   c                   &    e Zd Zd Zd Zd Zd ZdS )LuceneBM25ModelTestc                     g dddgddgg _         t           j                    _        d\   _         _         fd} |d           _         |d           _         |d           _         |d           _        d S )Nr'   r(   r+   )rB   rC   c                      t          t           fdj                            }t          j        dt          j                  |z
  dz   |dz   z  z             S )Nc                     | v S r   r   rF   s    r   rI   z<LuceneBM25ModelTest.setUp.<locals>.get_idf.<locals>.<lambda>   rJ   r   g      ?rK   rL   rO   s   ` r   rQ   z*LuceneBM25ModelTest.setUp.<locals>.get_idf   sZ    C A A A A4>RRSSI8C3t~#6#6#BS#HYY\_"]]^^^r   r)   r*   )	r,   r   r-   rR   rS   rU   rV   rW   rX   r   rQ   s   ` r   r2   zLuceneBM25ModelTest.setUp}   s    111E6?UFOT$T^44#	_ 	_ 	_ 	_ 	_ !( '")''"2"2!(r   c                 V   t          t          | j        j        | j                            }t          || j        | j                  }|j        | j        j	        d                  }|j        | j        j	        d                  }|j        | j        j	        d                  }|j        | j        j	        d                  }| 
                    | j        |           | 
                    | j        |           | 
                    | j        |           | 
                    | j        |           d S N)r4   rR   rS   r)   r(   r*   r+   )r5   r/   r-   r6   r,   r   rR   rS   r   r`   r8   rU   rV   rW   rX   ra   s          r   rf   z)LuceneBM25ModelTest.test_idfs_from_corpus   s    c$/14>BBCCv$'TVDDDDO$<U$CDDO$<U$CD :do&>w&GH*T_%=f%EFt4nEEEt4nEEEt68HIIIt5GGGGGr   c                    t          | j        | j        | j                  }|j        | j        j        d                  }|j        | j        j        d                  }|j        | j        j        d                  }|j        | j        j        d                  }|                     | j        |           |                     | j        |           |                     | j	        |           |                     | j
        |           d S Nr-   rR   rS   r)   r(   r*   r+   )r   r-   rR   rS   r   r`   r8   rU   rV   rW   rX   ri   s         r   rj   z-LuceneBM25ModelTest.test_idfs_from_dictionary   s    4?tw$&QQQDO$<U$CDDO$<U$CD :do&>w&GH*T_%=f%EFt4nEEEt4nEEEt68HIIIt5GGGGGr   c                     t           j         j         j                   j        d          j                                      }t          d           }|                    |                    | j        j        d                  }| j        j        d                  }| j        j        d                  }| j        j        d                  } fd}dv r |d          nd	}dv r |d          nd	}	dv r |d          nd	}
dv r |d          nd	} 	                    ||            	                    |	|            	                    |
|            	                    ||           d S )
Nr   r   c                      dS rm   r   r   r   r   rI   z0LuceneBM25ModelTest.test_score.<locals>.<lambda>   ro   r   r)   r(   r*   r+   c                     j         j        j        |                   }dj        dj        z
  j        t                    z  j        z  z   z  z   }||z  S rq   rs   )rH   ru   rw   rx   r9   r   s      r   ry   z;LuceneBM25ModelTest.test_score.<locals>.get_expected_weight   sX    *T_5d;<CdgTVdfs>?R?R6RUZU`6`)`aaK$$r   rn   )
r   r-   rR   rS   r,   r6   r   r{   r`   r8   r|   s   `           @@r   r   zLuceneBM25ModelTest.test_score   s   4?tw$&QQQ*O++N;;	kk**uY'(((#DO$<U$CD#DO$<U$CD%do&>w&GH$T_%=f%EF	% 	% 	% 	% 	% 	% 	%
 =B^<S\11%888Y\<A^<S\11%888Y\@G>@Y b 3 3G < < <_b>D>V_226:::\_24EFFF24EFFF46IJJJ35GHHHHHr   Nr   r   r   r   r   r   |   s[        1 1 1H H HH H HI I I I Ir   r   c                   &    e Zd Zd Zd Zd Zd ZdS )AtireBM25ModelTestc                     g dddgddgg _         t           j                    _        d\   _         _         _         fd} |d           _         |d           _         |d           _         |d           _	        d S )Nr'   r(   r+   rA   c                      t          t           fdj                            }t          j        t          j                  |z            S )Nc                     | v S r   r   rF   s    r   rI   z;AtireBM25ModelTest.setUp.<locals>.get_idf.<locals>.<lambda>   rJ   r   rL   rO   s   ` r   rQ   z)AtireBM25ModelTest.setUp.<locals>.get_idf   sF    C A A A A4>RRSSI8C//);<<<r   r)   r*   )
r,   r   r-   rR   rS   rT   rU   rV   rW   rX   r   s   ` r   r2   zAtireBM25ModelTest.setUp   s    111E6?UFOT$T^44(7%	= 	= 	= 	= 	= !( '")''"2"2!(r   c                 V   t          t          | j        j        | j                            }t          || j        | j                  }|j        | j        j	        d                  }|j        | j        j	        d                  }|j        | j        j	        d                  }|j        | j        j	        d                  }| 
                    | j        |           | 
                    | j        |           | 
                    | j        |           | 
                    | j        |           d S r   )r5   r/   r-   r6   r,   r   rR   rS   r   r`   r8   rU   rV   rW   rX   ra   s          r   rf   z(AtireBM25ModelTest.test_idfs_from_corpus   s    c$/14>BBCCfDFCCCDO$<U$CDDO$<U$CD :do&>w&GH*T_%=f%EFt4nEEEt4nEEEt68HIIIt5GGGGGr   c                    t          | j        | j        | j                  }|j        | j        j        d                  }|j        | j        j        d                  }|j        | j        j        d                  }|j        | j        j        d                  }|                     | j        |           |                     | j        |           |                     | j	        |           |                     | j
        |           d S r   )r   r-   rR   rS   r   r`   r8   rU   rV   rW   rX   ri   s         r   rj   z,AtireBM25ModelTest.test_idfs_from_dictionary   s    $/dgPPPDO$<U$CDDO$<U$CD :do&>w&GH*T_%=f%EFt4nEEEt4nEEEt68HIIIt5GGGGGr   c                     t           j         j         j                   j        d          j                                      }t          d           }|                    |                    | j        j        d                  }| j        j        d                  }| j        j        d                  }| j        j        d                  } fd}dv r |d          nd	}dv r |d          nd	}	dv r |d          nd	}
dv r |d          nd	} 	                    ||            	                    |	|            	                    |
|            	                    ||           d S )
Nr   r   c                      dS rm   r   r   r   r   rI   z/AtireBM25ModelTest.test_score.<locals>.<lambda>   ro   r   r)   r(   r*   r+   c                     j         j        j        |                   }j        dz   }dj        dj        z
  j        t                    z  j        z  z   z  z   }||z  |z  S rq   rs   rt   s       r   ry   z:AtireBM25ModelTest.test_score.<locals>.get_expected_weight   rz   r   rn   )
r   r-   rR   rS   r,   r6   r   r{   r`   r8   r|   s   `           @@r   r   zAtireBM25ModelTest.test_score   s   $/dgPPP*O++N;;	kk**uY'(((#DO$<U$CD#DO$<U$CD%do&>w&GH$T_%=f%EF	1 	1 	1 	1 	1 	1 	1 =B^<S\11%888Y\<A^<S\11%888Y\@G>@Y b 3 3G < < <_b>D>V_226:::\_24EFFF24EFFF46IJJJ35GHHHHHr   Nr   r   r   r   r   r      s[        1 1 1H H HH H HI I I I Ir   r   )collectionsr   rM   unittestgensim.models.bm25modelr   gensim.modelsr   r   r   gensim.corporar   r
   TestCaser%   r?   r   r   r   r   r   <module>r      s   $ # # # # #   + + + + + + I I I I I I I I I I % % % % % %         w      B B B B B(# B B B&MI MI MI MI MI* MI MI MI`DI DI DI DI DI(+ DI DI DINEI EI EI EI EI* EI EI EI EI EIr   