
    _nd8                     :   d dl Z d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlZd dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d Zd Zd Zd Zd Zd Zd Z ej!        "                    dd          d             Z#d Z$d Z%eej!        "                    dd          d                         Z&ed             Z'd Z(ej!        "                    dd          d             Z)ej!        "                    dd          d             Z*d Z+d  Z,d! Z-d" Z.d# Z/d$ Z0ej!        "                    d%g d&          d'             Z1d( Z2ej!        "                    d)d*          d+             Z3ej!        "                    d)d*          d,             Z4dS )-    N)
block_diag)
csr_matrix)psi)assert_array_equal)LatentDirichletAllocation)_dirichlet_expectation_1d_dirichlet_expectation_2d)assert_allclose)assert_array_almost_equal)assert_almost_equal)!if_safe_multiprocessing_with_blas)NotFittedError)StringIOc                      d} t          j        d| t                    }|g| z  }t          | }t	          |          }| |fS )N   )r   r   )dtype)npfullintr   r   )n_componentsblockblocksXs       Klib/python3.11/site-packages/sklearn/decomposition/tests/test_online_lda.py_build_sparse_mtxr      sK     LGFL444EW|#FFA1A!    c                      t                      \  } }d| z  }t          | ||d          }t          | d          }|                    |          }|                    |          }t          ||           d S )Ng      ?r   )r   doc_topic_priortopic_word_priorrandom_stater   r    )r   r   fit_transformr   )r   r   priorlda_1lda_2topic_distr_1topic_distr_2s          r   test_lda_default_prior_paramsr(   %   s     ())OL!,E%!	  E &<aPPPE''**M''**M}55555r   c                  v   t           j                            d          } t                      \  }}t	          |dd|           }|                    |           g d}|j        D ]T}t          |                                dd          d d d                   }t          t          |                    |v sJ Ud S )Nr      batch)r   evaluate_everylearning_methodr    )r   r*      )r         )         r   randomRandomStater   r   fitcomponents_setargsorttuplesortedrngr   r   ldacorrect_idx_grps	componenttop_idxs          r   test_lda_fit_batchrF   6   s    
)


"
"C'))OL!
#!	  C GGAJJJ888_ : :	i''))"##.ttt455VG__%%)999999: :r   c                  x   t           j                            d          } t                      \  }}t	          |ddd|           }|                    |           g d}|j        D ]T}t          |                                dd          d d d                   }t          t          |                    |v sJ Ud S )	Nr         $@r*   online)r   learning_offsetr,   r-   r    r.   r5   r6   r7   r@   s          r   test_lda_fit_onlinerK   I   s    
)


"
"C'))OL!
#!   C GGAJJJ888_ : :	i''))"##.ttt455VG__%%)999999: :r   c                     t           j                            d          } t                      \  }}t	          |dd|           }t          d          D ]}|                    |           g d}|j        D ]T}t          |	                                dd          d d d                   }t          t          |                    |v sJ Ud S )	Nr   rH   d   r   rJ   total_samplesr    r   r.   r5   r6   r   r8   r9   r   r   rangepartial_fitr;   r<   r=   r>   r?   rA   r   r   rB   irC   crE   s           r   test_lda_partial_fitrV   ]   s     )


"
"C'))OL!
#!	  C 1XX  888_ : :aiikk"##&ttt,--VG__%%)999999: :r   c                     t           j                            d          } t                      \  }}t	          |d|           }|                    |                                           g d}|j        D ]T}t          |	                                dd          d d d                   }t          t          |                    |v sJ Ud S )Nr   r+   r   r-   r    r.   r5   r6   )r   r8   r9   r   r   r:   toarrayr;   r<   r=   r>   r?   r@   s          r   test_lda_dense_inputrZ   q   s    
)


"
"C'))OL!
#!7  C GGAIIKK888_ : :	i''))"##.ttt455VG__%%)999999: :r   c                  x   t           j                            d          } |                     dd          }d}t	          ||           }|                    |          }|dk                                    sJ t          t          j        |d	          t          j	        |j
        d                              d S )
Nr   r1      
   sizer   r!   g        r*   axis)r   r8   r9   randintr   r"   anyr   sumonesshape)rA   r   r   rB   X_transs        r   test_lda_transformri      s     )


"
"CAH%%AL
#C
P
P
PC""GcM     bfW1555rww}Q?O7P7PQQQQQr   method)rI   r+   c                    t           j                            d          }|                    dd          }t	          d| |          }|                    |          }|                    |          }t          ||d           d S )Nr   r^   )2   r]   r_   r1   rX   r0   )r   r8   r9   rc   r   r"   	transformr   )rj   rA   r   rB   X_fitrh   s         r   test_lda_fit_transformro      s     )


"
"CBX&&A
#S  C a  EmmAGeWa00000r   c                      t          j        dd          } t                      }d}t          j        t
          |          5  |                    |            d d d            d S # 1 swxY w Y   d S )N)r1   r^         z^Negative values in data passedmatch)r   r   r   pytestraises
ValueErrorr:   )r   rB   regexs      r   test_lda_negative_inputrx      s    
A
#
%
%C.E	z	/	/	/  


                 s   A$$A(+A(c                  $   t           j                            d          } |                     dd          }t	                      }d}t          j        t          |          5  |                    |           d d d            d S # 1 swxY w Y   d S )Nr   r0   r\   r_   z}This LatentDirichletAllocation instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.rr   )	r   r8   r9   rc   r   rt   ru   r   
perplexity)rA   r   rB   rw   s       r   test_lda_no_component_errorr{      s    
)


"
"CAH%%A
#
%
%C	 

 
~U	3	3	3  q                 s   "BB	B	c                 x   t                      \  }}t          j                            d          }t	          |d| d|          }|                    |           g d}|j        D ]T}t          |                                dd          d d d                   }t          t          |                    |v sJ Ud S )Nr   r/   r*   )r   n_jobsr-   r,   r    r.   r5   r6   )r   r   r8   r9   r   r:   r;   r<   r=   r>   r?   )rj   r   r   rA   rB   rC   rU   rE   s           r   test_lda_multi_jobsr~      s     ())OL!
)


"
"C
#!  C GGAJJJ888_ : :aiikk"##&ttt,--VG__%%)999999: :r   c                     t           j                            d          } t                      \  }}t	          |ddd|           }t          d          D ]}|                    |           g d}|j        D ]T}t          |	                                dd          d d d                   }t          t          |                    |v sJ Ud S )	Nr   r/         @   )r   r}   rJ   rO   r    r.   r5   r6   rP   rS   s           r   test_lda_partial_fit_multi_jobsr      s     )


"
"C'))OL!
#!  C 1XX  888_ : :aiikk"##&ttt,--VG__%%)999999: :r   c                     t           j                            d          } |                     dd          }|                     dd          }t           j                            d|df          }t	          |dd| 	          }|                    |           |                     d|d
z   |f          }t          j        t          d          5  |	                    ||           d d d            n# 1 swxY w Y   |                     d||d
z   f          }t          j        t          d          5  |	                    ||           d d d            d S # 1 swxY w Y   d S )Nr   r   r2   r^   r0   r_   r   r]   rN   r*   zNumber of samplesrr   zNumber of topics)
r   r8   r9   rc   r   r:   rt   ru   rv   _perplexity_precomp_distr)rA   r   	n_samplesr   rB   invalid_n_samplesinvalid_n_componentss          r   test_lda_preplexity_mismatchr      s   
)


"
"C;;q!$$LAr""I
	!9b/22A
#!	  C GGAJJJAY]L,IJJ	z)=	>	>	> < <%%a):;;;< < < < < < < < < < < < < < < ;;q	<!;K/L;MM	z)<	=	=	= ? ?%%a)=>>>? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?s$   C11C58C53EEEc                    t                      \  }}t          |d| dd          }t          |d| dd          }|                    |           |                    |d          }|                    |           |                    |d          }||k    sJ |                    |d          }|                    |d          }||k    sJ d S )	Nr*   rM   r   r   max_iterr-   rO   r    r^   Fsub_samplingT)r   r   r:   rz   )	rj   r   r   r$   r%   perp_1perp_2perp_1_subsamplingperp_2_subsamplings	            r   test_lda_perplexityr      s    ())OL!%!  E &!  E 
IIaLLLae44F	IIaLLLae44FV))!$)??))!$)??!3333333r   c                 0   t                      \  }}t          |d| dd          }t          |d| dd          }|                    |           |                    |          }|                    |           |                    |          }||k    sJ d S )Nr*   rM   r   r   r^   )r   r   r"   score)rj   r   r   r$   r%   score_1score_2s          r   test_lda_scorer     s     ())OL!%!  E &!  E 
kk!nnG	kk!nnGgr   c                     t                      \  } }t          | dddd          }|                    |           |                    |          }|                    |                                          }t          ||           d S )Nr*   r+   rM   r   r   )r   r   r:   rz   rY   r   )r   r   rB   r   r   s        r   test_perplexity_input_formatr   -  s     ())OL!
#!  C GGAJJJ^^AF^^AIIKK((F'''''r   c                  P   t                      \  } }t          | dd          }|                    |           |                    |d          }|                    |          }t          j        d|t          j        |j                  z  z            }t          ||           d S )Nr^   r   )r   r   r    Fr   rq   )
r   r   r:   rz   r   r   expre   datar   )r   r   rB   perplexity_1r   perplexity_2s         r   test_lda_score_perplexityr   >  s    '))OL!
#!BQ  C GGAJJJ>>!%>88LIIaLLE6$%"&.."89::Ll33333r   c                      t                      \  } }t          | dddd          }|                    |           |j        }|                    |          }t          ||           d S )Nr*   r+   r   )r   r   r-   r    r,   )r   r   r:   bound_rz   r   )r   r   rB   perplexity1perplexity2s        r   test_lda_fit_perplexityr   L  sy     ())OL!
#!  C GGAJJJ *K ..##K[11111r   c                  2   t          j        d          } | t          |           fD ]p}t          d                              |          }t          |j                            d          t          j        |j        j	        d                              qdS )z+Test LDA on empty document (all-zero rows).)r1   r0   i  )r   r   ra   r*   N)
r   zerosr   r   r:   r   r;   re   rf   rg   )Zr   rB   s      r   test_lda_empty_docsr   b  s    
AA 
 
'55599!<<OQ''1Fq1I)J)J	
 	
 	
 	

 
r   c                     t          j        ddd          } t          j        |           }t          | d|           t	          |t          j        t          |           t          t          j        |                     z
            d           |                     dd          } t	          t          |           t          |           t          t          j        | d	          d
d
t           j
        f                   z
  dd           d
S )z9Test Cython version of Dirichlet expectation calculation.ir^   i'  r   gҶOɃ;)atolrM   r*   ra   Ngdy=gA:)>)rtolr   )r   logspace
empty_liker   r
   r   r   re   reshaper	   newaxis)xexpectations     r   test_dirichlet_expectationr   l  s    
D"e$$A-""KaK000KARVAYY(?!@!@uMMMM			#sA!!$$ARVAA&&&qqq"*}5666	     r   c                    t                      \  }}t          |dd| |d          }t                      }t          j        |c}t          _        	 |                    |           |t          _        n# |t          _        w xY w|                                                    d          }	|                                                    d          }
||	k    sJ ||
k    sJ d S )Nr   r+   r   )r   r   r-   verboser,   r    
rz   )r   r   r   sysstdoutr:   getvaluecount)r   r,   expected_linesexpected_perplexitiesr   r   rB   outold_outn_linesn_perplexitys              r   check_verbosityr   |  s    '))OL!
#!%  C **C*cGSZ




W
llnn""4((G<<>>''55LW$$$$ L000000s   A1 1A?z;verbose,evaluate_every,expected_lines,expected_perplexities))Fr*   r   r   )Fr   r   r   )Tr   r   r   )Tr*   r   r   )Tr/   r   r*   c                 *    t          | |||           d S )N)r   )r   r,   r   r   s       r   test_verbosityr     s     G^^=RSSSSSr   c                      t                      \  } }t          |                               |          }|                                }t	          d t          |           D             |           dS )z6Check feature names out for LatentDirichletAllocation.)r   c                     g | ]}d | S )latentdirichletallocation ).0rT   s     r   
<listcomp>z.test_lda_feature_names_out.<locals>.<listcomp>  s!    FFFQ	(Q	(	(FFFr   N)r   r   r:   get_feature_names_outr   rQ   )r   r   rB   namess       r   test_lda_feature_names_outr     sw    '))OL!
#
>
>
>
B
B1
E
EC%%''EFF%2E2EFFF    r   r-   )r+   rI   c                 0   t           j                            d          }|                    d                              |d          }t          dd|           }|                    |           |j        j        |k    sJ |j	        j        |k    sJ dS )	z2Check data type preservation of fitted attributes.r   r\   r_   F)copyr1   r   r    r-   N)
r   r8   r9   uniformastyper   r:   r;   r   exp_dirichlet_component_)r-   global_dtyperA   r   rB   s        r   test_lda_dtype_matchr     s     )


"
"C"")),U)CCA
#Q  C GGAJJJ? L0000'-======r   c                    t           j                            |          }|                    d          }|                    t           j                  }t          d||                               |          }t          d||                               |          }t          |j	        |j	                   t          |
                    |          |
                    |                     dS )z>Check numerical consistency between np.float32 and np.float64.r\   r_   r1   r   N)r   r8   r9   r   r   float32r   r:   r
   r;   rm   )r-   global_random_seedrA   X64X32lda_64lda_32s          r   test_lda_numerical_consistencyr     s     )

 2
3
3C
++8+
$
$C
**RZ
 
 C&%7  	c#hh  '%7  	c#hh  F&(:;;;F$$S))6+;+;C+@+@AAAAAr   )5r   numpyr   scipy.linalgr   scipy.sparser   scipy.specialr   numpy.testingr   rt   sklearn.decompositionr   &sklearn.decomposition._online_lda_fastr   r	   sklearn.utils._testingr
   r   r   r   sklearn.exceptionsr   ior   r   r(   rF   rK   rV   rZ   ri   markparametrizero   rx   r{   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>r      s   



     # # # # # # # # # # # #       , , , , , ,  ; ; ; ; ; ;       
 3 2 2 2 2 2 < < < < < < 6 6 6 6 6 6 D D D D D D - - - - - -        6 6 6": : :&: : :(: : :(: : : 	R 	R 	R #677
1 
1 87
1     ##677: : 87 #":& #: : #":(? ? ?. #6774 4 874< #677  874( ( ("4 4 42 2 2,
 
 
   1 1 1. A  	 	T T	 	T   *,?@@
> 
> A@
> *,?@@B B A@B B Br   