
    ܙdV[                        d dl Z d dlmZ d dlmZmZmZmZmZ d dl	m
Z
mZmZmZmZmZ d dlmZmZ d dlmZ d dlmZ d dlmZ  e            Zej        Zej        Zej        Z ee            G d	 d
e          Ze G d de                      Ze G d de                      Ze G d de                      Z e G d de                      Z!e G d de                      Z"e G d de                      Z#e G d de                      Z$e G d de                      Z%e G d de
                      Z& G d de          Z'e G d d e
                      Z(e G d! d"e                      Z)e G d# d$e                      Z*e G d% d&e                      Z+e G d' d(e                      Z,e G d) d*e                      Z-e G d+ d,e                      Z.e G d- d.e                      Z/e G d/ d0e                      Z0e G d1 d2e                      Z1e G d3 d4e                      Z2e G d5 d6e                      Z3e G d7 d8e                      Z4e G d9 d:e                      Z5e G d; d<e                      Z6d= Z7d> Z8d? Z9 ee:           G d@ dAe                      Z;dB Z<dC Z=dD Z>dE Z? e9ej@        jA                  ZB e?e jC                  ZD e?e jE                  ZF e9ej@        jG                  ZH e?e jI                  ZJ e?e jK                  ZL e9ej@        jM                  ZN e?e jO                  ZP e?e jQ                  ZR e9ej@        jS                  ZT e9ej@        jU                  ZV e7ej@        jW                  ZX e8e jY                  ZZ e7ej@        j[                  Z\ e8e]          Z^ e<ej@        j_                  Z` e>e ja                    e<ej@        jb                  Zc e>e jd                    e<ej@        je                  Zf e>e jg                    e<ej@        jh                  Zi e>e jj                    e<ej@        jk                  Zl e>e jm                    e<ej@        jn                  Zo e>e jp                    e?e jq                    e?e jr                   dF ZsdG Zt esdH          Zu esdI          Zv esdJ          Zw esdK          Zx esdL          Zy esdM          Zz esdN          Z{ esdO          Z| esdP          Z} esdQ          Z~ esdR          Z esdS          Z esdT          Z esdU          Z esdV          Z etdW          ZdX Zej        ej        ej        ej        ej        ej        fZej        ej        ej        ej        fZej        ej        fZ eej        jC        e          Z eej        jI        e          Z eej        j        e          Z eej        j        e          Z eej        j        e          Z eej        j        e          Z eej        j        e          Z eej        j        e          Z eej        j        e          Z eej        j        e          Z eej        j        e          Z eej        j        e          Ze G dY dZe                      Ze G d[ d\e                      Ze G d] d^e                      Ze G d_ d`e
                      Ze G da dbe
                      Ze G dc dde
                      Ze G de dfe
                      Ze G dg dhe
                      Ze G di dje
                      Ze G dk dle
                      Z ee ej        e                     eD ]Z eee           dS )m    N)types)parse_dtypeparse_shaperegister_number_classesregister_numpy_ufunctrigonometric_functions)AttributeTemplateConcreteTemplateAbstractTemplateCallableTemplate	signatureRegistry)dim3
grid_group)
Conversion)cuda) declare_device_function_templatec                       e Zd Zd ZdS )Cuda_array_declc                     d }|S )Nc                 r   t          | t          j                  rt          | t          j                  sd S nDt          | t          j        t          j        f          rt          d | D                       rd S nd S t          |           }t          |          }||t          j	        ||d          S d S d S )Nc                 D    g | ]}t          |t          j                   S  )
isinstancer   IntegerLiteral).0ss     3lib/python3.11/site-packages/numba/cuda/cudadecl.py
<listcomp>z:Cuda_array_decl.generic.<locals>.typer.<locals>.<listcomp>!   s8     ( ( ( 'q%*>??? ( ( (    C)dtypendimlayout)
r   r   Integerr   TupleUniTupleanyr   r   Array)shaper"   r#   nb_dtypes       r   typerz&Cuda_array_decl.generic.<locals>.typer   s     %// !%)=>>  4 EEK#@AA  ( (!&( ( ( ) )  4  tu%%D"5))H#(8{SIIII $#(8(8r    r   selfr,   s     r   genericzCuda_array_decl.generic   s    	J 	J 	J& r    N__name__
__module____qualname__r/   r   r    r   r   r      s#            r    r   c                   &    e Zd Zej        j        ZdS )Cuda_shared_arrayN)r1   r2   r3   r   sharedarraykeyr   r    r   r5   r5   /   s        
+
CCCr    r5   c                   &    e Zd Zej        j        ZdS )Cuda_local_arrayN)r1   r2   r3   r   localr7   r8   r   r    r   r:   r:   4   s        
*
CCCr    r:   c                   ,    e Zd Zej        j        Zd ZdS )Cuda_const_array_likec                     d }|S )Nc                     | S Nr   )ndarrays    r   r,   z,Cuda_const_array_like.generic.<locals>.typer>   s    Nr    r   r-   s     r   r/   zCuda_const_array_like.generic=   s    	 	 	r    N)r1   r2   r3   r   const
array_liker8   r/   r   r    r   r=   r=   9   s-        
*
C    r    r=   c                   >    e Zd Zej        Z eej                  gZ	dS )Cuda_threadfence_deviceN)
r1   r2   r3   r   threadfencer8   r   r   nonecasesr   r    r   rE   rE   C   s*        

CYuz""#EEEr    rE   c                   >    e Zd Zej        Z eej                  gZ	dS )Cuda_threadfence_blockN)
r1   r2   r3   r   threadfence_blockr8   r   r   rG   rH   r   r    r   rJ   rJ   I   s*        

 CYuz""#EEEr    rJ   c                   >    e Zd Zej        Z eej                  gZ	dS )Cuda_threadfence_systemN)
r1   r2   r3   r   threadfence_systemr8   r   r   rG   rH   r   r    r   rM   rM   O   s*        

!CYuz""#EEEr    rM   c                   h    e Zd Zej        Z eej                   eej        ej	                  gZ
dS )Cuda_syncwarpN)r1   r2   r3   r   syncwarpr8   r   r   rG   i4rH   r   r    r   rP   rP   U   s;        
-CYuz""IIej%($C$CDEEEr    rP   c                   >    e Zd Zej        j        Z ee          gZ	dS )Cuda_cg_this_gridN)
r1   r2   r3   r   cg	this_gridr8   r   r   rH   r   r    r   rT   rT   [   s*        
'
CYz""#EEEr    rT   c                   >    e Zd Z ej        ej                  Zd ZdS )CudaCgModuleTemplatec                 4    t          j        t                    S r@   )r   FunctionrT   r.   mods     r   resolve_this_gridz&CudaCgModuleTemplate.resolve_this_gride       ~/000r    N)	r1   r2   r3   r   Moduler   rU   r8   r]   r   r    r   rX   rX   a   s6        
%,tw

C1 1 1 1 1r    rX   c                       e Zd ZdZd ZdS )Cuda_grid_group_synczGridGroup.syncc                 B    t          t          j        | j                  S )N)recvr)r   r   int32thisr.   argskwss      r   r/   zCuda_grid_group_sync.genericl   s    DI6666r    Nr1   r2   r3   r8   r/   r   r    r   ra   ra   i   s(        
C7 7 7 7 7r    ra   c                       e Zd ZeZd ZdS )GridGroup_attrsc                 @    t          j        t          t                    S r@   )r   BoundFunctionra   r   r[   s     r   resolve_synczGridGroup_attrs.resolve_synct   s    "#7DDDr    N)r1   r2   r3   r   r8   rn   r   r    r   rk   rk   p   s-        
CE E E E Er    rk   c                   0   e Zd Zej        Z e ej        ej	        ej
        f          ej	        ej	        ej	        ej	        ej	                   e ej        ej        ej
        f          ej	        ej	        ej        ej	        ej	                   e ej        ej        ej
        f          ej	        ej	        ej        ej	        ej	                   e ej        ej        ej
        f          ej	        ej	        ej        ej	        ej	                  gZdS )Cuda_shfl_sync_intrinsicN)r1   r2   r3   r   shfl_sync_intrinsicr8   r   r   r&   rR   b1i8f4f8rH   r   r    r   rp   rp   x   s       

"C	+%+ux233(EHeh%(	D 	D	+%+ux233(EHeh%(	D 	D	+%+ux233(EHeh%(	D 	D	+%+ux233(EHeh%(	D 	D	EEEr    rp   c                       e Zd Zej        Z e ej        ej	        ej
        f          ej	        ej	        ej
                  gZdS )Cuda_vote_sync_intrinsicN)r1   r2   r3   r   vote_sync_intrinsicr8   r   r   r&   rR   rr   rH   r   r    r   rw   rw      sP        

"CY{u{EHeh#788x585 5 6EEEr    rw   c                       e Zd Zej        Z eej        ej        ej                   eej        ej        ej	                   eej        ej        ej
                   eej        ej        ej                  gZdS )Cuda_match_any_syncN)r1   r2   r3   r   match_any_syncr8   r   r   rR   rs   rt   ru   rH   r   r    r   rz   rz      sy        

C	%(EHeh//	%(EHeh//	%(EHeh//	%(EHeh//	EEEr    rz   c            	          e Zd Zej        Z e ej        ej	        ej
        f          ej	        ej	                   e ej        ej	        ej
        f          ej	        ej                   e ej        ej	        ej
        f          ej	        ej                   e ej        ej	        ej
        f          ej	        ej                  gZdS )Cuda_match_all_syncN)r1   r2   r3   r   match_all_syncr8   r   r   r&   rR   rr   rs   rt   ru   rH   r   r    r   r}   r}      s        

C	+%+ux233UXuxHH	+%+ux233UXuxHH	+%+ux233UXuxHH	+%+ux233UXuxHH	EEEr    r}   c                   >    e Zd Zej        Z eej                  gZ	dS )Cuda_activemaskN)
r1   r2   r3   r   
activemaskr8   r   r   uint32rH   r   r    r   r   r      s)        
/CYu|$$%EEEr    r   c                   >    e Zd Zej        Z eej                  gZ	dS )Cuda_lanemask_ltN)
r1   r2   r3   r   lanemask_ltr8   r   r   r   rH   r   r    r   r   r      s*        

CYu|$$%EEEr    r   c                   t   e Zd ZdZej        Z eej	        ej	                   eej
        ej
                   eej        ej                   eej        ej                   eej        ej                   eej        ej                   eej        ej                   eej        ej                  gZdS )	Cuda_popcz
    Supported types from `llvm.popc`
    [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics)
    N)r1   r2   r3   __doc__r   popcr8   r   r   int8int16rd   int64uint8uint16r   uint64rH   r   r    r   r   r      s          )C	%*ej))	%+u{++	%+u{++	%+u{++	%+u{++	%,--	%,--	%,--	EEEr    r   c                       e Zd ZdZej        Z eej	        ej	        ej	        ej	                   eej
        ej
        ej
        ej
                  gZdS )Cuda_fmaz
    Supported types from `llvm.fma`
    [here](https://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#standard-c-library-intrinics)
    N)r1   r2   r3   r   r   fmar8   r   r   float32float64rH   r   r    r   r   r      s^          (C	%-u}MM	%-u}MMEEEr    r   c                   l    e Zd Zej        j        Z eej	        ej	        ej	        ej	                  gZ
dS )	Cuda_hfmaN)r1   r2   r3   r   fp16hfmar8   r   r   float16rH   r   r    r   r   r      s9        
).C	%-u}MMEEEr    r   c                   t    e Zd Zej        Z eej        ej                   eej	        ej	                  gZ
dS )	Cuda_cbrtN)r1   r2   r3   r   cbrtr8   r   r   r   r   rH   r   r    r   r   r      sD         )C	%-//	%-//EEEr    r   c                   t    e Zd Zej        Z eej        ej                   eej	        ej	                  gZ
dS )	Cuda_brevN)r1   r2   r3   r   brevr8   r   r   r   r   rH   r   r    r   r   r      sB        
)C	%,--	%,--EEEr    r   c                   t   e Zd ZdZej        Z eej	        ej	                   eej
        ej
                   eej        ej                   eej        ej                   eej        ej                   eej        ej                   eej        ej                   eej        ej                  gZdS )Cuda_clzz
    Supported types from `llvm.ctlz`
    [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics)
    N)r1   r2   r3   r   r   clzr8   r   r   r   r   rd   r   r   r   r   r   rH   r   r    r   r   r      s          (C	%*ej))	%+u{++	%+u{++	%+u{++	%+u{++	%,--	%,--	%,--	EEEr    r   c                   t   e Zd ZdZej        Z eej	        ej
                   eej	        ej                   eej	        ej                   eej	        ej                   eej	        ej                   eej	        ej                   eej	        ej	                   eej	        ej                  gZdS )Cuda_ffsz
    Supported types from `llvm.cttz`
    [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics)
    N)r1   r2   r3   r   r   ffsr8   r   r   r   r   r   rd   r   r   r   r   rH   r   r    r   r   r      s          (C	%,
++	%,,,	%,,,	%,,,	%,,,	%,--	%,--	%,--	EEEr    r   c                   "    e Zd Zej        Zd ZdS )	Cuda_selpc                    |rJ |\  }}}t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j        f}||k    s||vrd S t          ||||          S r@   )
r   r   r   r   r   rd   r   r   r   r   )r.   rg   rh   testabsupported_typess          r   r/   zCuda_selp.generic  sq    
a !=%- ; ; ;6
 66Qo--FD!Q'''r    N)r1   r2   r3   r   selpr8   r/   r   r    r   r   r     s*        
)C( ( ( ( (r    r   c                 L     t            G  fddt                                }|S )Nc                   B    e Zd Z Z eej        ej                  gZdS )'_genfp16_unary.<locals>.Cuda_fp16_unaryNr1   r2   r3   r8   r   r   r   rH   l_keys   r   Cuda_fp16_unaryr   '  s,        5=%-889r    r   registerr
   r   r   s   ` r   _genfp16_unaryr   &  sJ    : : : : : : :* : : X: r    c                 \     t                      G  fddt                                }|S )Nc                       e Zd Z Zd ZdS )0_genfp16_unary_operator.<locals>.Cuda_fp16_unaryc                     |rJ t          |          dk    r:|d         t          j        k    r&t          t          j        t          j                  S d S d S )N   r   )lenr   r   r   rf   s      r   r/   z8_genfp16_unary_operator.<locals>.Cuda_fp16_unary.generic4  sK    NNN4yyA~~$q'U]":": >>> ~":":r    Nri   r   s   r   r   r   0  s)        	? 	? 	? 	? 	?r    r   register_globalr   r   s   ` r   _genfp16_unary_operatorr   /  sS    U? ? ? ? ? ? ?* ? ? ? r    c                 L     t            G  fddt                                }|S )Nc                   N    e Zd Z Z eej        ej        ej                  gZdS ))_genfp16_binary.<locals>.Cuda_fp16_binaryNr   r   s   r   Cuda_fp16_binaryr   =  s0        5=%-GGHr    r   r   )r   r   s   ` r   _genfp16_binaryr   <  sT    I I I I I I I+ I I XI r    c                       e Zd Zd ZdS )Floatc                 V    |rJ |\  }|t           j        k    rt          ||          S d S r@   )r   r   r   )r.   rg   rh   args       r   r/   zFloat.genericH  s7    %-S#&&&  r    Nr0   r   r    r   r   r   E  s#        ' ' ' ' 'r    r   c                 L     t            G  fddt                                }|S )Nc                   N    e Zd Z Z eej        ej        ej                  gZdS )1_genfp16_binary_comparison.<locals>.Cuda_fp16_cmpN)	r1   r2   r3   r8   r   r   rr   r   rH   r   s   r   Cuda_fp16_cmpr   R  s4         Iehu}==
r    r   r   )r   r   s   ` r   _genfp16_binary_comparisonr   Q  sJ    
 
 
 
 
 
 
( 
 
 X
 r    c                 `     t                      G  fddt                                }|S )Nc                       e Zd Z ZfdZdS )1_fp16_binary_operator.<locals>.Cuda_fp16_operatorc                 
   |rJ t          |          dk    r|d         t          j        k    s|d         t          j        k    r|d         t          j        k    r(| j                            |d         |d                   }n'| j                            |d         |d                   }|t
          j        k    s |t
          j        k    s|t
          j        k    r)t          t          j        t          j                  S d S d S d S )N   r   r   )
r   r   r   contextcan_convertr   exactpromotesafer   )r.   rg   rh   convertiblerettys       r   r/   z9_fp16_binary_operator.<locals>.Cuda_fp16_operator.genericn  s    NNN4yyA~~!W--aEM1I1IGu},,"&,":":47DG"L"LKK"&,":":47DG"L"LK  :#333:#555:?22$UEM5=III% ~1I1I  32r    Nri   )r   r   s   r   Cuda_fp16_operatorr   j  s:        	J 	J 	J 	J 	J 	J 	Jr    r   r   )r   r   r   s   `` r   _fp16_binary_operatorr   i  sd    UJ J J J J J J J- J J J4 r    c                 6    t          | t          j                  S r@   )r   r   rr   ops    r   _genfp16_comparison_operatorr     s     UX...r    c                 6    t          | t          j                  S r@   )r   r   r   r   s    r   _genfp16_binary_operatorr     s     U]333r    c                 |    t          d|  t          j        t          j        f          }t          j        |          S N__numba_wrapper_r   r   r   rZ   fnamedecls     r   _resolve_wrapped_unaryr     s;    +,Fu,F,F,1M-2],<> >D >$r    c                     t          d|  t          j        t          j        t          j        f          }t          j        |          S r   r   r   s     r   _resolve_wrapped_binaryr     sA    +,Fu,F,F,1M-2]EM,KM MD >$r    hsinhcoshloghlog10hlog2hexphexp10hexp2hsqrthrsqrthfloorhceilhrcphrinthtrunchdivc                 P     t            G  fddt                                }|S )Nc                       e Zd Z ZfdZdS )_gen.<locals>.Cuda_atomicc                     |rJ |\  }}}|j         vrd S |j        dk    r&t          |j         |t          j        |j                   S |j        dk    rt          |j         |||j                   S d S Nr   )r"   r#   r   r   intp)r.   rg   rh   aryidxvalr   s         r   r/   z!_gen.<locals>.Cuda_atomic.generic  sx    NNN MCcy//x1}} CSYGGGA Cci@@@ r    Nri   )r   r   s   r   Cuda_atomicr     s:        
	A 
	A 
	A 
	A 
	A 
	A 
	Ar    r  )r   r   )r   r   r  s   `` r   _genr    s[    A A A A A A A A& A A XA r    c                   ,    e Zd Zej        j        Zd ZdS )Cuda_atomic_compare_and_swapc                 x    |rJ |\  }}}|j         }|t          v r|j        dk    rt          ||||          S d S d S r  )r"   integer_numba_typesr#   r   )r.   rg   rh   r  oldr  dtys          r   r/   z$Cuda_atomic_compare_and_swap.generic  sS    S#i%%%#(a--S#sC000 &%--r    N)r1   r2   r3   r   atomiccompare_and_swapr8   r/   r   r    r   r
  r
    s-        
+
&C1 1 1 1 1r    r
  c                   ,    e Zd Zej        j        Zd ZdS )Cuda_atomic_casc                     |rJ |\  }}}}|j         }|t          vrd S |j        dk    rt          ||t          j        ||          S |j        dk    rt          |||||          S d S r  )r"   r  r#   r   r   r  )r.   rg   rh   r  r  r  r  r  s           r   r/   zCuda_atomic_cas.generic  s|    !S#si)))F8q==S#uz3<<<X\\S#sC555 \r    N)r1   r2   r3   r   r  casr8   r/   r   r    r   r  r    s,        
+/C6 6 6 6 6r    r  c                   J    e Zd Zej        Z eej        ej	                  gZ
dS )Cuda_nanosleepN)r1   r2   r3   r   	nanosleepr8   r   r   voidr   rH   r   r    r   r  r    s-        
.CYuz5<001EEEr    r  c                   $    e Zd ZeZd Zd Zd ZdS )
Dim3_attrsc                     t           j        S r@   r   rd   r[   s     r   	resolve_xzDim3_attrs.resolve_x$  
    {r    c                     t           j        S r@   r  r[   s     r   	resolve_yzDim3_attrs.resolve_y'  r  r    c                     t           j        S r@   r  r[   s     r   	resolve_zzDim3_attrs.resolve_z*  r  r    N)r1   r2   r3   r   r8   r  r   r"  r   r    r   r  r     sF        
C        r    r  c                   >    e Zd Z ej        ej                  Zd ZdS )CudaSharedModuleTemplatec                 4    t          j        t                    S r@   )r   rZ   r5   r[   s     r   resolve_arrayz&CudaSharedModuleTemplate.resolve_array2  r^   r    N)	r1   r2   r3   r   r_   r   r6   r8   r&  r   r    r   r$  r$  .  s6        
%,t{
#
#C1 1 1 1 1r    r$  c                   >    e Zd Z ej        ej                  Zd ZdS )CudaConstModuleTemplatec                 4    t          j        t                    S r@   )r   rZ   r=   r[   s     r   resolve_array_likez*CudaConstModuleTemplate.resolve_array_like:  s    ~3444r    N)	r1   r2   r3   r   r_   r   rB   r8   r*  r   r    r   r(  r(  6  s6        
%,tz
"
"C5 5 5 5 5r    r(  c                   >    e Zd Z ej        ej                  Zd ZdS )CudaLocalModuleTemplatec                 4    t          j        t                    S r@   )r   rZ   r:   r[   s     r   r&  z%CudaLocalModuleTemplate.resolve_arrayB      ~.///r    N)	r1   r2   r3   r   r_   r   r;   r8   r&  r   r    r   r,  r,  >  s6        
%,tz
"
"C0 0 0 0 0r    r,  c                       e Zd Z ej        ej                  Zd Zd Z	d Z
d Zd Zd Zd Zd Zd	 Zd
 Zd Zd Zd Zd ZdS )CudaAtomicTemplatec                 4    t          j        t                    S r@   )r   rZ   Cuda_atomic_addr[   s     r   resolve_addzCudaAtomicTemplate.resolve_addJ      ~o...r    c                 4    t          j        t                    S r@   )r   rZ   Cuda_atomic_subr[   s     r   resolve_subzCudaAtomicTemplate.resolve_subM  r4  r    c                 4    t          j        t                    S r@   )r   rZ   Cuda_atomic_andr[   s     r   resolve_and_zCudaAtomicTemplate.resolve_and_P  r4  r    c                 4    t          j        t                    S r@   )r   rZ   Cuda_atomic_orr[   s     r   resolve_or_zCudaAtomicTemplate.resolve_or_S      ~n---r    c                 4    t          j        t                    S r@   )r   rZ   Cuda_atomic_xorr[   s     r   resolve_xorzCudaAtomicTemplate.resolve_xorV  r4  r    c                 4    t          j        t                    S r@   )r   rZ   Cuda_atomic_incr[   s     r   resolve_inczCudaAtomicTemplate.resolve_incY  r4  r    c                 4    t          j        t                    S r@   )r   rZ   Cuda_atomic_decr[   s     r   resolve_deczCudaAtomicTemplate.resolve_dec\  r4  r    c                 4    t          j        t                    S r@   )r   rZ   Cuda_atomic_exchr[   s     r   resolve_exchzCudaAtomicTemplate.resolve_exch_  r.  r    c                 4    t          j        t                    S r@   )r   rZ   Cuda_atomic_maxr[   s     r   resolve_maxzCudaAtomicTemplate.resolve_maxb  r4  r    c                 4    t          j        t                    S r@   )r   rZ   Cuda_atomic_minr[   s     r   resolve_minzCudaAtomicTemplate.resolve_mine  r4  r    c                 4    t          j        t                    S r@   )r   rZ   Cuda_atomic_nanminr[   s     r   resolve_nanminz!CudaAtomicTemplate.resolve_nanminh      ~0111r    c                 4    t          j        t                    S r@   )r   rZ   Cuda_atomic_nanmaxr[   s     r   resolve_nanmaxz!CudaAtomicTemplate.resolve_nanmaxk  rT  r    c                 4    t          j        t                    S r@   )r   rZ   r
  r[   s     r   resolve_compare_and_swapz+CudaAtomicTemplate.resolve_compare_and_swapn  s    ~:;;;r    c                 4    t          j        t                    S r@   )r   rZ   r  r[   s     r   resolve_caszCudaAtomicTemplate.resolve_casq  r4  r    N)r1   r2   r3   r   r_   r   r  r8   r3  r7  r:  r=  rA  rD  rG  rJ  rM  rP  rS  rW  rY  r[  r   r    r   r0  r0  F  s        
%,t{
#
#C/ / // / // / /. . ./ / // / // / /0 0 0/ / // / /2 2 22 2 2< < </ / / / /r    r0  c                       e Zd Z ej        ej                  Zd Zd Z	d Z
d Zd Zd Zd Zd Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d Z!d Z"d Z#d Z$d Z%dS ) CudaFp16Templatec                 4    t          j        t                    S r@   )r   rZ   	Cuda_haddr[   s     r   resolve_haddzCudaFp16Template.resolve_haddy      ~i(((r    c                 4    t          j        t                    S r@   )r   rZ   	Cuda_hsubr[   s     r   resolve_hsubzCudaFp16Template.resolve_hsub|  ra  r    c                 4    t          j        t                    S r@   )r   rZ   	Cuda_hmulr[   s     r   resolve_hmulzCudaFp16Template.resolve_hmul  ra  r    c                     t           S r@   )hdiv_devicer[   s     r   resolve_hdivzCudaFp16Template.resolve_hdiv      r    c                 4    t          j        t                    S r@   )r   rZ   	Cuda_hnegr[   s     r   resolve_hnegzCudaFp16Template.resolve_hneg  ra  r    c                 4    t          j        t                    S r@   )r   rZ   	Cuda_habsr[   s     r   resolve_habszCudaFp16Template.resolve_habs  ra  r    c                 4    t          j        t                    S r@   )r   rZ   r   r[   s     r   resolve_hfmazCudaFp16Template.resolve_hfma  ra  r    c                     t           S r@   )hsin_devicer[   s     r   resolve_hsinzCudaFp16Template.resolve_hsin  rk  r    c                     t           S r@   )hcos_devicer[   s     r   resolve_hcoszCudaFp16Template.resolve_hcos  rk  r    c                     t           S r@   )hlog_devicer[   s     r   resolve_hlogzCudaFp16Template.resolve_hlog  rk  r    c                     t           S r@   )hlog10_devicer[   s     r   resolve_hlog10zCudaFp16Template.resolve_hlog10      r    c                     t           S r@   )hlog2_devicer[   s     r   resolve_hlog2zCudaFp16Template.resolve_hlog2      r    c                     t           S r@   )hexp_devicer[   s     r   resolve_hexpzCudaFp16Template.resolve_hexp  rk  r    c                     t           S r@   )hexp10_devicer[   s     r   resolve_hexp10zCudaFp16Template.resolve_hexp10  r  r    c                     t           S r@   )hexp2_devicer[   s     r   resolve_hexp2zCudaFp16Template.resolve_hexp2  r  r    c                     t           S r@   )hfloor_devicer[   s     r   resolve_hfloorzCudaFp16Template.resolve_hfloor  r  r    c                     t           S r@   )hceil_devicer[   s     r   resolve_hceilzCudaFp16Template.resolve_hceil  r  r    c                     t           S r@   )hsqrt_devicer[   s     r   resolve_hsqrtzCudaFp16Template.resolve_hsqrt  r  r    c                     t           S r@   )hrsqrt_devicer[   s     r   resolve_hrsqrtzCudaFp16Template.resolve_hrsqrt  r  r    c                     t           S r@   )hrcp_devicer[   s     r   resolve_hrcpzCudaFp16Template.resolve_hrcp  rk  r    c                     t           S r@   )hrint_devicer[   s     r   resolve_hrintzCudaFp16Template.resolve_hrint  r  r    c                     t           S r@   )htrunc_devicer[   s     r   resolve_htrunczCudaFp16Template.resolve_htrunc  r  r    c                 4    t          j        t                    S r@   )r   rZ   Cuda_heqr[   s     r   resolve_heqzCudaFp16Template.resolve_heq      ~h'''r    c                 4    t          j        t                    S r@   )r   rZ   Cuda_hner[   s     r   resolve_hnezCudaFp16Template.resolve_hne  r  r    c                 4    t          j        t                    S r@   )r   rZ   Cuda_hger[   s     r   resolve_hgezCudaFp16Template.resolve_hge  r  r    c                 4    t          j        t                    S r@   )r   rZ   Cuda_hgtr[   s     r   resolve_hgtzCudaFp16Template.resolve_hgt  r  r    c                 4    t          j        t                    S r@   )r   rZ   Cuda_hler[   s     r   resolve_hlezCudaFp16Template.resolve_hle  r  r    c                 4    t          j        t                    S r@   )r   rZ   Cuda_hltr[   s     r   resolve_hltzCudaFp16Template.resolve_hlt  r  r    c                 4    t          j        t                    S r@   )r   rZ   	Cuda_hmaxr[   s     r   resolve_hmaxzCudaFp16Template.resolve_hmax  ra  r    c                 4    t          j        t                    S r@   )r   rZ   	Cuda_hminr[   s     r   resolve_hminzCudaFp16Template.resolve_hmin  ra  r    N)&r1   r2   r3   r   r_   r   r   r8   r`  rd  rg  rj  rn  rq  rs  rv  ry  r|  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r    r   r]  r]  u  s       
%,ty
!
!C) ) )) ) )) ) )  ) ) )) ) )) ) )                              ( ( (( ( (( ( (( ( (( ( (( ( () ) )) ) ) ) )r    r]  c                       e Zd Z ej        e          Zd Zd Zd Z	d Z
d Zd Zd Zd Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d Z!d Z"d Z#dS )CudaModuleTemplatec                 >    t          j        t          j                  S r@   )r   r_   r   rU   r[   s     r   
resolve_cgzCudaModuleTemplate.resolve_cg  s    |DG$$$r    c                     t           S r@   r   r[   s     r   resolve_threadIdxz$CudaModuleTemplate.resolve_threadIdx      r    c                     t           S r@   r  r[   s     r   resolve_blockIdxz#CudaModuleTemplate.resolve_blockIdx  r  r    c                     t           S r@   r  r[   s     r   resolve_blockDimz#CudaModuleTemplate.resolve_blockDim  r  r    c                     t           S r@   r  r[   s     r   resolve_gridDimz"CudaModuleTemplate.resolve_gridDim  r  r    c                     t           j        S r@   r  r[   s     r   resolve_laneidz!CudaModuleTemplate.resolve_laneid  r  r    c                 >    t          j        t          j                  S r@   )r   r_   r   r6   r[   s     r   resolve_sharedz!CudaModuleTemplate.resolve_shared      |DK(((r    c                 4    t          j        t                    S r@   )r   rZ   r   r[   s     r   resolve_popczCudaModuleTemplate.resolve_popc  ra  r    c                 4    t          j        t                    S r@   )r   rZ   r   r[   s     r   resolve_brevzCudaModuleTemplate.resolve_brev  ra  r    c                 4    t          j        t                    S r@   )r   rZ   r   r[   s     r   resolve_clzzCudaModuleTemplate.resolve_clz  r  r    c                 4    t          j        t                    S r@   )r   rZ   r   r[   s     r   resolve_ffszCudaModuleTemplate.resolve_ffs  r  r    c                 4    t          j        t                    S r@   )r   rZ   r   r[   s     r   resolve_fmazCudaModuleTemplate.resolve_fma  r  r    c                 4    t          j        t                    S r@   )r   rZ   r   r[   s     r   resolve_cbrtzCudaModuleTemplate.resolve_cbrt  ra  r    c                 4    t          j        t                    S r@   )r   rZ   rE   r[   s     r   resolve_threadfencez&CudaModuleTemplate.resolve_threadfence      ~5666r    c                 4    t          j        t                    S r@   )r   rZ   rJ   r[   s     r   resolve_threadfence_blockz,CudaModuleTemplate.resolve_threadfence_block  s    ~4555r    c                 4    t          j        t                    S r@   )r   rZ   rM   r[   s     r   resolve_threadfence_systemz-CudaModuleTemplate.resolve_threadfence_system  r  r    c                 4    t          j        t                    S r@   )r   rZ   rP   r[   s     r   resolve_syncwarpz#CudaModuleTemplate.resolve_syncwarp  s    ~m,,,r    c                 4    t          j        t                    S r@   )r   rZ   rp   r[   s     r   resolve_shfl_sync_intrinsicz.CudaModuleTemplate.resolve_shfl_sync_intrinsic      ~6777r    c                 4    t          j        t                    S r@   )r   rZ   rw   r[   s     r   resolve_vote_sync_intrinsicz.CudaModuleTemplate.resolve_vote_sync_intrinsic  r  r    c                 4    t          j        t                    S r@   )r   rZ   rz   r[   s     r   resolve_match_any_syncz)CudaModuleTemplate.resolve_match_any_sync      ~1222r    c                 4    t          j        t                    S r@   )r   rZ   r}   r[   s     r   resolve_match_all_syncz)CudaModuleTemplate.resolve_match_all_sync  r  r    c                 4    t          j        t                    S r@   )r   rZ   r   r[   s     r   resolve_activemaskz%CudaModuleTemplate.resolve_activemask  r4  r    c                 4    t          j        t                    S r@   )r   rZ   r   r[   s     r   resolve_lanemask_ltz&CudaModuleTemplate.resolve_lanemask_lt  r.  r    c                 4    t          j        t                    S r@   )r   rZ   r   r[   s     r   resolve_selpzCudaModuleTemplate.resolve_selp  ra  r    c                 4    t          j        t                    S r@   )r   rZ   r  r[   s     r   resolve_nanosleepz$CudaModuleTemplate.resolve_nanosleep   r>  r    c                 >    t          j        t          j                  S r@   )r   r_   r   r  r[   s     r   resolve_atomicz!CudaModuleTemplate.resolve_atomic#  r  r    c                 >    t          j        t          j                  S r@   )r   r_   r   r   r[   s     r   resolve_fp16zCudaModuleTemplate.resolve_fp16&  s    |DI&&&r    c                 >    t          j        t          j                  S r@   )r   r_   r   rB   r[   s     r   resolve_constz CudaModuleTemplate.resolve_const)      |DJ'''r    c                 >    t          j        t          j                  S r@   )r   r_   r   r;   r[   s     r   resolve_localz CudaModuleTemplate.resolve_local,  r  r    N)$r1   r2   r3   r   r_   r   r8   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r    r   r  r    s       
%,t

C% % %          ) ) )) ) )) ) )( ( (( ( (( ( () ) )7 7 76 6 67 7 7- - -8 8 88 8 83 3 33 3 3/ / /0 0 0) ) ). . .) ) )' ' '( ( (( ( ( ( (r    r  )operator
numba.corer   numba.core.typing.npydeclr   r   r   r   r   numba.core.typing.templatesr	   r
   r   r   r   r   numba.cuda.typesr   r   numba.core.typeconvr   numbar   numba.cuda.compilerr   registryr   register_attrr   r   r5   r:   r=   rE   rJ   rM   rP   rT   rX   ra   rk   rp   rw   rz   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   floatr   r   r   r   r   r   haddr_  addCuda_addiadd	Cuda_iaddhsubrc  subCuda_subisub	Cuda_isubhmulrf  mulCuda_mulimul	Cuda_imulhmaxr  hminr  hnegrm  negCuda_neghabsrp  absCuda_absheqr  eqhner  nehger  gehgtr  gthler  lehltr  lttruedivitruedivr   r   ru  rx  r{  r~  r  r  r  r  r  r  r  r  r  r  r  ri  r  r   r   rd   r   r   r   all_numba_typesr  unsigned_int_numba_typesr  r2  r6  maxrL  minrO  nanmaxrV  nanminrR  and_r9  or_r<  xorr@  incrC  decrF  exchrI  r
  r  r  r  r$  r(  r,  r0  r]  r  r_   funcr   r    r   <module>r<     s         @ @ @ @ @ @ @ @ @ @ @ @ @ @> > > > > > > > > > > > > > > > . - - - - - - - * * * * * *       @ @ @ @ @ @8::&*   ( ( (    &   0 
       
 
       
 
    ,   
 
$ $ $ $ $. $ $ 
$
 
$ $ $ $ $- $ $ 
$
 
$ $ $ $ $. $ $ 
$
 
E E E E E$ E E 
E
 
$ $ $ $ $( $ $ 
$
 1 1 1 1 1, 1 1 17 7 7 7 7+ 7 7 7 E E E E E' E E E 
    /   
 
6 6 6 6 6/ 6 6 
6 
    *   
 
    *   
 
& & & & && & & 
&
 
& & & & &' & & 
&
 
        
$ 
	 	 	 	 	 	 	 
	 
        
 
        
 
        
 
       
$ 
       
$ 
( ( ( ( (  ( ( 
((  
 
 
   ' ' ' ' ' ' ' '  0  >/ / /4 4 4 ODIN++	##HL11$$X]33	ODIN++	##HL11$$X]33	ODIN++	##HL11$$X]33	ODIN++	ODIN++	N49>**	""8<00N49>**	""3''%%dim44  X[ ) ) )%%dim44  X[ ) ) )%%dim44  X[ ) ) )%%dim44  X[ ) ) )%%dim44  X[ ) ) )%%dim44  X[ ) ) )  ) * * *  * + + +           %$V,,$$V,,$$V,,&&x00%%g..$$V,,&&x00%%g..%%g..&&x00&&x00%%g..$$V,,%%g..&&x00%%f--  & =%-;;. {EL{EL2  "L%,7 $t{88$t{88$t{88$t{88T$+,o>> T$+,o>> $t{')<==dko':;;$t{(;<<$t{(@AA$t{(@AA4(*=>>  
	1 	1 	1 	1 	1#3 	1 	1 
	1 
6 6 6 6 6& 6 6 
6" 
2 2 2 2 2% 2 2 
2 
 
 
 
 
" 
 
 
 1 1 1 1 10 1 1 1 5 5 5 5 5/ 5 5 5 0 0 0 0 0/ 0 0 0 +/ +/ +/ +/ +/* +/ +/ +/\ [) [) [) [) [)( [) [) [)| X( X( X( X( X(* X( X( X(v lel4(( ) ) )
 $ 0 0D////0 0r    