
    ܙd                         d dl Z d dlmZ d dlmZ ddlmZ ddlmZ d Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zdddd d!d"d#d$d%d&d'd(d)d*d+Zd, Z  G d- d.e!          Z"d/ Z#dS )0    N)ir)cgutils   )nvvm)current_contextc                 <   dt          |          z   dz   }t          j        t          j        |          t          j        t          j        |                    t          j        |          t          j        |          f          }t          j        | ||          S )N___numba_atomic_i	_cas_hack)strr   FunctionTypeIntTypePointerTyper   get_or_insert_function)lmodisizefnamefntys       4lib/python3.11/site-packages/numba/cuda/nvvmutils.pydeclare_atomic_cas_intr      s    #e**,{:E?2:e,,N2:e+<+<==Ju--Ju--/0 0D )$e<<<    c                     t          j                    j        r/|                     |||dd          }|                     |d          S |                     t          ||          |||f          S )N	monotonicr   )r   NVVM	is_nvvm70cmpxchgextract_valuecallr   )builderr   r   ptrcmpvalouts          r   atomic_cmpxchgr#      sn    y{{ -ooc3[+FF$$S!,,,||24?? #sO- - 	-r   c                     d}t          j        t          j                    t          j        t          j                    d          t          j                    f          }t	          j        | ||          S )Nz#llvm.nvvm.atomic.load.add.f32.p0f32r   r   r   	FloatTyper   r   r   r   r   r   s      r   declare_atomic_add_float32r(      sW    1E?2<>>N2<>>1==r|~~NP PD)$e<<<r   c                 (   t                      j        j        dk    rd}nd}t          j        t          j                    t          j        t          j                              t          j                    f          }t          j        | ||          S )N)   r   z#llvm.nvvm.atomic.load.add.f64.p0f64___numba_atomic_double_add)	r   devicecompute_capabilityr   r   
DoubleTyper   r   r   r'   s      r   declare_atomic_add_float64r/   !   ss    2f<<5,?2=??N2=??;;R]__MO OD)$e<<<r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_float_subr%   r'   s      r   declare_atomic_sub_float32r2   +   U    'E?2<>>N2<>>::BLNNKM MD)$e<<<r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_double_subr   r   r.   r   r   r   r'   s      r   declare_atomic_sub_float64r7   2   U    (E?2=??N2=??;;R]__MO OD)$e<<<r   c                     d}t          j        t          j        d          t          j        t          j        d                    t          j        d          f          }t	          j        | ||          S )Nz"llvm.nvvm.atomic.load.inc.32.p0i32    r   r   r   r   r   r   r'   s      r   declare_atomic_inc_int32r<   9   [    0E?2:b>>N2:b>>::BJrNNKM MD)$e<<<r   c                     d}t          j        t          j        d          t          j        t          j        d                    t          j        d          f          }t	          j        | ||          S )N___numba_atomic_u64_inc@   r;   r'   s      r   declare_atomic_inc_int64rA   @   [    %E?2:b>>N2:b>>::BJrNNKM MD)$e<<<r   c                     d}t          j        t          j        d          t          j        t          j        d                    t          j        d          f          }t	          j        | ||          S )Nz"llvm.nvvm.atomic.load.dec.32.p0i32r:   r;   r'   s      r   declare_atomic_dec_int32rD   G   r=   r   c                     d}t          j        t          j        d          t          j        t          j        d                    t          j        d          f          }t	          j        | ||          S )N___numba_atomic_u64_decr@   r;   r'   s      r   declare_atomic_dec_int64rG   N   rB   r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_float_maxr%   r'   s      r   declare_atomic_max_float32rJ   U   r3   r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_double_maxr6   r'   s      r   declare_atomic_max_float64rM   \   r8   r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_float_minr%   r'   s      r   declare_atomic_min_float32rP   c   r3   r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_double_minr6   r'   s      r   declare_atomic_min_float64rS   j   r8   r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_float_nanmaxr%   r'   s      r   declare_atomic_nanmax_float32rV   q   U    *E?2<>>N2<>>::BLNNKM MD)$e<<<r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_double_nanmaxr6   r'   s      r   declare_atomic_nanmax_float64rZ   x   U    +E?2=??N2=??;;R]__MO OD)$e<<<r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_float_nanminr%   r'   s      r   declare_atomic_nanmin_float32r^      rW   r   c                     d}t          j        t          j                    t          j        t          j                              t          j                    f          }t	          j        | ||          S )N___numba_atomic_double_nanminr6   r'   s      r   declare_atomic_nanmin_float64ra      r[   r   c                     d}t          j        t          j        d          t          j        d          f          }t          j        | ||          S )NcudaCGGetIntrinsicHandler@   r:   r   r   r   r   r   r'   s      r    declare_cudaCGGetIntrinsicHandlere      sE    &E?2:b>>JrNN,. .D)$e<<<r   c                     d}t          j        t          j        d          t          j        d          t          j        d          f          }t          j        | ||          S )NcudaCGSynchronizer:   r@   rd   r'   s      r   declare_cudaCGSynchronizerh      sO    E?2:b>>JrNNBJrNN;= =D)$e<<<r   c                 f   | j         j        j        }t          j        |                    d          dz             }t          j        ||j        dt          j	                  }d|_
        d|_        ||_        |                     |t          j        t          j        d                    d          S )	Nzutf-8    _str)name	addrspaceinternalT   generic)basic_blockfunctionmoduler   make_bytearrayencodeadd_global_variabletyper   ADDRSPACE_CONSTANTlinkageglobal_constantinitializeraddrspacecastr   r   r   )r   valuer   cvalgls        r   declare_stringr      s    '.D!%,,w"7"7'"ABBD		$T496/3/F
H 
H 
HBBJBBN  R^BJqMM%B%BINNNr   c                     t          j        t          j        d                    }t          j        t          j        d          ||g          }t	          j        | |d          }|S )Nro   r:   vprintf)r   r   r   r   r   r   )r   	voidptrty	vprintftyr   s       r   declare_vprintr      sR    rz!}}--I 
2I0FGGI,T9iHHGNr   zllvm.nvvm.read.ptx.sreg.tid.xzllvm.nvvm.read.ptx.sreg.tid.yzllvm.nvvm.read.ptx.sreg.tid.zzllvm.nvvm.read.ptx.sreg.ntid.xzllvm.nvvm.read.ptx.sreg.ntid.yzllvm.nvvm.read.ptx.sreg.ntid.zzllvm.nvvm.read.ptx.sreg.ctaid.xzllvm.nvvm.read.ptx.sreg.ctaid.yzllvm.nvvm.read.ptx.sreg.ctaid.zz llvm.nvvm.read.ptx.sreg.nctaid.xz llvm.nvvm.read.ptx.sreg.nctaid.yz llvm.nvvm.read.ptx.sreg.nctaid.zz llvm.nvvm.read.ptx.sreg.warpsizezllvm.nvvm.read.ptx.sreg.laneid)ztid.xztid.yztid.zzntid.xzntid.yzntid.zzctaid.xzctaid.yzctaid.zznctaid.xznctaid.yznctaid.zwarpsizelaneidc                     | j         }t          j        t          j        d          d          }t	          j        ||t          |                   }|                     |d          S )Nr:    )rs   r   r   r   r   r   SREG_MAPPINGr   )r   rl   rs   r   fns        r   	call_sregr      sO    ^F?2:b>>2..D		'l46H	I	IB<<Br   c                   2    e Zd Zd Zd Zd Zd Zd Zd ZdS )SRegBuilderc                     || _         d S N)r   )selfr   s     r   __init__zSRegBuilder.__init__   s    r   c                 2    t          | j        d|z            S )Nztid.%sr   r   r   xyzs     r   tidzSRegBuilder.tid   s    x#~666r   c                 2    t          | j        d|z            S )Nzctaid.%sr   r   s     r   ctaidzSRegBuilder.ctaid   s    zC'7888r   c                 2    t          | j        d|z            S )Nzntid.%sr   r   s     r   ntidzSRegBuilder.ntid   s    y3777r   c                 2    t          | j        d|z            S )Nz	nctaid.%sr   r   s     r   nctaidzSRegBuilder.nctaid   s    {S'8999r   c                     |                      |          }|                     |          }|                     |          }| j                            | j                            ||          |          }|S r   )r   r   r   r   addmul)r   r   r   r   r   ress         r   getdimzSRegBuilder.getdim   s]    hhsmmyy~~Clt|//f==sCC
r   N)	__name__
__module____qualname__r   r   r   r   r   r   r   r   r   r   r      sn          7 7 79 9 98 8 8: : :    r   r   c                     t          |           fddD             }t          t          j        |d |                    }|dk    r|d         S |S )Nc              3   B   K   | ]}                     |          V  d S r   )r   ).0r   sregs     r   	<genexpr>z get_global_id.<locals>.<genexpr>   s/      	,	,s$++c

	,	,	,	,	,	,r   r   r   r   )r   list	itertoolsislice)r   dimitseqr   s       @r   get_global_idr      s\    wD	,	,	,	,e	,	,	,B
yD#..
/
/C
axx1v
r   )$r   llvmliter   
numba.corer   cudadrvr   apir   r   r#   r(   r/   r2   r7   r<   rA   rD   rG   rJ   rM   rP   rS   rV   rZ   r^   ra   re   rh   r   r   r   r   objectr   r   r   r   r   <module>r      s0                                    = = =- - -= = == = == = == = == = == = == = == = == = == = == = == = == = == = == = == = == = == = =	O 	O 	O   -,,...0002222.% ,         &   0    r   