
    ܙdz                     z   d Z ddlZddlZddlZddlZddlmZmZmZm	Z	m
Z
mZmZ ddlZddlmZ ddlmZmZmZ ddlmZmZmZ ddlmZmZ  ej        e          ZdZdZd	Z d
Z!dZ"eZ#eZ$d%                                Z& e'e&          D ]\  Z(Z) e*ej+        e         e)e(           dZ,dZ-d Z. ej/                    Z0 G d de1          Z2 G d de1          Z3dZ4ddddddddddddZ5d Z6d Z7d Z8d Z9dZ: G d  d!e1          Z;d"Z<d#Z=d$Z>d%Z?d&Z@d'ZAd(ZBd) ZCd* ZDd+ ZEd, ZFd- ZGd. ZHd/ ZId0 ZJ ejK        d1          ZL ejK        d2          ZM ejK        d3          ZNd4ZO ejK        eOP                    d5d6                    ZQ ejK        d7          ZRh d8ZS ejK        d9          ZT ejK        d:          ZU ejK        d;          ZV ejK        d<          ZW ejK        d=          ZX ejK        d>          ZY ejK        d?          ZZ ejK        d@          Z[ ejK        dA          Z\dBdCdDZ]dE Z^dF Z_dG Z`dH ZadI ZbdJ ZcdS )Kz(
This is a direct translation of nvvm.h
    N)c_void_pc_intPOINTERc_char_pc_size_tbyrefc_char)ir   )	NvvmErrorNvvmSupportErrorNvvmWarning)get_libdeviceopen_libdeviceopen_cudalib)cgutilsconfig         a  
NVVM_SUCCESS
NVVM_ERROR_OUT_OF_MEMORY
NVVM_ERROR_PROGRAM_CREATION_FAILURE
NVVM_ERROR_IR_VERSION_MISMATCH
NVVM_ERROR_INVALID_INPUT
NVVM_ERROR_INVALID_PROGRAM
NVVM_ERROR_INVALID_IR
NVVM_ERROR_INVALID_OPTION
NVVM_ERROR_NO_MODULE_IN_PROGRAM
NVVM_ERROR_COMPILATION
ze-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64ze-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64c                  F    	 t                       dS # t          $ r Y dS w xY w)z(
    Return if libNVVM is available
    TF)NVVMr        7lib/python3.11/site-packages/numba/cuda/cudadrv/nvvm.pyis_availabler   <   s;     t    uus    
  c                      e Zd ZdZe ee           ee          fe ee          fe ee          feeee	efeeee	efeee ee          fee ee	          feeefee ee	          feeefe ee           ee           ee           ee          feee ee          fdZ
dZd Zd Zed             Zed             Zed             Zd	 Zd
 ZddZdS )r   zProcess-wide singleton.
    )nvvmVersionnvvmCreateProgramnvvmDestroyProgramnvvmAddModuleToProgramnvvmLazyAddModuleToProgramnvvmCompileProgramnvvmGetCompiledResultSizenvvmGetCompiledResultnvvmGetProgramLogSizenvvmGetProgramLognvvmIRVersionnvvmVerifyProgramNc                    t           5  | j        t                              |           x| _        }	 t	          d          |_        n-# t          $ r }d | _        d}t          ||z            d }~ww xY w|j        	                                D ]G\  }}t          |j        |          }|d         |_        |dd          |_        t          |||           Hd d d            n# 1 swxY w Y   | j        S )Nnvvmz;libNVVM cannot be found. Do `conda install cudatoolkit`:
%sr   r   )
_nvvm_lock_NVVM__INSTANCEobject__new__r   driverOSErrorr   _PROTOTYPESitemsgetattrrestypeargtypessetattr)clsinsteerrmsgnameprotofuncs          r   r/   zNVVM.__new__   s@    	. 	.~%(.s(;(;;7".v"6"6DKK 7 7 7%)CN2F*6A:666	7 $(#3#9#9#;#; . .KD%"4;55D#(8DL$)!""IDMD$----!	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	. 	.$ ~s5   )C!AC!
A1A,,A11A$C!!C%(C%c                     |                                  }|d         | _        |d         | _        |d         | _        |d         | _        t                      | _        d S )Nr   r      r   )get_ir_version_majorIR_minorIR	_majorDbg	_minorDbgget_supported_ccs_supported_ccs)selfir_versionss     r   __init__zNVVM.__init__   sS    ))++#A#A$Q$Q/11r   c                 &    | j         | j        fdk    S )N)r      )rB   rC   rH   s    r   	is_nvvm70zNVVM.is_nvvm70   s    
 t}-77r   c                 B    | j         | j        fdk     rt          S t          S )N)r      )rB   rC   _datalayout_original_datalayout_i128rM   s    r   data_layoutzNVVM.data_layout   s"    M4=)F22''##r   c                     | j         S N)rG   rM   s    r   supported_ccszNVVM.supported_ccs   s    ""r   c                     t                      }t                      }|                     t          |          t          |                    }|                     |d           |j        |j        fS )NzFailed to get version.)r   r   r   check_errorvalue)rH   majorminorerrs       r   get_versionzNVVM.get_version   s[    uU||U5\\::6777{EK''r   c                 j   t                      }t                      }t                      }t                      }|                     t          |          t          |          t          |          t          |                    }|                     |d           |j        |j        |j        |j        fS )NzFailed to get IR version.)r   r(   r   rX   rY   )rH   majorIRminorIRmajorDbgminorDbgr\   s         r   rA   zNVVM.get_ir_version   s    ''''7777  ww!&x%//C C9:::}gmX^X^KKr   Fc                     |rDt          |t          |                   }|r%t          |           t          j        d           d S |d S )Nr   )r   RESULT_CODE_NAMESprintsysexit)rH   errormsgrg   excs        r   rX   zNVVM.check_error   sR     	C!25!9::C c


		 	r   )F)__name__
__module____qualname____doc__nvvm_resultr   r   nvvm_programr   r   r2   r-   r/   rJ   propertyrN   rS   rV   r]   rA   rX   r   r   r   r   r   K   s        
 $WWU^^WWU^^D *77<+@+@A  +GGL,A,AB
 x8#E x8'E uggh.?.?A wwx'8'8&: #.|X!F #.|WWX=N=N!O *<B &wwu~~wwu~~!'%..''%..: *<%gh//1c3 3Kl J  *2 2 2 8 8 X8 $ $ X$ # # X#( ( (L L L     r   r   c                   8    e Zd Zd Zd Zd Zd Zd Zd Zd Z	dS )	CompilationUnitc                     t                      | _        t                      | _        | j                            t          | j                            }| j                            |d           d S )NzFailed to create CU)r   r0   rp   _handler   r   rX   )rH   r\   s     r   rJ   zCompilationUnit.__init__   sV    ff#~~k++E$,,?,?@@%:;;;;;r   c                     t                      }|                    t          | j                            }|                    |dd           d S )NzFailed to destroy CUT)rg   )r   r    r   ru   rX   )rH   r0   r\   s      r   __del__zCompilationUnit.__del__   sJ    ''dl(;(;<<3 6TBBBBBr   c                     | j                             | j        |t          |          d          }| j                             |d           dS )z
         Add a module level NVVM IR to a compilation unit.
         - The buffer should contain an NVVM module IR either in the bitcode
           representation (LLVM3.0) or in the text representation.
        NFailed to add module)r0   r!   ru   lenrX   rH   bufferr\   s      r   
add_modulezCompilationUnit.add_module   sM     k00v14VdD D%;<<<<<r   c                     | j                             | j        |t          |          d          }| j                             |d           dS )z
        Lazily add an NVVM IR module to a compilation unit.
        The buffer should contain NVVM module IR either in the bitcode
        representation or in the text representation.
        Nry   )r0   r"   ru   rz   rX   r{   s      r   lazy_add_modulezCompilationUnit.lazy_add_module   sM     k44T\658[[$H H%;<<<<<r   c                    g }d|v r+|                     d|                    d          z             |                    d          r+|                     d|                    d          z             d}|D ]c}||v r]t          t	          |                    |                              }|                     d|                    dd          |fz             d|r\d	                    t          t          |	                                                    }t          d
                    |                    t          t          |          z  d |D              }| j                            | j        t          |          |          }|                     |d           | j                            | j        t          |          |          }|                     |d           t'                      }	| j                            | j        t+          |	                    }|                     |d           t-          |	j        z              }
| j                            | j        |
          }|                     |d           |                                 | _        | j        r t7          j        | j        t:                     |
dd         S )aj  Perform Compilation

        The valid compiler options are

         *   - -opt=
         *     - 0 (disable optimizations)
         *     - 3 (default, enable optimizations)
         *   - -arch=
         *     - compute_XX where XX is in (35, 37, 50, 52, 53, 60, 61, 62, 70,
         *                                  72, 75, 80, 86, 89, 90).
         *       The default is compute_52.
         *   - -ftz=
         *     - 0 (default, preserve denormal values, when performing
         *          single-precision floating-point operations)
         *     - 1 (flush denormal values to zero, when performing
         *          single-precision floating-point operations)
         *   - -prec-sqrt=
         *     - 0 (use a faster approximation for single-precision
         *          floating-point square root)
         *     - 1 (default, use IEEE round-to-nearest mode for
         *          single-precision floating-point square root)
         *   - -prec-div=
         *     - 0 (use a faster approximation for single-precision
         *          floating-point division and reciprocals)
         *     - 1 (default, use IEEE round-to-nearest mode for
         *          single-precision floating-point division and reciprocals)
         *   - -fma=
         *     - 0 (disable FMA contraction)
         *     - 1 (default, enable FMA contraction)
         *
         optz-opt=%darchz-arch=%s)ftz	prec_sqrtprec_divfmaz-%s=%d_-, zunsupported option {0}c                 R    g | ]$}t          |                    d                     %S )utf8)r   encode).0xs     r   
<listcomp>z+CompilationUnit.compile.<locals>.<listcomp>+  s<     *9 *9 *9./ +3188F3C3C*D*D *9 *9 *9r   zFailed to verify
zFailed to compile
z&Failed to get size of compiled result.zFailed to get compiled result.)categoryN)appendpopgetintboolreplacejoinmapreprkeysr   formatr   rz   r0   r)   ru   
_try_errorr#   r   r$   r   r	   rY   r%   get_loglogwarningswarnr   )rH   optionsoptsother_optionskvoptstrc_optsr\   reslenptxbufs              r   compilezCompilationUnit.compile   s   D GKK	GKK$6$66777;;v 	:KK
W[[%8%88999
  	A 	AAG||W[[^^,,--H		#s(;(;Q'??@@@  	EYYs48899F4;;FCCDDDSYY& *9 *937*9 *9 *9 : k++DL#d))VLL1222 k,,T\3t99fMM2333 k33DL%--PPEFFF6<'**k//fEE=>>> <<>>8 	:M$([9999aaayr   c                 j    | j                             ||d|                                            d S )N
)r0   rX   r   )rH   r\   ri   s      r   r   zCompilationUnit._try_errorF  s3    dllnnn%EFFFFFr   c                    t                      }| j                            | j        t	          |                    }| j                            |d           |j        dk    rkt          |j        z              }| j                            | j        |          }| j                            |d           |j        	                    d          S dS )Nz#Failed to get compilation log size.r   zFailed to get compilation log.r    )
r   r0   r&   ru   r   rX   rY   r	   r'   decode)rH   r   r\   logbufs       r   r   zCompilationUnit.get_logI  s    k//eFmmLL%JKKK<!v|+..F+//fEECK##C)IJJJ<&&v...rr   N)
rk   rl   rm   rJ   rw   r}   r   r   r   r   r   r   r   rs   rs      s        < < <C C C
= = == = =T T TlG G G    r   rs   )r   r   )r      r   r   )r   r@   )r   r   )rL   r   )rL   r   )rL   r@   )r   r   )r   r@   )r   r   rP   r   rP   rL   rP   r   )rP   	   r   r   )r   r   )r   r   )r   r   )r   r   )r   r   ))   r   )r   r   )r   r@   )r   r   )r   r   )r   r   )r   rL   )r   r   )r   rP   )   r   )r   r   c                     	 t           |          \  t          fdt          D                       S # t          $ r! t          d t          D                       cY S w xY w)Nc                 4    g | ]}|cxk    rk    n n|S r   r   )r   ccmax_ccmin_ccs     r   r   z(ccs_supported_by_ctk.<locals>.<listcomp>u  sD     1 1 1R2///////// ///r   c                 2    g | ]}|t           j        k    |S r   )r   CUDA_DEFAULT_PTX_CC)r   r   s     r   r   z(ccs_supported_by_ctk.<locals>.<listcomp>z  s/     ; ; ;Rv999 999r   )CTK_SUPPORTEDtupleCOMPUTE_CAPABILITIESKeyError)ctk_versionr   r   s    @@r   ccs_supported_by_ctkr   q  s    	<&{3 1 1 1 1 1#7 1 1 1 2 2 	2 < < <  ; ;#7 ; ; ; < < 	< 	< 	<<s   05 (A A c                  ,   	 ddl m}  |                                 }n#  d}|cY S xY wt          t                    }||k     rCd}|d          d|d          }d| d|d          d|d          d}t          j        |           |S t          |          }|S )	Nr   )runtimer   .r   zCUDA Toolkit z is unsupported by Numba - z! is the minimum required version.)numba.cuda.cudadrv.runtimer   r]   minr   r   r   r   )r   cudart_version_supported_cc
min_cudartctk_verunsupported_vers         r   rF   rF   ~  s    666666 ,,..  ]##J
""#A&<<):<</7 / /(m/ /.8m/ / / 	o&&&(88Ms    %c                     t                      j        }|sd}t          |          t          |          D ]?\  }}|| k    r|c S || k    r*|dk    rd| |z   z  }t          |          ||dz
           c S @|d         S )z
    Given a compute capability, return the closest compute capability supported
    by the CUDA toolkit.

    :param mycc: Compute capability as a tuple ``(MAJOR, MINOR)``
    :return: Closest supported CC as a tuple ``(MAJOR, MINOR)``
    zmNo supported GPU compute capabilities found. Please check your cudatoolkit version matches your CUDA version.r   z?GPU compute capability %d.%d is not supported(requires >=%d.%d)r   )r   rV   r   	enumerate)myccrV   ri   ir   s        r   find_closest_archr     s     FF(M $Qs###=)) , ,2::III$YYAvv+.2Ri9&s+++ %QU++++  r   c                 `    t           j        rt           j        }nt          | |f          }d|z  S )z1Matches with the closest architecture option
    zcompute_%d%d)r   FORCE_CUDA_CCr   )rZ   r[   r   s      r   get_arch_optionr     s5      1# %00D  r   z~Missing libdevice file.
Please ensure you have package cudatoolkit >= 11.0
Install package by:

    conda install cudatoolkit
c                       e Zd ZdZd Zd ZdS )	LibDeviceNc                     | j         5t                      t          t                    t	                      | _         | j         | _        d S rU   )_cache_r   RuntimeErrorMISSING_LIBDEVICE_FILE_MSGr   bcrM   s    r   rJ   zLibDevice.__init__  s=    <&"#=>>>)++DL,r   c                     | j         S rU   )r   rM   s    r   r   zLibDevice.get  s	    wr   )rk   rl   rm   r   rJ   r   r   r   r   r   r     s7        G      r   r   z
define internal {T} @___numba_atomic_{T}_cas_hack({T}* %ptr, {T} %cmp, {T} %val) alwaysinline {{
    %out = cmpxchg volatile {T}* %ptr, {T} %cmp, {T} %val monotonic
    ret {T} %out
}}
z
    %cas_success = cmpxchg volatile {Ti}* %iptr, {Ti} %old, {Ti} %new monotonic monotonic
    %cas = extractvalue {{ {Ti}, i1 }} %cas_success, 0
zI
    %cas = cmpxchg volatile {Ti}* %iptr, {Ti} %old, {Ti} %new monotonic
a  
define internal {T} @___numba_atomic_{T}_{FUNC}({T}* %ptr, {T} %val) alwaysinline {{
entry:
    %iptr = bitcast {T}* %ptr to {Ti}*
    %old2 = load volatile {Ti}, {Ti}* %iptr
    br label %attempt

attempt:
    %old = phi {Ti} [ %old2, %entry ], [ %cas, %attempt ]
    %dold = bitcast {Ti} %old to {T}
    %dnew = {OP} {T} %dold, %val
    %new = bitcast {T} %dnew to {Ti}
    {CAS}
    %repeat = icmp ne {Ti} %cas, %old
    br i1 %repeat, label %attempt, label %done

done:
    %result = bitcast {Ti} %old to {T}
    ret {T} %result
}}
a  
define internal {T} @___numba_atomic_{Tu}_inc({T}* %iptr, {T} %val) alwaysinline {{
entry:
    %old2 = load volatile {T}, {T}* %iptr
    br label %attempt

attempt:
    %old = phi {T} [ %old2, %entry ], [ %cas, %attempt ]
    %bndchk = icmp ult {T} %old, %val
    %inc = add {T} %old, 1
    %new = select i1 %bndchk, {T} %inc, {T} 0
    {CAS}
    %repeat = icmp ne {T} %cas, %old
    br i1 %repeat, label %attempt, label %done

done:
    ret {T} %old
}}
a  
define internal {T} @___numba_atomic_{Tu}_dec({T}* %iptr, {T} %val) alwaysinline {{
entry:
    %old2 = load volatile {T}, {T}* %iptr
    br label %attempt

attempt:
    %old = phi {T} [ %old2, %entry ], [ %cas, %attempt ]
    %dec = add {T} %old, -1
    %bndchk = icmp ult {T} %dec, %val
    %new = select i1 %bndchk, {T} %dec, {T} %val
    {CAS}
    %repeat = icmp ne {T} %cas, %old
    br i1 %repeat, label %attempt, label %done

done:
    ret {T} %old
}}
a  
define internal {T} @___numba_atomic_{T}_{NAN}{FUNC}({T}* %ptr, {T} %val) alwaysinline {{
entry:
    %ptrval = load volatile {T}, {T}* %ptr
    ; Return early when:
    ; - For nanmin / nanmax when val is a NaN
    ; - For min / max when val or ptr is a NaN
    %early_return = fcmp uno {T} %val, %{PTR_OR_VAL}val
    br i1 %early_return, label %done, label %lt_check

lt_check:
    %dold = phi {T} [ %ptrval, %entry ], [ %dcas, %attempt ]
    ; Continue attempts if dold less or greater than val (depending on whether min or max)
    ; or if dold is NaN (for nanmin / nanmax)
    %cmp = fcmp {OP} {T} %dold, %val
    br i1 %cmp, label %attempt, label %done

attempt:
    ; Attempt to swap in the value
    %old = bitcast {T} %dold to {Ti}
    %iptr = bitcast {T}* %ptr to {Ti}*
    %new = bitcast {T} %val to {Ti}
    {CAS}
    %dcas = bitcast {Ti} %cas to {T}
    br label %lt_check

done:
    ret {T} %ptrval
}}
c                     t                      j        rt                              |           S t                              |           S )NTi)r   rN   
cas_nvvm70r   
cas_nvvm34r   s    r   ir_casr   K  s?    vv (  B '''  B '''r   c           	      f    t          | |||t          |                    }t          j        di |S )N)Tr   OPFUNCCASr   )dictr   ir_numba_atomic_binary_templater   )r   r   r   r   paramss        r   ir_numba_atomic_binaryr   R  s7    A"$F2JJ???F*1;;F;;;r   c                 j    t          | |||||t          |                    }t          j        di |S )N)r   r   NANr   
PTR_OR_VALr   r   r   )r   r   ir_numba_atomic_minmax_templater   )r   r   r   r   r   r   r   s          r   ir_numba_atomic_minmaxr   W  sD    A"#"- - -F +1;;F;;;r   c                 V    t                               | |t          |                     S N)r   Tur   )ir_numba_atomic_inc_templater   r   r   r   s     r   ir_numba_atomic_incr   ^  #    '..rvayy.IIIr   c                 V    t                               | |t          |                     S r   )ir_numba_atomic_dec_templater   r   r   s     r   ir_numba_atomic_decr   b  r   r   c                    |                                  }t          |          D ]G\  }}|                    d          r-d}|                    t	                      j                  ||<    nHd                    |          S )z@
    Find the line containing the datalayout and replace it
    ztarget datalayoutztarget datalayout = "{0}"r   )
splitlinesr   
startswithr   r   rS   r   )llvmirlinesr   lntmps        r   _replace_datalayoutr  f  s     E5!!  2==,-- 	-Czz$&&"455E!HE	 99Ur   c                    dt          dddd          fdt          dd	d
d          fdt          ddd
d          fdt          dd          fdt          dd          fdt          dd	dddd          fdt          dddddd          fdt          dd	dddd          fdt          dddddd          fdt          dd	dddd          fdt          dddddd          fd t          dd	dd!dd          fd"t          dddd!dd          fd#g}t	                      j        sL|d$t                              d	%          fd&t                              d%          fgz  }t          |           } |D ]\  }}| 	                    ||          } t	                      j        rt          |           } nt          |           } | S )'NzIdeclare double @"___numba_atomic_double_add"(double* %".1", double %".2")doublei64faddadd)r   r   r   r   zEdeclare float @"___numba_atomic_float_sub"(float* %".1", float %".2")floati32fsubsubzIdeclare double @"___numba_atomic_double_sub"(double* %".1", double %".2")z=declare i64 @"___numba_atomic_u64_inc"(i64* %".1", i64 %".2")u64r   z=declare i64 @"___numba_atomic_u64_dec"(i64* %".1", i64 %".2")zEdeclare float @"___numba_atomic_float_max"(float* %".1", float %".2")r   znnan oltptrmax)r   r   r   r   r   r   zIdeclare double @"___numba_atomic_double_max"(double* %".1", double %".2")zEdeclare float @"___numba_atomic_float_min"(float* %".1", float %".2")znnan ogtr   zIdeclare double @"___numba_atomic_double_min"(double* %".1", double %".2")zHdeclare float @"___numba_atomic_float_nanmax"(float* %".1", float %".2")nanultzLdeclare double @"___numba_atomic_double_nanmax"(double* %".1", double %".2")zHdeclare float @"___numba_atomic_float_nanmin"(float* %".1", float %".2")ugtzLdeclare double @"___numba_atomic_double_nanmin"(double* %".1", double %".2"))immargr   zMdeclare i32 @"___numba_atomic_i32_cas_hack"(i32* %".1", i32 %".2", i32 %".3"))r   zMdeclare i64 @"___numba_atomic_i64_cas_hack"(i64* %".1", i64 %".2", i64 %".3"))r   r   r   r   r   rN   ir_numba_cas_hackr   r  r   llvm100_to_70_irllvm100_to_34_ir)r  replacementsdeclfns       r   llvm_replacer  s  s   	T	(ue	L	L	L	N	P	'eU	K	K	K	M	T	(ue	L	L	L	N	H	u	/	/	/	1	H	u	/	/	/	1	P	'e
+0u
> 
> 
>	? 
U	(u"+0u
> 
> 
>	? 
Q	'e
+0u
> 
> 
>	? 
U	(u"+0u
> 
> 
>	? 
T	'e5+-E
; 
; 
;	< 
X	(u%E+-E
; 
; 
;	< 
T	'e5+-E
; 
; 
;	< 
X	(u%E+-E
; 
; 
;	< 	G$LL 66 - 	\%%%..0\%%%..0
 	
 %V,,  * *bb))vv *!&))!&))Mr   c                    t          | t                    r| g} |                    dd          r|                    ddddd           t	                      }t                      }| D ]9}t          |          }|                    |                    d                     :|	                    |
                                            |j        di |S )NfastmathFT)r   r   r   r   r   r   )
isinstancestrr   updaters   r   r  r}   r   r   r   r   )r  r   cu	libdevicemods        r   llvm_to_ptxr(    s    &# xx
E"" 	
 
 	 	 	 
		BI * *3
cjj(())))y}}'''2:r   z	\!\d+\s*=zmetadata\s*\![{'\"0-9]z\!\d+z,\!{i32 \d, \!\"Debug Info Version\", i32 \d} z\s+z"^attributes #\d+ = \{ ([\w\s]+)\ }>   coldminsizeoptiszeoptnonenoinlinenoreturnnounwindreadnonereadonly
inlinehintnoduplicatealwaysinlinez"\bgetelementptr\s(?:inbounds )?\(?z=\s*\bload\s(?:\bvolatile\s)?z(call\s[^@]+\))(\s@)z\s*!range\s+!\d+z
[,{}()[\]]z\bnonnull\bz"\b(local_unnamed_addr|writeonly)\bz\((.*)\)zspFlags: (.*),isDefinitionisOptimized)DISPFlagDefinitionDISPFlagOptimizedc                    g } | j                     D ]}|                    d          rt                              |          }|                    d                                          }d                    d |D                       }|                    |                    d          |          }|                    |           d                    |          S )z,
    Convert LLVM 10.0 IR for LLVM 7.0.
    attributes #r   r)  c              3   &   K   | ]}|d k    |V  dS )
willreturnNr   r   as     r   	<genexpr>z#llvm100_to_70_ir.<locals>.<genexpr>  s,      CC1l1B1BQ1B1B1B1BCCr   r   )	r  r  re_attributes_defmatchgroupsplitr   r   r   )r
   buflinemattrss        r   r  r    s     C  ??>** 	3!''--AGGAJJ$$&&EHHCCCCCCCE<<

E22D

499S>>r   c                    d }g } | j                     D ]}|                    d          r|                    dd          }|                                                    d          rd|v r|                    dd          }t                              |          rdt                              |          u r|                    dd	          }|                    d
d          }|                    d          }|d|dz            ||dz   d         }}d }d	                    |t                              ||          f          }|                    d          rJt                              |          t                              d |          }|                    d          rt                              |          }|                    d                                          }	d	                    d |	D                       }	|                    |                    d          |	          }d|v r`t                               |          }|t#          d|          |                                }
|d|
          |||
d                   z   }d|v rNt&                              |          }|r2|                                }
|d|
          |||
d                   z   }d|v rmt(                              d|          }t*                              d|                              d          }d|v r t.                              t0          |          }d|v r$d|v r t.                              t2          |          }t4                              d|          }|                    |           d	                    |          S )z,
    Convert LLVM 10.0 IR for LLVM 3.4.
    c                 8   d}d}	 t                               | |          }|t          d|           |                                }|                    d          }|dk    r|dk    rnn|dv r|dz  }n	|dv r|dz  }z| |d                                          S )Nr   Tzfailed parsing leading type: ,z{[(r   z)]})re_type_toksearchr   endrC  lstrip)s	par_levelposrG  toks        r   parse_out_leading_typez0llvm100_to_34_ir.<locals>.parse_out_leading_type  s    		""1c**Ay"l#KLLL%%''C''!**Cczz>> " Q		Q		  w~~r   z!numba.llvm.dbg.cuz!llvm.dbg.cuz%tail call void asm sideeffect "// dbgz
!numba.dbgz!dbgNz!{zmetadata !{z!"zmetadata !"=r   c                 2    d|                      d          z   S )Nz	metadata r   )rC  rG  s    r   fix_metadata_refz*llvm100_to_34_ir.<locals>.fix_metadata_ref/  s    &33r   r)  zsource_filename =c                     dS )Nr   r   rW  s    r   <lambda>z"llvm100_to_34_ir.<locals>.<lambda>6  s     r   r;  c              3   ,   K   | ]}|t           v |V  d S rU   )supported_attributesr>  s     r   r@  z#llvm100_to_34_ir.<locals>.<genexpr><  s-      KK16J1J1JQ1J1J1J1JKKr   zgetelementptr zfailed parsing getelementptr: zload zcall z\1*\2r   rK  z@llvm.memsetdeclarer   )r  r  r   rO  re_metadata_defrB  re_metadata_correct_usagerM  findr   re_metadata_refr  re_unsupported_keywordsrA  rC  rD  re_getelementptrr   rN  re_loadre_callre_rangerstripre_parenthesized_list_replace_llvm_memset_usage _replace_llvm_memset_declarationre_annotationsr   )r
   rT  rE  rF  assigposlhsrhsrX  rG  rH  rR  s              r   r  r    s        , C K K ??/00 	F<< 4nEED KKMM$$%LMM 	6 D((<<f55D  && 	N077====||D-88||D-8899S>>A.X\]]0CS4 4 4xx!0!4!45Es!K!K!M N N??.// 	"))$//;*..||TBBD??>** 	3!''--AGGAJJ$$&&EHHKKKKKKKE<<

E22Dt## !''--Ay"l#OPPP%%''C: 6 6tCDDz B BBDd?? t$$A GeeggDSDz$:$:4:$F$FFd?? ;;x..D <<D))0055D%%,00.  %%,004  !!"d++

499S>>r   c                    t          |                     d                              d                    }t          j        d|d                   }|st          d          |                    d          }|                    dd                    |                     d                    |          }d	                    |          S )
zNReplace `llvm.memset` usage for llvm7+.

    Used as functor for `re.sub.
    r   rK  zalign (\d+)r   z+No alignment attribute found on memset destr   zi32 {}r   ({}))	listrC  rD  rerM  
ValueErrorinsertr   r   )rG  r   
align_attralignouts        r   ri  ri  i  s    
 !''!**""3''((F>6!955J $FGGG  ##
MM"hooe,,---
))F

C==r   c                     t          |                     d                              d                    }|                    dd           d                    |          }d                    |          S )zTReplace `llvm.memset` declaration for llvm7+.

    Used as functor for `re.sub.
    r   rK  r   r  r   rp  )rq  rC  rD  rt  r   r   )rG  r   rw  s      r   rj  rj  y  s`    
 !''!**""3''((F
MM"e
))F

C==r   c                 D   | j         }t          j        |d          }t          j        t          j        d          d          }|                    | ||f          }t          j        |d          }|                    |           | j	        
                    d           d S )Nkernel    r   znvvm.annotationsr.  )moduler
   MetaDataStringConstantIntTypeadd_metadatar   get_or_insert_named_metadatar  
attributesdiscard)lfuncr'  mdstrmdvaluemdnmds         r   set_cuda_kernelr    s    
,Cc8,,Ek"*R..!,,G			5%1	2	2B

.s4F
G
GCGGBKKK 
Z(((((r   c                     t          j        d          fdt                                                      D             }|                     |          }|                     d|           dS )zAdd NVVM IR version to moduler{  c                 &    g | ]} |          S r   r   )r   r   r  s     r   r   z"add_ir_version.<locals>.<listcomp>  s!    ;;;a33q66;;;r   znvvmir.versionN)r
   r  r   rA   r  add_named_metadata)r'  rI   md_verr  s      @r   add_ir_versionr    sk     *R..C;;;;466#8#8#:#:;;;Kk**F+V44444r   )drn   loggingrr  rf   r   ctypesr   r   r   r   r   r   r	   	threadingllvmliter
   rh   r   r   r   libsr   r   r   
numba.corer   r   	getLoggerrk   loggerADDRSPACE_GENERICADDRSPACE_GLOBALADDRSPACE_SHAREDADDRSPACE_CONSTANTADDRSPACE_LOCALrp   ro   rD  rd   r   r   r   r7   modulesrQ   rR   r   Lockr,   r.   r   rs   r   r   r   rF   r   r   r   r   r  r   r   r   r   r   r   r   r   r   r   r   r  r  r(  r   r^  r_  ra  debuginfo_patternr   re_metadata_debuginforA  r\  rc  rd  re  rf  rL  rk  rb  rh  
re_spflags	spflagmapr  r  ri  rj  r  r  r   r   r   <module>r     s/     				 



                             ; ; ; ; ; ; ; ; ; ; = = = = = = = = = = & & & & & & & & 
	8	$	$       
EGG  I'(( ) )DAqGCK!1a((((; 7 
	 	 	 Y^
B B B B B6 B B BJE E E E Ef E E EP   
< 
< 
<  2  D! ! !         

# ,  (  (# @( ( (< < <
< < <J J JJ J J
 
 
< < <~  . "*\**&BJ'@AA "*X&&C "
#4#<#<S&#I#IJJ BJDEE F F F  2:CDD 
"*5
6
6
"*,
-
-2:)**bj''N++$"*%JKK "
;// RZ)**
 )& 	  $h h hV     ) ) )5 5 5 5 5r   