
    ܙdf;                     X   d dl Z d dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ ddlmZ d dlmZmZmZ d dlm Z  d dl!m"Z"  G d de	j                  Z# e j$        de j%                  Z& G d de          Z' G d de          Z(dS )    N)cached_property)ir)typingtypes	debuginfoitanium_manglercgutils)
Dispatcher)NumbaInvalidConfigWarning)BaseContext)MinimalCallConv)	cmathdecl)	datamodel   )nvvm)codegen	nvvmutilsufuncs)cuda_data_manager)warnc                   $     e Zd Zd Z fdZ xZS )CUDATypingContextc                    ddl m}m}m}m} ddlm}m} |                     |j	                   |                     |j	                   |                     |j	                   |                     t          j	                   |                     |j	                   |                     |j	                   |                     |j                   d S )Nr   )cudadeclcudamathlibdevicedeclvector_typesr   )enumdecl
cffi_utils) r   r   r   r   numba.core.typingr   r   install_registryregistryr   typing_registry)selfr   r   r   r   r   r   s          1lib/python3.11/site-packages/numba/cuda/target.pyload_additional_registriesz,CUDATypingContext.load_additional_registries   s    EEEEEEEEEEEE::::::::h/000j1222h/000i0111m4555h/000l:;;;;;    c                    ddl m} t          |t                    rt          ||          s	 |j        }n# t
          $ r |j        st          d          |j        	                                }d|d<   |
                    dd          |d<   |
                    dd          |d<    ||j        |          }||_        |}Y nw xY wt          t          |                               |          S )	Nr   )CUDADispatcherz<using cpu function on device but its compilation is disabledTdevicedebugFopt)numba.cuda.dispatcherr*   
isinstancer
   _CUDATypingContext__dispatcherAttributeError_can_compile
ValueErrortargetoptionscopygetpy_funcsuperr   resolve_value_type)r%   valr*   r4   disp	__class__s        r&   r9   z$CUDATypingContext.resolve_value_type%   s#   888888sJ'' 	3//	&!   ' H$ &G H H H # 1 6 6 8 8*.h')6):):7E)J)Jg&'4'8'8'E'Ee$%~ck=AA $(  &--@@EEEs   6 B
CC)__name__
__module____qualname__r'   r9   __classcell__r<   s   @r&   r   r      sP        
< 
< 
<F F F F F F F F Fr(   r   z	[^a-z0-9]c                        e Zd ZdZdZd fd	Zed             Zed             Zd Z	d Z
d Zd	 Zed
             Zed             Zed             ZddddZ	 ddZd Zd Zd Zd Zd Zd Z xZS )CUDATargetContextTcudac                     t                                          ||           t          j        t          j                  | _        d S N)r8   __init__r   chainr   default_managerdata_model_manager)r%   	typingctxtargetr<   s      r&   rG   zCUDATargetContext.__init__I   s>    F+++"3"9%#
 #
r(   c                     t          j                    j        rt          j        S d}t          t          |                     t          j        S )Nz3debuginfo is not generated for CUDA toolkits < 11.2)r   NVVM	is_nvvm70r   	DIBuilderr   r   DummyDIBuilder)r%   msgs     r&   rP   zCUDATargetContext.DIBuilderO   sA    9;;  	,&&GC*3//000++r(   c                     dS )NF r%   s    r&   enable_boundscheckz$CUDATargetContext.enable_boundscheckX   s	     ur(   c                 6    | j                             |          S rF   )_internal_codegen_create_empty_module)r%   names     r&   create_modulezCUDATargetContext.create_module^   s    %::4@@@r(   c                 F    t          j        d          | _        d | _        d S )Nznumba.cuda.jit)r   JITCUDACodegenrX   _target_datarU   s    r&   initzCUDATargetContext.inita   s$    !(!78H!I!I r(   c                    ddl m}m}m} ddl m}m}m} ddl m}m} ddl m	}	 ddl
m}
 ddlm} ddlm} d	d
lm}m}m}m}m} ddlm} |                     |j                   |                     |
j                   |                     |j                   |                     |j                   |                     |	j                   |                     |j                   |                     |j                   d S )Nr   )numberstupleobjslicing)rangeobj	iteratorsenumimpl)unicodecharseq)	cmathimpl)cffiimpl)arrayobj)
npdatetimer   )cudaimpl	printimpllibdeviceimplmathimplr   )ndarray)numba.cpythonra   rb   rc   rd   re   rf   rg   rh   ri   
numba.miscrj   numba.nprk   rl   r    rm   rn   ro   rp   r   numba.np.unsaferq   r"   r#   impl_registry)r%   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   rl   rm   rn   ro   rp   r   rq   s                      r&   r'   z,CUDATargetContext.load_additional_registriese   s    	=<<<<<<<<<??????????22222222++++++''''''%%%%%%''''''	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	,+++++h/000h/000i0111m4555i0111h/000l899999r(   c                     | j         S rF   )rX   rU   s    r&   r   zCUDATargetContext.codegen}   s    %%r(   c                 |    | j         /t          j        t          j                    j                  | _         | j         S rF   )r^   llcreate_target_datar   rN   data_layoutrU   s    r&   target_datazCUDATargetContext.target_data   s0    $ " 5dikk6M N ND  r(   c                 N    ddl m d}t          fd|D                       }|S )z
        Some CUDA intrinsics are at the module level, but cannot be treated as
        constants, because they are loaded from a special register in the PTX.
        These include threadIdx, blockDim, etc.
        r   rD   )	threadIdxblockDimblockIdxgridDimlaneidwarpsizec                 <    g | ]}t          j                  |fS rT   )r   Module).0ncrD   s     r&   
<listcomp>z;CUDATargetContext.nonconst_module_attrs.<locals>.<listcomp>   s8     $9 $9 $9(* &+\$%7%7$< $9 $9 $9r(   )numbarD   tuple)r%   	nonconstsnonconsts_with_modrD   s      @r&   nonconst_module_attrsz'CUDATargetContext.nonconst_module_attrs   sZ     	!	" $9 $9 $9 $9.7$9 $9 $9 : :!!r(   c                      t          |           S rF   )CUDACallConvrU   s    r&   	call_convzCUDATargetContext.call_conv   s    D!!!r(   rT   Nabi_tagsuidc                2    t          j        ||||          S )Nr   )r   mangle)r%   rZ   argtypesr   r   s        r&   manglerzCUDATargetContext.mangler   s%    %dHx*-/ / / 	/r(   c	           	         t          j        |j        d          }	|                                                     |j         d|	||          }
|
                    |           |                     |
||	||||          }|
|fS )a  
        Adapt a code library ``codelib`` with the numba compiled CUDA kernel
        with name ``fname`` and arguments ``argtypes`` for NVVM.
        A new library is created with a wrapper function that can be used as
        the kernel entry point for the given kernel.

        Returns the new code library and the wrapper function.

        Parameters:

        codelib:       The CodeLibrary containing the device function to wrap
                       in a kernel call.
        fndesc:        The FunctionDescriptor of the source function.
        debug:         Whether to compile with debug.
        lineinfo:      Whether to emit line info.
        nvvm_options:  Dict of NVVM options used when compiling the new library.
        filename:      The source filename that the function is contained in.
        linenum:       The source line that the function is on.
        max_registers: The max_registers argument for the code library.
        cudapyns_kernel_)
entry_namenvvm_optionsmax_registers)r   prepend_namespacellvm_func_namer   create_libraryrZ   add_linking_librarygenerate_kernel_wrapper)r%   codelibfndescr,   lineinfor   filenamelinenumr   kernel_namelibrarywrappers               r&   prepare_cuda_kernelz%CUDATargetContext.prepare_cuda_kernel   s    . &7!h
 
 
 ,,..//7<0I0I0I;F=I>K 0 M M 	##G,,,..w/4h/68 8 r(   c                 	  $% |j         }|                     |          }	t          |	j                  }
t	          j        t	          j                    |
          }|                     d          %t	          j        t	          j        d          | j	        
                    t          j                  g|
z             }t	          j        %||j                  }t          j        |j        d          }t	          j        %||          $t	          j        $                    d                    }|s|rH|o| }|                     %|| |          } |j        $||j        ||            |j        ||           $%fd} |d          }g }g }d	D ]D}|                     |d
|z                       |                     |d|z                       E|	                    |$j                  }| j	                            ||t          j        ||          \  }}|r5t9          j        ||j                  5  |                                 ddd           n# 1 swxY w Y   |                     |!                    |j"                            5  t	          j#        |j$        j%        d          }tM          j'                    j(        r5|)                    |||j*        dd          }|+                    |d          }nzt	          j        |j$        |j$        |j$        |j$        g          }d}t	          j        %||          }|,                    ||||j*        g          }|-                    d||          }t]          j/        |          } |                     |          5  ta          d	|          D ]0\  }!}"| 1                    |!          }#|2                    |#|"           1ta          d	|          D ]0\  }!}"| 3                    |!          }#|2                    |#|"           1	 ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   |                                 tM          j4        $           |5                    %           |s|r |j6                     |6                                 |7                    $j                  $$S )z
        Generate the kernel wrapper in the given ``library``.
        The function being wrapped is described by ``fndesc``.
        The wrapper function is returned.
        zcuda.kernel.wrapper    r   r   r    )modulefilepathcgctxdirectives_onlyc                     j         | z   }t          j        t          j        d          |          }t          j        |j        j        d           |_        |S )Nr   )	rZ   r	   add_global_variabler   IntTypeConstanttypepointeeinitializer)postfixrZ   gvwrapfnwrapper_modules      r&   define_error_gvzBCUDATargetContext.generate_kernel_wrapper.<locals>.define_error_gv   sL    ;(D,^RZ^^-13 3B[$??BNIr(   __errcode__xyzz	__tid%s__z__ctaid%s__N	monotonicr   ___numba_atomic_i32_cas_hack)rZ   z==)8r   get_arg_packerlistargument_typesr   FunctionTypeVoidTyper[   r   r   get_return_typer   pyobjectFunctionr   r   r   rZ   	IRBuilderappend_basic_blockrP   mark_subprogramargsmark_locationappendfrom_argumentscall_functionvoidr	   	if_likelyis_okret_voidif_thennot_is_python_excr   r   r   r   rN   rO   cmpxchgcodeextract_valuecallicmp_unsignedr   SRegBuilderziptidstorectaidset_cuda_kerneladd_ir_modulefinalizeget_function)&r%   r   r   r   r,   r   r   r   r   arginfoargtyswrapfntyfntyfuncprefixedbuilderr   r   r   gv_excgv_tidgv_ctaidicallargsstatus_oldxchgchangedcasfntycas_hackcasfnsregdimptrr:   r   r   s&                                       @@r&   r   z)CUDATargetContext.generate_kernel_wrapper   s    ?%%h//g,--?2;==&99++,ABBrz"~~ $ > >u~ N NO!' () ) {>41FGG"4TY8LLL^Xx@@,v88<<== 		6H 		6&4u9On08-17F ' H HI &I%V[(G   $I#GW555	 	 	 	 	 	 !// 	@ 	@AMM//+/::;;;OOOOMA,=>>????))'6;??N00T5:x; ;	  "	0"7FL99 # #  """# # # # # # # # # # # # # # # f.B!C!CDD 0 0k&+"5t<<
 9;;( E"??63+6E ED%33D!<<GG ochch9<9C D DG  >HKhOOOE"<<V[/IJJD%33D$DDG !,W55__W-- 0 0%(%7%7 0 0	S"hhsmmc3////%(%9%9 0 0	S"jjooc3////00 0 0 0 0 0 0 0 0 0 0 0 0 0 0-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0> 	V$$$n--- 	!H 	!I   %%fk22sJ   9II!IDQ	"BP2&Q	2P6	6Q	9P6	:Q		QQc           	          |j         } fdt          |                    d                    D             }t          j        t          j        d          t          |                    }t          j        ||          }t          j	        }t          j        ||j        d|          }	d|	_        d|	_        ||	_                             |j                  }
                     |
          }d	|d
z
                                  z  |	_        t          j        t          j        d                    }|                    |	|d          }                      |           |          } fd|j        D             } fd|j        D             }                     ||                    ||j        j                  |||j        |j        d           |                                S )i
        Unlike the parent version.  This returns a a pointer in the constant
        addrspace.
        c                 P    g | ]"}                     t          j        |          #S rT   )get_constantr   byte)r   r   r%   s     r&   r   z9CUDATargetContext.make_constant_array.<locals>.<listcomp>*  s;     
 
 
 ej!,,
 
 
r(   A)order   _cudapy_cmem	addrspaceinternalT   r   genericc                 P    g | ]"}                     t          j        |          #S rT   r  r   intpr   sr%   s     r&   r   z9CUDATargetContext.make_constant_array.<locals>.<listcomp>C  s+    FFFq$##EJ22FFFr(   c                 P    g | ]"}                     t          j        |          #S rT   r  r  s     r&   r   z9CUDATargetContext.make_constant_array.<locals>.<listcomp>D  s+    JJJD%%ej!44JJJr(   N)datashapestridesitemsizeparentmeminfo) r   itertobytesr   	ArrayTyper   lenr   r   ADDRSPACE_CONSTANTr	   r   r   linkageglobal_constantr   get_data_typedtypeget_abi_sizeof
bit_lengthalignPointerTypeaddrspacecast
make_arrayr  r  populate_arraybitcastr  r  r  	_getvalue)r%   r   arytyarrlmod	constvals
constarytyconstaryr  r   lldtyper&  ptrtygenptrarykshapekstridess   `                r&   make_constant_arrayz%CUDATargetContext.make_constant_array"  s    ~
 
 
 
#++C+0011
 
 
	 \"*Q--Y@@
;z955+	(x}n3<> > >
!! $$U[11##G,,..000 rz!}}--&&r5)<< %dooe$$T733FFFFCIFFFJJJJckJJJCgoofchm&L&L"($,%(\#*$(	 	 	* 	* 	* }}r(   c                    t          j        |                    d          dz             }d                    dt	          j        |          g          }|j                            |          }|<t          j        ||j	        |t          j                  }d|_        d|_        ||_        |j	        j        j        }|                    |                    t          j                            S )	r  zutf-8    $__conststring__Nr
  r  T)r	   make_bytearrayencodejoinr   mangle_identifierglobalsr6   r   r   r   r  r   r!  r   r   elementr+  
as_pointer)r%   modstringtextrZ   r   chartys          r&   insert_const_stringz%CUDATargetContext.insert_const_stringM  s    
 %fmmG&<&<w&FGGxx*(:6BBD E E [__T"":,S$)T7;7NP P PB#BJ!%B!BN (zz&++D,CDDEEEr(   c                     |j         }|                     ||          }t          j        t          j        d                    }|                    ||d          S )z
        Insert a constant string in the constant addresspace and return a
        generic i8 pointer to the data.

        This function attempts to deduplicate.
        r  r  )r   rI  r   r'  r   r(  )r%   r   rF  r/  r   	charptrtys         r&   insert_string_const_addrspacez/CUDATargetContext.insert_string_const_addrspacec  sO     ~%%dF33N2:a==11	$$RI>>>r(   c                     dS )zRun O1 function passes
        NrT   )r%   r   s     r&   optimize_functionz#CUDATargetContext.optimize_functiono  s	     	r(   c                 *    t          j        |          S rF   )r   get_ufunc_info)r%   	ufunc_keys     r&   rP  z CUDATargetContext.get_ufunc_info|  s    $Y///r(   r~   rF   )r=   r>   r?   implement_powi_as_math_callstrict_alignmentrG   propertyrP   rV   r[   r_   r'   r   r|   r   r   r   r   r   r   r9  rI  rL  rN  rP  r@   rA   s   @r&   rC   rC   E   s       "&
 
 
 
 
 
 , , X,   X
A A A! ! !: : :0& & & ! ! X!
 " " _" " " _" 35$ / / / / / +/"  "  "  " H` ` `D) ) )VF F F,
? 
? 
?  0 0 0 0 0 0 0r(   rC   c                       e Zd ZdS )r   N)r=   r>   r?   rT   r(   r&   r   r     s        Dr(   r   ))re	functoolsr   llvmlite.bindingbindingry   llvmliter   
numba.corer   r   r   r   r	   numba.core.dispatcherr
   numba.core.errorsr   numba.core.baser   numba.core.callconvr   r!   r   r   cudadrvr   
numba.cudar   r   r   numba.cuda.modelsr   warningsr   r   compileIVALID_CHARSrC   r   rT   r(   r&   <module>rg     s   				 % % % % % %             I I I I I I I I I I I I I I , , , , , , 7 7 7 7 7 7 ' ' ' ' ' ' / / / / / / ' ' ' ' ' '                   1 1 1 1 1 1 1 1 1 1 / / / / / /      $F $F $F $F $F* $F $F $FT bjrt,,x0 x0 x0 x0 x0 x0 x0 x0v		 	 	 	 	? 	 	 	 	 	r(   