
    |Leu/                       d dl mZ d dlmZmZ d dlmZ 	 d dlmZ n# e$ r  e	d          ZY nw xY wd dl
Z
d dlZd dl
mZ 	 d dlZej        ej        u rd e_        n# e$ r dZY nw xY wd	 Zd
 Z ee
j                   ed          k    r5 ej        d          d             Z ej        d          d             Zn] ee
j                   ed          k    r5 ej        d          d             Z ej        d          d             Zn ed          ej        d             Zd*dZej        d             Z ee
j                   ed          k    r5 ej        d          d             Z ej        d          d             Zn4 ej        d          d             Z ej        d          d             Z ej        d          d             Z ej        d          d             Zej        d             Zej        d             Z ej        d          d              Zej        d!             Z ej        d"             Z!ej        d#             Z" ej        d          d$             Z#ej        d%             Z$ej        d&             Z% ej        d          d'             Z&ej        d(             Z'ej        d)             Z(dS )+    )annotations)ceilisnan)Version)nanr   N)cudac                 2    t          j        d | D              S )Nc                T    g | ]%}t          |t          j                  r|j        n|&S  )
isinstancecupyndarraydtype).0args     Ilib/python3.11/site-packages/datashader/transfer_functions/_cuda_utils.py
<listcomp>z<lambda>.<locals>.<listcomp>   s?        &c4<88Aciic       )npresult_type)argss    r   <lambda>r      s)       *
 r   c                ,   t          | t                    r| f} t          j                    j        }t          t          |dz            dt          |           z  z            ft          |           z  }t          fd| D                       }||fS )aP  
    Compute the blocks-per-grid and threads-per-block parameters for use when
    invoking cuda kernels

    Parameters
    ----------
    shape: int or tuple of ints
        The shape of the input array that the kernel will parallelize over

    Returns
    -------
    tuple
        Tuple of (blocks_per_grid, threads_per_block)
    g       @g      ?c              3  V   K   | ]#}t          t          |z                      V  $d S N)intr   )r   dthreads_per_blocks     r   	<genexpr>zcuda_args.<locals>.<genexpr>6   s9      @@QD..//00@@@@@@r   )r   r   r   get_current_deviceMAX_THREADS_PER_BLOCKr   lentuple)shapemax_threadstpbbpgr   s       @r   	cuda_argsr(      s     % )++AK Ds!233c%jj8HIJJ
U
+C
@@@@%@@@
@
@C8Or   c                Z    t          t          | j                           | |||           dS )a  
    Clip the elements of an input array between lower and upper bounds,
    skipping over elements that are masked out.

    Parameters
    ----------
    data: cupy.ndarray
        Numeric ndarray that will be clipped in-place
    mask: cupy.ndarray
        Boolean ndarray where True values indicate elements that should be
        skipped
    lower: int or float
        Lower bound to clip to
    upper: int or float
        Upper bound to clip to

    Returns
    -------
    None
        data array is modified in-place
    N)masked_clip_2d_kernelr(   r$   )datamaskloweruppers       r   masked_clip_2dr/   <   s-    , )DJ//0tUEJJJJJr   z0.51.0T)devicec                D    t           j                            | ||          S r   )r   atomicnanminaryidxvals      r   cuda_atomic_nanminr8   X       {!!#sC000r   c                D    t           j                            | ||          S r   )r   r2   nanmaxr4   s      r   cuda_atomic_nanmaxr<   [   r9   r   z0.49.1c                D    t           j                            | ||          S r   )r   r2   minr4   s      r   r8   r8   _       {sC---r   c                D    t           j                            | ||          S r   )r   r2   maxr4   s      r   r<   r<   b   r?   r   z0Datashader's CUDA support requires numba!=0.50.0c                    t          j        d          \  }}| j        \  }}|dk    rD||k     r@|dk    r<||k     r8|||f         s0t          | ||f|           t	          | ||f|           d S d S d S d S d S d S )N   r   )r   gridr$   r<   r8   )r+   r,   r-   r.   ijmaximaxjs           r   r*   r*   i   s    9Q<<DAqJD$Avv!d((qAvv!d((41:(4!Q///4!Q///// v((vv((((r   c                   t          j        |           } t          j        |          }t          j        |          }t          t           d          rt          j        | ||||          S t          j        | j        t           j                  }t          | j                  dk    sJ ||d         }t          |          }||d         }t          |          }t          t          | j                           |                     t           j                  |                    t           j                  |                    t           j                  |||           |S )z_
    cupy implementation of np.interp, falls back to cupy implementation
    if available.
    interp)r   rC   Nr   )r   asarrayhasattrrJ   zerosr$   float64r"   floatinterp2d_kernelr(   astype)xxpfpleftrightoutput_ys         r   rJ   rJ   r   s   
 	QA	b		B	b		BtX 3{1b"dE222z!'666Hqw<<1|!u;;D}2%LLEIag&&'			$, 7 74<9P9PRVX]   Or   c                4   t          j        d          \  }}|| j        d         k     r|| j        d         k     r| ||f         }t          |          rt          |||f<   d S ||d         k     r	||||f<   d S ||d         k    r	||||f<   d S t          |          dz
  }	d}
	 d|
|	z   dz  z   }||         |k     r|}
n||dz
           |k    r|dz
  }	nn1||dz
           }||         }||dz
           }||         }||z
  ||z
  z  }||||z
  z  z   }||||f<   d S d S d S )NrC   r      rK   )r   rD   r$   r   r   r"   )rS   rT   rU   rV   rW   rX   rE   rF   xvalupper_ilower_istop_ix0x1y0y1slopey_interps                     r   rQ   rQ      su   9Q<<DAq171:~~!agaj..Aw;; 	& HQTNNNBqE\\!HQTNNNRV^^"HQTNNN "ggkGGg/A55f:$$$GG
^d**$qjGG FQJBFBFQJBFB"Wb)EETBY//H &HQTNNNA ~..r   z0.57c                    t           j                            | |dd          dk    r'	 t           j                            | |dd          dk    't          j                     d S Nr   rZ   )r   r2   casthreadfencemutexindexs     r   cuda_mutex_lockrl      sZ    kooeUAq11Q66 kooeUAq11Q66r   c                n    t          j                     t           j                            | |d           d S Nr   r   rh   r2   exchri   s     r   cuda_mutex_unlockrq      s1    q)))))r   c                    t           j                            | dd          dk    r&	 t           j                            | dd          dk    &t          j                     d S rf   )r   r2   compare_and_swaprh   ri   s     r   rl   rl      sZ    k**5!Q771<< k**5!Q771<<r   c                n    t          j                     t           j                            | dd           d S rn   ro   ri   s     r   rq   rq      s1    1%%%%%r   c                ~    t          |           }t          |dz
  |d          D ]}| |dz
           | |<   || |<   |dz   S )a(  Insert a value into a 1D array at a particular index, but before doing
    that shift the previous values along one to make room. For use in
    ``FloatingNReduction`` classes such as ``max_n`` and ``first_n`` which
    store ``n`` values per pixel.

    Parameters
    ----------
    target : 1d numpy array
        Target pixel array.

    value : float
        Value to insert into target pixel array.

    index : int
        Index to insert at.

    Returns
    -------
    Index beyond insertion, i.e. where the first shifted value now sits.
    rZ   rK   )r"   range)targetvaluerk   nrE   s        r   cuda_shift_and_insertrz      sT    , 	FA1Q3r""    1Q3Kq		F5M19r   c                    t          |           }d}|D ][}t          |          r dS t          ||          D ]6}t          | |                   s|| |         k    rt          | ||          } n7\dS )ad  Single pixel implementation of nanmax_n_in_place.
    ret_pixel and other_pixel are both 1D arrays of the same length.

    Walk along other_pixel a value at a time, find insertion index in
    ret_pixel and shift values along to insert.  Next other_pixel value is
    inserted at a higher index, so this walks the two pixel arrays just once
    each.
    r   Nr"   r   rv   rz   	ret_pixelother_pixelry   istartother_valuerE   s         r   _cuda_nanmax_n_implr           	IAF"   	EE61%%  1&& +	!*D*D29k1MMFE +E r   c                    | j         \  }}}}t          j        d          \  }}}||k     r0||k     r,||k     r(t          | |||f         ||||f                    dS dS dS dS )z-CUDA equivalent of nanmax_n_in_place_4d.
       Nr$   r   rD   r   	retothernynxncat_nrS   ycats	            r   cuda_nanmax_n_in_place_4dr      z     yBD"	!IAq#2vv!b&&S4ZZC1c	NE!Q),<===== v&&ZZr   c                    | j         \  }}}t          j        d          \  }}||k     r(||k     r$t          | ||f         |||f                    dS dS dS )z-CUDA equivalent of nanmax_n_in_place_3d.
    rC   Nr   r   r   r   r   r   rS   r   s          r   cuda_nanmax_n_in_place_3dr     d     JBB9Q<<DAq2vv!b&&C1IuQT{33333 v&&r   c                    t          |           }d}|D ][}t          |          r dS t          ||          D ]6}t          | |                   s|| |         k     rt          | ||          } n7\dS )ad  Single pixel implementation of nanmin_n_in_place.
    ret_pixel and other_pixel are both 1D arrays of the same length.

    Walk along other_pixel a value at a time, find insertion index in
    ret_pixel and shift values along to insert.  Next other_pixel value is
    inserted at a higher index, so this walks the two pixel arrays just once
    each.
    r   Nr|   r}   s         r   _cuda_nanmin_n_implr     r   r   c                    | j         \  }}}}t          j        d          \  }}}||k     r0||k     r,||k     r(t          | |||f         ||||f                    dS dS dS dS )z-CUDA equivalent of nanmin_n_in_place_4d.
    r   Nr$   r   rD   r   r   s	            r   cuda_nanmin_n_in_place_4dr   '  r   r   c                    | j         \  }}}t          j        d          \  }}||k     r(||k     r$t          | ||f         |||f                    dS dS dS )z-CUDA equivalent of nanmin_n_in_place_3d.
    rC   Nr   r   s          r   cuda_nanmin_n_in_place_3dr   1  r   r   c                   | j         \  }}}t          j        d          \  }}}||k     rU||k     rQ||k     rM||||f         dk    r@| |||f         dk    s||||f         | |||f         k     r||||f         | |||f<   dS dS dS dS dS dS )z)CUDA equivalent of row_min_in_place.
    r   rK   N)r$   r   rD   )r   r   r   r   r   rS   r   r   s           r   cuda_row_min_in_placer   ;  s     9LBD	!IAq#2vv!b&&S4ZZAsb  c!Q)n&:&:eAq#I>NQTUVXY[^U^Q_>_>_"1a9-C1c	NNN v&&ZZ  >_>_r   c                    t          |           }d}|D ]I}|dk    r dS t          ||          D ]-}| |         dk    s|| |         k    rt          | ||          } n.JdS )ae  Single pixel implementation of row_max_n_in_place.
    ret_pixel and other_pixel are both 1D arrays of the same length.

    Walk along other_pixel a value at a time, find insertion index in
    ret_pixel and shift values along to insert.  Next other_pixel value is
    inserted at a higher index, so this walks the two pixel arrays just once
    each.
    r   rK   Nr"   rv   rz   r}   s         r   _cuda_row_max_n_implr   F       	IAF"  "EE61%%  Q<2%%y|)C)C29k1MMFE *D r   c                    | j         \  }}}}t          j        d          \  }}}||k     r0||k     r,||k     r(t          | |||f         ||||f                    dS dS dS dS )z.CUDA equivalent of row_max_n_in_place_4d.
    r   Nr$   r   rD   r   r   s	            r   cuda_row_max_n_in_place_4dr   \  z     yBD"	!IAq#2vv!b&&S4ZZSAs^U1a9-=>>>>> v&&ZZr   c                    | j         \  }}}t          j        d          \  }}||k     r(||k     r$t          | ||f         |||f                    dS dS dS )z.CUDA equivalent of row_max_n_in_place_3d.
    rC   Nr   r   s          r   cuda_row_max_n_in_place_3dr   f  d     JBB9Q<<DAq2vv!b&&SAYad44444 v&&r   c                    t          |           }d}|D ]I}|dk    r dS t          ||          D ]-}| |         dk    s|| |         k     rt          | ||          } n.JdS )ae  Single pixel implementation of row_min_n_in_place.
    ret_pixel and other_pixel are both 1D arrays of the same length.

    Walk along other_pixel a value at a time, find insertion index in
    ret_pixel and shift values along to insert.  Next other_pixel value is
    inserted at a higher index, so this walks the two pixel arrays just once
    each.
    r   rK   Nr   r}   s         r   _cuda_row_min_n_implr   p  r   r   c                    | j         \  }}}}t          j        d          \  }}}||k     r0||k     r,||k     r(t          | |||f         ||||f                    dS dS dS dS )z.CUDA equivalent of row_min_n_in_place_4d.
    r   Nr$   r   rD   r   r   s	            r   cuda_row_min_n_in_place_4dr     r   r   c                    | j         \  }}}t          j        d          \  }}||k     r(||k     r$t          | ||f         |||f                    dS dS dS )z0CUDA equivalent of row_min_n_in_place_4=3d.
    rC   Nr   r   s          r   cuda_row_min_n_in_place_3dr     r   r   )NN))
__future__r   mathr   r   packaging.versionr   r   ImportErrorrP   numbanumpyr   r   r   r   r(   r/   __version__jitr8   r<   r*   rJ   rQ   rl   rq   rz   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>r      s2   " " " " " "         % % % % % %   
%,,CCC           KKK2>))
 
    DDD  <K K K6 75!2!222TXT1 1 1TXT1 1 1 1WU778#4#444TXT. . .TXT. . . . +H
I
II 0 0 
0   2 "& "& 
"&J 7500TXT  
 TXT* * * * TXT  
 TXT& & &
 
  8 
  * > > 
> 4 4 
4 
  * > > 
> 4 4 
4 . . 
. 
  * ? ? 
? 5 5 
5 
  * ? ? 
? 5 5 
5 5 5s    00A A'&A'