
from keras.src import backend
from keras.src import ops
from keras.src.api_export import keras_export
from keras.src.layers.layer import Layer


@keras_export("keras.layers.Attention")
class Attention(Layer):
    """Dot-product attention layer, a.k.a. Luong-style attention.

    Inputs are a list with 2 or 3 elements:
    1. A `query` tensor of shape `(batch_size, Tq, dim)`.
    2. A `value` tensor of shape `(batch_size, Tv, dim)`.
    3. An optional `key` tensor of shape `(batch_size, Tv, dim)`. If not
        supplied, `value` will be used as the `key`.

    The calculation follows the steps:
    1. Calculate attention scores using `query` and `key` with shape
        `(batch_size, Tq, Tv)`.
    2. Use scores to calculate a softmax distribution with shape
        `(batch_size, Tq, Tv)`.
    3. Use the softmax distribution to create a linear combination of `value`
        with shape `(batch_size, Tq, dim)`.
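
    Expressed with `keras.ops` (a sketch of the default `"dot"` score mode,
    with masking and dropout omitted):

    ```python
    scores = ops.matmul(query, ops.transpose(key, axes=[0, 2, 1]))
    weights = ops.softmax(scores, axis=-1)  # (batch_size, Tq, Tv)
    outputs = ops.matmul(weights, value)    # (batch_size, Tq, dim)
    ```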

    Args:
        use_scale: If `True`, will create a scalar variable to scale the
            attention scores.
        dropout: Float between 0 and 1. Fraction of the units to drop for the
            attention scores. Defaults to `0.0`.
        seed: A Python integer to use as random seed in case of `dropout`.
        score_mode: Function to use to compute attention scores, one of
            `{"dot", "concat"}`. `"dot"` refers to the dot product between the
            query and key vectors. `"concat"` refers to the hyperbolic tangent
            of the concatenation of the `query` and `key` vectors.

    Call Args:
        inputs: List of the following tensors:
            - `query`: Query tensor of shape `(batch_size, Tq, dim)`.
            - `value`: Value tensor of shape `(batch_size, Tv, dim)`.
            - `key`: Optional key tensor of shape `(batch_size, Tv, dim)`. If
                not given, will use `value` for both `key` and `value`, which is
                the most common case.
        mask: List of the following tensors:
            - `query_mask`: A boolean mask tensor of shape `(batch_size, Tq)`.
                If given, the output will be zero at the positions where
                `mask==False`.
            - `value_mask`: A boolean mask tensor of shape `(batch_size, Tv)`.
                If given, will apply the mask such that values at positions
                where `mask==False` do not contribute to the result.
        return_attention_scores: bool, if `True`, returns the attention scores
            (after masking and softmax) as an additional output argument.
        training: Python boolean indicating whether the layer should behave in
            training mode (adding dropout) or in inference mode (no dropout).
        use_causal_mask: Boolean. Set to `True` for decoder self-attention. Adds
            a mask such that position `i` cannot attend to positions `j > i`.
            This prevents the flow of information from the future towards the
            past. Defaults to `False`.

    Output:
        Attention outputs of shape `(batch_size, Tq, dim)`.
        (Optional) Attention scores after masking and softmax with shape
            `(batch_size, Tq, Tv)`.
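
    Example (an illustrative usage sketch; the shapes below are arbitrary):

    ```python
    import numpy as np
    from keras import layers

    query = np.random.random((4, 5, 16)).astype("float32")  # (batch, Tq, dim)
    value = np.random.random((4, 6, 16)).astype("float32")  # (batch, Tv, dim)

    layer = layers.Attention(use_scale=True)
    # `key` is omitted, so `value` serves as both key and value.
    output, scores = layer([query, value], return_attention_scores=True)
    # output.shape == (4, 5, 16), scores.shape == (4, 5, 6)
    ```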
    Fdot        Nc                      t                      j        di | || _        || _        || _        | j        dk    r%t
          j                            |          | _        | j        dvrt          d|           d S )Nr   seed)r	   concatz_Invalid value for argument score_mode. Expected one of {'dot', 'concat'}. Received: score_mode= )
super__init__	use_scale
score_modedropoutr   randomSeedGeneratorseed_generator
ValueError)selfr   r   r   r   kwargs	__class__s         a/var/www/html/software/conda/lib/python3.11/site-packages/keras/src/layers/attention/attention.pyr   zAttention.__init__A   s     	""6""""$<!").">">D">"I"ID?"3335(25 5   43    c                    |                      |           d | _        d | _        | j        r$|                     ddd| j        d          | _        | j        dk    r$|                     ddd| j        d          | _        d| _        d S )Nscaler   onesT)nameshapeinitializerdtype	trainabler   concat_score_weight)_validate_inputsr   r&   r   
add_weightr$   r   builtr   input_shapes     r   buildzAttention.buildW   s    k***
#' > 	"j )  DJ ?h&&'+*"j (7 ( (D$ 


r   c                    | j         dk    r=t          j        |t          j        |g d                    }| j        
|| j        z  }n| j         dk    rt          j        |d          }t          j        |d          }| j        <| j        t          j        t          j        | j        ||z   z            d	          z  }n3| j        t          j        t          j        ||z             d	          z  }|S )
        """Calculates attention scores as a query-key dot product.

        Args:
            query: Query tensor of shape `(batch_size, Tq, dim)`.
            key: Key tensor of shape `(batch_size, Tv, dim)`.

        Returns:
            Tensor of shape `(batch_size, Tq, Tv)`.
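
        For `score_mode="dot"`, the score is `matmul(query, transpose(key))`,
        multiplied by `scale` when `use_scale=True`. For
        `score_mode="concat"`, each score is
        `concat_score_weight * sum(tanh(scale * (query[b, i] + key[b, j])))`,
        summed over the feature axis (`scale` is omitted when
        `use_scale=False`).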
        """
        if self.score_mode == "dot":
            scores = ops.matmul(query, ops.transpose(key, axes=[0, 2, 1]))
            if self.scale is not None:
                scores *= self.scale
        elif self.score_mode == "concat":
            # Reshape into `(batch_size, Tq, 1, dim)`.
            q_reshaped = ops.expand_dims(query, axis=-2)
            # Reshape into `(batch_size, 1, Tv, dim)`.
            k_reshaped = ops.expand_dims(key, axis=-3)
            if self.scale is not None:
                scores = self.concat_score_weight * ops.sum(
                    ops.tanh(self.scale * (q_reshaped + k_reshaped)), axis=-1
                )
            else:
                scores = self.concat_score_weight * ops.sum(
                    ops.tanh(q_reshaped + k_reshaped), axis=-1
                )
        return scores

    def _apply_scores(self, scores, value, scores_mask=None, training=False):
        """Applies attention scores to the given value tensor.

        To use this method in your attention layer, follow the steps:

        * Use `query` tensor of shape `(batch_size, Tq)` and `key` tensor of
            shape `(batch_size, Tv)` to calculate the attention `scores`.
        * Pass `scores` and `value` tensors to this method. The method applies
            `scores_mask`, calculates
            `attention_distribution = softmax(scores)`, then returns
            `matmul(attention_distribution, value)`.
        * Apply `query_mask` and return the result.

        Args:
            scores: Scores float tensor of shape `(batch_size, Tq, Tv)`.
            value: Value tensor of shape `(batch_size, Tv, dim)`.
            scores_mask: A boolean mask tensor of shape `(batch_size, 1, Tv)`
                or `(batch_size, Tq, Tv)`. If given, scores at positions where
                `scores_mask==False` do not contribute to the result. It must
                contain at least one `True` value in each line along the last
                dimension.
            training: Python boolean indicating whether the layer should behave
                in training mode (adding dropout) or in inference mode
                (no dropout).

        Returns:
            Tensor of shape `(batch_size, Tq, dim)`.
            Attention scores after masking and softmax with shape
                `(batch_size, Tq, Tv)`.
        """
        if scores_mask is not None:
            padding_mask = ops.logical_not(scores_mask)
            # Bias the scores so that padded positions vanish after softmax.
            # Note 65504. is the largest finite float16 value.
            max_value = 65504.0 if scores.dtype == "float16" else 1.0e9
            scores -= max_value * ops.cast(padding_mask, dtype=scores.dtype)

        weights = ops.softmax(scores, axis=-1)
        if training and self.dropout > 0:
            weights = backend.random.dropout(
                weights,
                self.dropout,
                seed=self.seed_generator,
            )
        return ops.matmul(weights, value), weights

    def _calculate_score_mask(self, scores, v_mask, use_causal_mask):
        if use_causal_mask:
            # Creates a lower triangular mask, so position i cannot attend to
            # positions j > i. This prevents the flow of information from the
            # future into the past.
            score_shape = ops.shape(scores)
            # causal_mask_shape = (1, Tq, Tv).
            mask_shape = (1, score_shape[-2], score_shape[-1])
            ones_mask = ops.ones(shape=mask_shape, dtype="int32")
            row_index = ops.cumsum(ones_mask, axis=-2)
            col_index = ops.cumsum(ones_mask, axis=-1)
            causal_mask = ops.greater_equal(row_index, col_index)

            if v_mask is not None:
                # Mask of shape (batch_size, 1, Tv).
                v_mask = ops.expand_dims(v_mask, axis=-2)
                return ops.logical_and(v_mask, causal_mask)
            return causal_mask
        # If not using a causal mask, return the value mask as is, or None
        # if the value mask was not provided.
        return v_mask

    def call(
        self,
        inputs,
        mask=None,
        training=False,
        return_attention_scores=False,
        use_causal_mask=False,
    ):
        self._validate_inputs(inputs=inputs, mask=mask)
        q = inputs[0]
        v = inputs[1]
        k = inputs[2] if len(inputs) > 2 else v
        q_mask = mask[0] if mask else None
        v_mask = mask[1] if mask else None
        scores = self._calculate_scores(query=q, key=k)
        scores_mask = self._calculate_score_mask(
            scores, v_mask, use_causal_mask
        )
        result, attention_scores = self._apply_scores(
            scores=scores, value=v, scores_mask=scores_mask, training=training
        )
        if q_mask is not None:
            # Mask of shape (batch_size, Tq, 1).
            q_mask = ops.expand_dims(q_mask, axis=-1)
            result *= ops.cast(q_mask, dtype=result.dtype)
        if return_attention_scores:
            return result, attention_scores
        return result

    def compute_mask(self, inputs, mask=None):
        self._validate_inputs(inputs=inputs, mask=mask)
        if mask is None or mask[0] is None:
            return None
        return ops.convert_to_tensor(mask[0])

    def compute_output_shape(self, input_shape):
        """Returns shape of value tensor dim, but for query tensor length."""
        return (*input_shape[0][:-1], input_shape[1][-1])

    def _validate_inputs(self, inputs, mask=None):
        """Validates arguments of the `call` method."""
        class_name = self.__class__.__name__
        if not isinstance(inputs, list):
            raise ValueError(
                f"{class_name} layer must be called on a list of inputs, "
                "namely [query, value] or [query, value, key]. "
                f"Received: inputs={inputs}."
            )
        if len(inputs) < 2 or len(inputs) > 3:
            raise ValueError(
                f"{class_name} layer accepts inputs list of length 2 or 3, "
                "namely [query, value] or [query, value, key]. "
                f"Received length: {len(inputs)}."
            )
        if mask is not None:
            if not isinstance(mask, list):
                raise ValueError(
                    f"{class_name} layer mask must be a list, "
                    f"namely [query_mask, value_mask]. Received: mask={mask}."
                )
            if len(mask) < 2 or len(mask) > 3:
                raise ValueError(
                    f"{class_name} layer accepts mask list of length 2 or 3. "
                    f"Received: inputs={inputs}, mask={mask}."
                )

    def get_config(self):
        base_config = super().get_config()
        config = {
            "use_scale": self.use_scale,
            "score_mode": self.score_mode,
            "dropout": self.dropout,
        }
        return {**base_config, **config}
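

# The block below is an illustrative usage sketch, not part of the layer
# implementation: it exercises the causal-mask behavior documented above.
# Shapes and values are arbitrary.
if __name__ == "__main__":
    import numpy as np

    query = np.random.random((2, 4, 8)).astype("float32")
    value = np.random.random((2, 4, 8)).astype("float32")

    layer = Attention()
    outputs, scores = layer(
        [query, value], return_attention_scores=True, use_causal_mask=True
    )
    scores = ops.convert_to_numpy(scores)
    # With the causal mask, position i attends only to positions j <= i, so
    # everything strictly above the diagonal of each attention matrix is 0.
    assert np.allclose(np.triu(scores[0], k=1), 0.0)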