
from keras.src import ops
from keras.src.api_export import keras_export
from keras.src.layers.attention.attention import Attention


@keras_export("keras.layers.AdditiveAttention")
class AdditiveAttention(Attention):
    """Additive attention layer, a.k.a. Bahdanau-style attention.

    Inputs are a list with 2 or 3 elements:
    1. A `query` tensor of shape `(batch_size, Tq, dim)`.
    2. A `value` tensor of shape `(batch_size, Tv, dim)`.
    3. An optional `key` tensor of shape `(batch_size, Tv, dim)`. If none
        supplied, `value` will be used as `key`.

    The calculation follows the steps:
    1. Calculate attention scores using `query` and `key` with shape
        `(batch_size, Tq, Tv)` as a non-linear sum
        `scores = reduce_sum(tanh(query + key), axis=-1)`.
    2. Use scores to calculate a softmax distribution with shape
        `(batch_size, Tq, Tv)`.
    3. Use the softmax distribution to create a linear combination of `value`
        with shape `(batch_size, Tq, dim)`.
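
    Concretely, step 1 relies on broadcasting, since `query` and `key` have
    different time dimensions (`Tq` vs. `Tv`), so each is expanded before the
    sum. A minimal sketch of the score computation with `keras.ops`, ignoring
    the optional learned `scale` vector (`q` and `k` are illustrative names
    for the expanded inputs):

    ```python
    from keras import ops

    q = ops.expand_dims(query, axis=-2)  # (batch_size, Tq, 1, dim)
    k = ops.expand_dims(key, axis=-3)  # (batch_size, 1, Tv, dim)
    scores = ops.sum(ops.tanh(q + k), axis=-1)  # (batch_size, Tq, Tv)
    ```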

    Args:
        use_scale: If `True`, will create a scalar variable to scale the
            attention scores.
        dropout: Float between 0 and 1. Fraction of the units to drop for the
            attention scores. Defaults to `0.0`.

    Call Args:
        inputs: List of the following tensors:
            - `query`: Query tensor of shape `(batch_size, Tq, dim)`.
            - `value`: Value tensor of shape `(batch_size, Tv, dim)`.
            - `key`: Optional key tensor of shape `(batch_size, Tv, dim)`. If
                not given, will use `value` for both `key` and `value`, which is
                the most common case.
        mask: List of the following tensors:
            - `query_mask`: A boolean mask tensor of shape `(batch_size, Tq)`.
                If given, the output will be zero at the positions where
                `mask==False`.
            - `value_mask`: A boolean mask tensor of shape `(batch_size, Tv)`.
                If given, will apply the mask such that values at positions
                where `mask==False` do not contribute to the result.
        return_attention_scores: bool, if `True`, returns the attention scores
            (after masking and softmax) as an additional output argument.
        training: Python boolean indicating whether the layer should behave in
            training mode (adding dropout) or in inference mode (no dropout).
        use_causal_mask: Boolean. Set to `True` for decoder self-attention. Adds
            a mask such that position `i` cannot attend to positions `j > i`.
            This prevents the flow of information from the future towards the
            past. Defaults to `False`.

    Output:
        Attention outputs of shape `(batch_size, Tq, dim)`.
        (Optional) Attention scores after masking and softmax with shape
            `(batch_size, Tq, Tv)`.
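
    Example:

    A minimal usage sketch (shapes and values are arbitrary and for
    illustration only):

    ```python
    import numpy as np
    from keras import layers

    query = np.random.random((4, 2, 16)).astype("float32")
    value = np.random.random((4, 6, 16)).astype("float32")

    attention = layers.AdditiveAttention(use_scale=True, dropout=0.0)
    output = attention([query, value])  # shape: (4, 2, 16)

    # Optionally return the (masked, softmaxed) attention weights as well.
    output, scores = attention(
        [query, value], return_attention_scores=True
    )  # scores shape: (4, 2, 6)
    ```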
    """

    def __init__(
        self,
        use_scale=True,
        dropout=0.0,
        **kwargs,
    ):
        super().__init__(use_scale=use_scale, dropout=dropout, **kwargs)

    def build(self, input_shape):
        self._validate_inputs(input_shape)
        dim = input_shape[0][-1]
        self.scale = None
        if self.use_scale:
            # Learned per-feature scale applied to the tanh output.
            self.scale = self.add_weight(
                name="scale",
                shape=[dim],
                initializer="glorot_uniform",
                dtype=self.dtype,
                trainable=True,
            )
        self.built = True

    def _calculate_scores(self, query, key):
        """Calculates attention scores as a nonlinear sum of query and key.

        Args:
            query: Query tensor of shape `(batch_size, Tq, dim)`.
            key: Key tensor of shape `(batch_size, Tv, dim)`.

        Returns:
            Tensor of shape `(batch_size, Tq, Tv)`.
        """
        # Reshape tensors to enable broadcasting:
        # query -> (batch_size, Tq, 1, dim), key -> (batch_size, 1, Tv, dim).
        q_reshaped = ops.expand_dims(query, axis=-2)
        k_reshaped = ops.expand_dims(key, axis=-3)
        scale = self.scale if self.use_scale else 1.0
        return ops.sum(scale * ops.tanh(q_reshaped + k_reshaped), axis=-1)

    def get_config(self):
        base_config = super().get_config()
        # `score_mode` is inherited from `Attention` but is not configurable
        # here, so it is dropped from the serialized config.
        del base_config["score_mode"]
        return base_config