
from keras.src import ops
from keras.src.api_export import keras_export
from keras.src.optimizers import optimizer


@keras_export(["keras.optimizers.Adamax"])
class Adamax(optimizer.Optimizer):
    """Optimizer that implements the Adamax algorithm.

    Adamax, a variant of Adam based on the infinity norm, is a first-order
    gradient-based optimization method. Due to its capability of adjusting the
    learning rate based on data characteristics, it is suited to learn
    time-variant processes, e.g., speech data with dynamically changing noise
    conditions. Default parameters follow those provided in the paper (see
    references below).

    Initialization:

    ```python
    m = 0  # Initialize the 1st moment vector
    u = 0  # Initialize the exponentially weighted infinity norm
    t = 0  # Initialize timestep
    ```

    The update rule for parameter `w` with gradient `g` is described at the end
    of section 7.1 of the paper (see the reference section):

    ```python
    t += 1
    m = beta1 * m + (1 - beta1) * g
    u = max(beta2 * u, abs(g))
    current_lr = learning_rate / (1 - beta1 ** t)
    w = w - current_lr * m / (u + epsilon)
    ```
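
    For illustration, a single update starting from `m = u = 0` with
    `beta1 = 0.9`, `beta2 = 0.999`, `learning_rate = 0.001`, and a gradient
    `g = 2.0` (values chosen only for this sketch) works out to:

    ```python
    m = 0.9 * 0 + (1 - 0.9) * 2.0         # 0.2
    u = max(0.999 * 0, abs(2.0))          # 2.0
    current_lr = 0.001 / (1 - 0.9 ** 1)   # 0.01
    w = w - 0.01 * 0.2 / (2.0 + epsilon)  # w decreases by roughly 0.001
    ```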

    Args:
        learning_rate: A float, a
            `keras.optimizers.schedules.LearningRateSchedule` instance, or
            a callable that takes no arguments and returns the actual value to
            use. The learning rate. Defaults to `0.001`.
        beta_1: A float value or a constant float tensor. The exponential decay
            rate for the 1st moment estimates.
        beta_2: A float value or a constant float tensor. The exponential decay
            rate for the exponentially weighted infinity norm.
        epsilon: A small constant for numerical stability.
        {{base_optimizer_keyword_args}}

    Reference:

    - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980)
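
    A minimal usage sketch (the toy model and random data below are
    placeholders for illustration only, not part of this module):

    ```python
    import numpy as np
    import keras

    x = np.random.rand(64, 4)
    y = np.random.rand(64, 1)
    model = keras.Sequential([keras.layers.Dense(1)])
    model.compile(
        optimizer=keras.optimizers.Adamax(learning_rate=0.002), loss="mse"
    )
    model.fit(x, y, epochs=1, verbose=0)
    ```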
    """

    def __init__(
        self,
        learning_rate=0.001,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-7,
        weight_decay=None,
        clipnorm=None,
        clipvalue=None,
        global_clipnorm=None,
        use_ema=False,
        ema_momentum=0.99,
        ema_overwrite_frequency=None,
        loss_scale_factor=None,
        gradient_accumulation_steps=None,
        name="adamax",
        **kwargs,
    ):
        super().__init__(
            learning_rate=learning_rate,
            name=name,
            weight_decay=weight_decay,
            clipnorm=clipnorm,
            clipvalue=clipvalue,
            global_clipnorm=global_clipnorm,
            use_ema=use_ema,
            ema_momentum=ema_momentum,
            ema_overwrite_frequency=ema_overwrite_frequency,
            loss_scale_factor=loss_scale_factor,
            gradient_accumulation_steps=gradient_accumulation_steps,
            **kwargs,
        )
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon

    def build(self, var_list):
        """Initialize optimizer variables.

        Adamax optimizer has 2 types of variables: momentums (denoted as m),
        exponentially weighted infinity norm (denoted as u).

        Args:
            var_list: list of model variables to build Adamax variables on.
        """
        if self.built:
            return
        super().build(var_list)
        self._m = []
        self._u = []
        for var in var_list:
            self._m.append(
                self.add_variable_from_reference(
                    reference_variable=var, name="momentum"
                )
            )
            self._u.append(
                self.add_variable_from_reference(
                    reference_variable=var, name="norm"
                )
            )

    def update_step(self, gradient, variable, learning_rate):
        """Update step given gradient and the associated model variable."""
        lr = ops.cast(learning_rate, variable.dtype)
        gradient = ops.cast(gradient, variable.dtype)
        local_step = ops.cast(self.iterations + 1, variable.dtype)
        beta_1_power = ops.power(
            ops.cast(self.beta_1, variable.dtype), local_step
        )

        m = self._m[self._get_variable_index(variable)]
        u = self._u[self._get_variable_index(variable)]

        # m = beta_1 * m + (1 - beta_1) * gradient
        self.assign_add(
            m, ops.multiply(ops.subtract(gradient, m), 1 - self.beta_1)
        )
        # u = max(beta_2 * u, abs(gradient))
        self.assign(
            u, ops.maximum(ops.multiply(self.beta_2, u), ops.abs(gradient))
        )
        # variable -= lr * m / ((1 - beta_1 ** t) * (u + epsilon))
        self.assign_sub(
            variable,
            ops.divide(
                ops.multiply(lr, m),
                ops.multiply((1 - beta_1_power), ops.add(u, self.epsilon)),
            ),
        )

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "beta_1": self.beta_1,
                "beta_2": self.beta_2,
                "epsilon": self.epsilon,
            }
        )
        return config


Adamax.__doc__ = Adamax.__doc__.replace(
    "{{base_optimizer_keyword_args}}", optimizer.base_optimizer_keyword_args
)