
    &Vfv$                     t    d dl Z d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
  G d dee
j                  Zd ZdS )	    N)backend)KerasVariable)KerasAutoTrackable)base_optimizerc                   z     e Zd ZdZ fdZ	 d fd	Zd Zd Zd Zd	 Z	d
 Z
d Zd Zd Zd Zd Zd ZddZ xZS )TFOptimizera  A class for Tensorflow specific optimizer logic.

    The major behavior change for this class is for tf.distribute.

    It will override methods from base Keras core Optimizer,
    which provide distribute specific functionality, e.g. variable
    creation, loss reduction, etc.
    c                      t                      j        |i | t          j                                        | _        d S N)super__init__tf
distributeget_strategy_distribution_strategy)selfargskwargs	__class__s      c/var/www/html/software/conda/lib/python3.11/site-packages/keras/src/backend/tensorflow/optimizer.pyr   zTFOptimizer.__init__   s;    $)&)))&(m&@&@&B&B###    Nzerosc                    t          |t          j                  r|j        }n|}| j        j                            |          5  t                                          |||          cd d d            S # 1 swxY w Y   d S )N)nameinitializer)	
isinstancer   Variablevaluer   extendedcolocate_vars_withr   add_variable_from_reference)r   reference_variabler   r   colocate_varr   s        r   r    z'TFOptimizer.add_variable_from_reference   s     ('*:;; 	.-3LL-L(1DD
 
 	 	 7766"; 7  	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   $A66A:=A:c                      t          d          )Nzhstateless_apply is not supported with the TensorFlow backend (as it is incompatible with tf.distribute).)
ValueError)r   optimizer_variablesgradstrainable_variabless       r   stateless_applyzTFOptimizer.stateless_apply(   s     :
 
 	
r   c                     t          |t                    r|j        }t          j        ||j                  }t          |t          j                  r|                    |           d S |                    |           d S r
   )	r   r   r   r   castdtypeIndexedSlicesscatter_updateassignr   variabler   s      r   r.   zTFOptimizer.assign0   su    h.. 	&~Hx~..eR-.. 	###E*****OOE"""""r   c                     t          |t                    r|j        }t          j        ||j                  }t          |t          j                  r|                    |           d S |                    |           d S r
   )	r   r   r   r   r*   r+   r,   scatter_add
assign_addr/   s      r   r3   zTFOptimizer.assign_add9   w    h.. 	&~Hx~..eR-.. 	'  '''''&&&&&r   c                     t          |t                    r|j        }t          j        ||j                  }t          |t          j                  r|                    |           d S |                    |           d S r
   )	r   r   r   r   r*   r+   r,   scatter_sub
assign_subr/   s      r   r7   zTFOptimizer.assign_subB   r4   r   c                 V   t          |t          j                  r|j        }t	          |d          r|                                }n]t          |t          j        j                  r>t	          |d          r.t	          |j	        d          r|j	                                        }|j
        S )N_distributed_containerhandle)r   r   r   r   hasattrr9   r   __internal__CompositeTensorr:   
_unique_id)r   r0   s     r   _var_keyzTFOptimizer._var_keyK   s    h 011 	&~H8566 		@6688HHx!@AA	@(++	@ )ABB	@  ==??H""r   c                       j         d S  fd}t          j        j        j                            | j        |           d S )Nc                     fd}|D ]@}t          |t          j                  r|j        }| j                            ||d           Ad S )Nc                                          |           r[t          j        j        | j                  }t          j        j        | j                  }|                     | |z  |z             d S d S r
   )_use_weight_decayr   r*   learning_rater+   weight_decayr7   )r0   lrwdr   s      r   weight_decay_fnz`TFOptimizer._apply_weight_decay.<locals>.distributed_apply_weight_decay.<locals>.weight_decay_fn_   sr    ))(33 <!3X^DDB!2HNCCB''2(:;;;;;< <r   F)group)r   r   r   r   r   update)distribution	variablesr   rH   r0   r   s        r   distributed_apply_weight_decayzGTFOptimizer._apply_weight_decay.<locals>.distributed_apply_weight_decay^   s    < < < < < &  h(899 .'~H%,,oU -     r   )rE   r   r<   r   interimmaybe_merge_callr   )r   rL   rM   s   `  r   _apply_weight_decayzTFOptimizer._apply_weight_decayZ   sb    $F	 	 	 	 	 	"*;;*'	
 	
 	
 	
 	
r   c           
          d |D             }t           j        j        j                            | j        | j        t          t          ||                    |           d S )Nc                 T    g | ]%}t          |t          j                  r|j        n|&S  r   r   r   r   .0vs     r   
<listcomp>z4TFOptimizer._backend_update_step.<locals>.<listcomp>s   @     
 
 
 "!W%566=AGGA
 
 
r   )	r   r<   r   rN   rO   _distributed_tf_update_stepr   listzip)r   r&   r'   rD   s       r   _backend_update_stepz TFOptimizer._backend_update_stepr   sr    
 
(
 
 
 	"*;;,'U/0011		
 	
 	
 	
 	
r   c                                            |          } fd}|D ]%\  }}|j                            ||||fd           &d S )Nc                 2                         || |          S r
   )update_step)vargradrD   r   s      r   apply_grad_to_update_varzITFOptimizer._distributed_tf_update_step.<locals>.apply_grad_to_update_var   s    ##D#}===r   Fr   rI   )_all_reduce_sum_gradientsr   rJ   )r   rK   grads_and_varsrD   rc   rb   ra   s   `      r   rZ   z'TFOptimizer._distributed_tf_update_step~   s     77GG	> 	> 	> 	> 	> ( 	 	ID#!(((M*	 )    	 	r   c                    t           j                                        }|s|S t          |          }t	          |          }|rSd |D             }t           j                                                            t           j        j        j        |          }ng }g }d}|D ]A\  }}	||                    d|	f           |                    ||         |	f           |dz  }B|t          |          k    s
J d            |S )a  Returns all-reduced gradients aggregated via summation.

        Args:
            grads_and_vars: List of (gradient, variable) pairs.

        Returns:
            List of (gradient, variable) pairs
            where gradients have been all-reduced.
        c                     g | ]
}|d          S )r   rS   )rV   pairs     r   rX   z9TFOptimizer._all_reduce_sum_gradients.<locals>.<listcomp>   s    AAAT!WAAAr   r   N   zFailed to add all gradients)
r   r   get_replica_contextr[   filter_empty_gradients
all_reduceReduceOpSUMappendlen)
r   rf   replica_contextfiltered_grads_and_varsr&   reducedreduced_with_nonesreduced_posgrW   s
             r   re   z%TFOptimizer._all_reduce_sum_gradients   s#    -;;== 	"!!n--"8"H"H" 	AA)@AAAEm7799DD&*E GG G" 	! 	!DAqy"))4)4444"))7;+?*CDDDq c'll***,I***!!r   c                     d |D             }t          || j                  D ])\  }}| j        j                            |d |f           *dS )zOverwrite model variables with their moving average values.

        This function overwrites variables on each device.

        Args:
          var_list: list of model variables.
        c                 T    g | ]%}t          |t          j                  r|j        n|&S rS   rT   rU   s     r   rX   zMTFOptimizer._overwrite_model_variables_with_average_value.<locals>.<listcomp>   rY   r   c                 ,    |                      |          S r
   r.   )abs     r   <lambda>zKTFOptimizer._overwrite_model_variables_with_average_value.<locals>.<lambda>   s    !((1++ r   )r   N)r\   _model_variables_moving_averager   r   rJ   )r   r'   ra   average_vars       r   -_overwrite_model_variables_with_average_valuez9TFOptimizer._overwrite_model_variables_with_average_value   s    
 
(
 
 

 !$!E!
 !
 	 	C '077--[N 8    	 	r   c                     d d |D             }fd}t           j        j        j                            || j        ||           d S )Nc                 6    |                      | |z              d S r
   r{   )ra   rb   s     r   update_accumulatorzPTFOptimizer._backend_increment_gradient_accumulators.<locals>.update_accumulator   s    JJsTz"""""r   c                     g | ]	}|j         
S rS   )r   rU   s     r   rX   zHTFOptimizer._backend_increment_gradient_accumulators.<locals>.<listcomp>   s    333A333r   c                 r    t          ||          D ]$\  }}| j                            ||fd           %d S )NFrd   )r\   r   rJ   )rK   r&   accumulatorsrb   ra   r   s        r   "_distributed_tf_increment_grad_accz`TFOptimizer._backend_increment_gradient_accumulators.<locals>._distributed_tf_increment_grad_acc   s]     !55  	c%,,+4' -     r   )r   r<   r   rN   rO   r   )r   r&   	acc_gradsr   r   r   s        @r   (_backend_increment_gradient_accumulatorsz4TFOptimizer._backend_increment_gradient_accumulators   s{    	# 	# 	# 43333	 	 	 	 	 	"*;;.'		
 	
 	
 	
 	
r   c                 8    t          j        || j        |          S r
   )r   clip_by_normclipnorm)r   valuesaxess      r   _clip_by_normzTFOptimizer._clip_by_norm   s     vt}d;;;r   )Nr   r
   )__name__
__module____qualname____doc__r   r    r(   r.   r3   r7   r?   rP   r]   rZ   re   r   r   r   __classcell__)r   s   @r   r   r      s%        C C C C C
 :A     
 
 
# # #' ' '' ' '# # #
 
 
0

 

 

   !" !" !"F  ,
 
 
*< < < < < < < <r   r   c                 L   t          |           } | s| S g }g }| D ]4\  }}||                    |           |                    ||f           5t          |          }|s#d | D             f}t          d| d|  d          |rt          j        dd |D                        |S )zDFilter out `(grad, var)` pairs that have a gradient equal to `None`.Nc                 "    g | ]\  }}|j         S rS   r   )rV   _rW   s      r   rX   z*filter_empty_gradients.<locals>.<listcomp>   s    7771QV777r   z(No gradients provided for any variable: z. Provided `grads_and_vars` is .zGradients do not exist for variables %s when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?c                     g | ]	}|j         
S rS   r   rU   s     r   rX   z*filter_empty_gradients.<locals>.<listcomp>   s    444af444r   )tuplerp   r$   warningswarn)rf   filteredvars_with_empty_gradsrb   ra   r0   s         r   rl   rl      s   >**N H# ) )	c<!((----OOT3K((((XH 
777779>x > >,:> > >
 
 	
  
) 543444		
 	
 	
 Or   )r   
tensorflowr   	keras.srcr   keras.src.backend.commonr   &keras.src.backend.tensorflow.trackabler   keras.src.optimizersr   BaseOptimizerr   rl   rS   r   r   <module>r      s               2 2 2 2 2 2 E E E E E E / / / / / /T< T< T< T< T<$n&B T< T< T<n    r   