o
    ={cA                     @   s   d Z ddlm  mZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ edg dG d	d
 d
Z	d"ddZd#ddZdd Zdd ZejfddZedg ddejdfddZdd Zdd Zdd Zdd Zd d! ZdS )$z$Utilities related to loss functions.    N)backend)keras_tensor)tf_utils)keras_exportzkeras.losses.Reduction)v1c                   @   s8   e Zd ZdZdZdZdZdZedd Z	edd	 Z
d
S )ReductionV2a  Types of loss reduction.

    Contains the following values:

    * `AUTO`: Indicates that the reduction option will be determined by the
      usage context. For almost all cases this defaults to
      `SUM_OVER_BATCH_SIZE`. When used with `tf.distribute.Strategy`, outside of
      built-in training loops such as `tf.keras` `compile` and `fit`, we expect
      reduction value to be `SUM` or `NONE`. Using `AUTO` in that case will
      raise an error.
    * `NONE`: No **additional** reduction is applied to the output of the
      wrapped loss function. When non-scalar losses are returned to Keras
      functions like `fit`/`evaluate`, the unreduced vector loss is passed to
      the optimizer but the reported loss will be a scalar value.

       Caution: **Verify the shape of the outputs when using** `Reduction.NONE`.
       The builtin loss functions wrapped by the loss classes reduce one
       dimension (`axis=-1`, or `axis` if specified by loss function).
       `Reduction.NONE` just means that no **additional** reduction is applied
       by the class wrapper. For categorical losses with an example input shape
       of `[batch, W, H, n_classes]` the `n_classes` dimension is reduced. For
       pointwise losses you must include a dummy axis so that `[batch, W, H, 1]`
       is reduced to `[batch, W, H]`. Without the dummy axis `[batch, W, H]`
       will be incorrectly reduced to `[batch, W]`.

    * `SUM`: Scalar sum of weighted losses.
    * `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in
       losses.  This reduction type is not supported when used with
       `tf.distribute.Strategy` outside of built-in training loops like
       `tf.keras` `compile`/`fit`.

       You can implement 'SUM_OVER_BATCH_SIZE' using global batch size like:
       ```
       with strategy.scope():
         loss_obj = tf.keras.losses.CategoricalCrossentropy(
             reduction=tf.keras.losses.Reduction.NONE)
         ....
         loss = tf.reduce_sum(loss_obj(labels, predictions)) *
             (1. / global_batch_size)
       ```

    Please see the [custom training guide](
    https://www.tensorflow.org/tutorials/distribute/custom_training) for more
    details on this.
    autoZnonesumZsum_over_batch_sizec                 C   s   | j | j| j| jfS N)AUTONONESUMSUM_OVER_BATCH_SIZE)cls r   8lib/python3.10/site-packages/keras/utils/losses_utils.pyallQ   s   zReductionV2.allc                 C   s*   ||   vrtd| d|    dd S )NzInvalid Reduction Key: z. Expected keys are "")r   
ValueError)r   keyr   r   r   validateU   s
   zReductionV2.validateN)__name__
__module____qualname____doc__r   r   r   r   classmethodr   r   r   r   r   r   r      s    .
r   c           	         s  t |pd tstt st  j}|j} j}|j}|durj|durj|| }||d krJ|jd 	drJt
dgn||d kr_|jd 	dr_t
 dg  fW  d   S tt  }|du s|jd 	drtt|d |fddfdd|du s|jd 	drtt|d | fdd fd	d  fW  d   S 1 sw   Y  dS )
aH  Squeeze last dim if ranks differ from expected by exactly 1.

    In the common case where we expect shapes to match, `expected_rank_diff`
    defaults to 0, and we squeeze the last dimension of the larger rank if they
    differ by 1.

    But, for example, if `labels` contains class IDs and `predictions` contains
    1 probability per class, we expect `predictions` to have 1 more dimension
    than `labels`, so `expected_rank_diff` would be 1. In this case, we'd
    squeeze `labels` if `rank(predictions) - rank(labels) == 0`, and
    `predictions` if `rank(predictions) - rank(labels) == 2`.

    This will use static shape if available. Otherwise, it will add graph
    operations, which could result in a performance hit.

    Args:
      labels: Label values, a `Tensor` whose dimensions match `predictions`.
      predictions: Predicted values, a `Tensor` of arbitrary dimensions.
      expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.
      name: Name of the op.

    Returns:
      Tuple of `labels` and `predictions`, possibly with last dim squeezed.
    remove_squeezable_dimensionsN   c                         t  dgS Nr   tfsqueezer   predictionsr   r   <lambda>       z.remove_squeezable_dimensions.<locals>.<lambda>c                          S r
   r   r   r$   r   r   r&          c                      r   r    r!   r   labelsr   r   r&      r'   c                      r(   r
   r   r   r*   r   r   r&      r)   )r   
name_scoper   Zis_tensor_or_extension_typer"   convert_to_tensorshapendimsZdimsZis_compatible_withr#   rankcondequal)	r+   r%   Zexpected_rank_diffnameZpredictions_shapeZpredictions_rankZlabels_shapeZlabels_rank	rank_diffr   )r+   r%   r   r   ]   sX   







$r   c                    s  j }|j}dur^j }|j}|dur,|dur,|| dks$|d dkr+t\n2tt fddtdt d fdd}ttd|\du rffS j }|j}	|	dkrufS |dur|	dur|	| dkrtdgn||	 dkrtdgfS t}
|
t fddfd	d
  fdd}tt|
dfdd|fS )a  Squeeze or expand last dimension if needed.

    1. Squeezes last dim of `y_pred` or `y_true` if their rank differs by 1
    (using `remove_squeezable_dimensions`).
    2. Squeezes or expands last dim of `sample_weight` if its rank differs by 1
    from the new rank of `y_pred`.
    If `sample_weight` is scalar, it is kept scalar.

    This will use static shape if available. Otherwise, it will add graph
    operations, which could result in a performance hit.

    Args:
      y_pred: Predicted values, a `Tensor` of arbitrary dimensions.
      y_true: Optional label `Tensor` whose dimensions match `y_pred`.
      sample_weight: Optional weight scalar or `Tensor` whose dimensions match
        `y_pred`.

    Returns:
      Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them possibly has
      the last dimension squeezed,
      `sample_weight` could be extended by one dimension.
      If `sample_weight` is None, (y_pred, y_true) is returned.
    Nr   r   c                      s
   t  S r
   )r   r   y_predy_truer   r   r&      s   
 z.squeeze_or_expand_dimensions.<locals>.<lambda>c                      s   t  fddS )Nc                      s    fS r
   r   r   r5   r   r   r&      s    z@squeeze_or_expand_dimensions.<locals>.<lambda>.<locals>.<lambda>)r"   r1   r   )is_last_dim_1squeeze_dimsr6   r7   r   r   r&      s    r   c                      r   r    r!   r   sample_weightr   r   r&      r'   c                     s*   fdd} t t  d| fddS )Nc                      r   r    )r"   expand_dimsr   r:   r   r   r&      r'   zMsqueeze_or_expand_dimensions.<locals>._maybe_expand_weights.<locals>.<lambda>r   c                      r(   r
   r   r   r:   r   r   r&      r)   r"   r1   r2   )Zexpand_weights)r4   r;   r   r   _maybe_expand_weights   s   z;squeeze_or_expand_dimensions.<locals>._maybe_expand_weightsc                      s   t t d S )Nr   r=   r   )r>   maybe_squeeze_weightsr4   r   r   _maybe_adjust_weights   s   z;squeeze_or_expand_dimensions.<locals>._maybe_adjust_weightsc                      r(   r
   r   r   r:   r   r   r&      r)   )	r.   r/   r   r"   r0   r2   r1   r#   r<   )r6   r7   r;   Zy_pred_shapeZy_pred_rankZy_true_shapeZy_true_rankZmaybe_squeeze_dimsZweights_shapeZweights_rankZweights_rank_tensorr@   r   )r>   r8   r?   r4   r;   r9   r6   r7   r   squeeze_or_expand_dimensions   sN   





rA   c                 C   s   t | }t jj||ddS )a:  Computes a safe mean of the losses.

    Args:
      losses: `Tensor` whose elements contain individual loss measurements.
      num_present: The number of measurable elements in `losses`.

    Returns:
      A scalar representing the mean of `losses`. If `num_present` is zero,
        then zero is returned.
    valuer3   )r"   
reduce_sumZmathZdivide_no_nan)lossesZnum_presentZ
total_lossr   r   r   
_safe_mean   s   
rF   c                 C   sH   t d}tjtj| |d| jdW  d   S 1 sw   Y  dS )z3Computes the number of elements in `losses` tensor.Znum_elementsrC   )dtypeN)r   r,   r"   castsizerG   )rE   Zscoper   r   r   _num_elements  s   $rJ   c                 C   s8   |t jkr	| }|S t| }|t jkrt|t| }|S )z2Reduces the individual weighted loss measurements.)r   r   r"   rD   r   rF   rJ   )weighted_losses	reductionlossr   r   r   reduce_weighted_loss  s   


rN   z/keras.__internal__.losses.compute_weighted_lossc           	      C   s  t | |t jkrt j}|du rd}t|pdb |tjj	 _
t| tjtjfs0t| } t|tjtjfs>t|}| jjsN| j}t| d} d}nd}t|| j}t| d|\} }}t| |}t||}|rst||}|W  d   S 1 sw   Y  dS )a  Computes the weighted loss.

    Args:
      losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
      sample_weight: Optional `Tensor` whose rank is either 0, or the same rank
        as `losses`, or be broadcastable to `losses`.
      reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `SUM_OVER_BATCH_SIZE`.
      name: Optional name for the op.

    Raises:
      ValueError: If the shape of `sample_weight` is not compatible with
        `losses`.

    Returns:
      Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
      `NONE`, this has the same shape as `losses`; otherwise, it is scalar.
    N      ?Zweighted_lossfloat32TF)r   r   r   r   r   r,   r"   compatr   Zget_default_graphZ_last_loss_reduction
isinstancer   ZKerasTensorZRaggedTensorr-   rG   is_floatingrH   rA   ZmultiplyrN   )	rE   r;   rL   r3   Zinput_dtypeZinput_casted_rK   rM   r   r   r   compute_weighted_loss!  s<   





$rU   c                 C   s$   t j j}|dkr| d| 9 } | S )zBScales and returns the given loss value by the number of replicas.r   rO   )r"   Z
distributeZget_strategyZnum_replicas_in_sync)Z
loss_valueZnum_replicasr   r   r   scale_loss_for_distributioni  s   rV   c                    st   d | D ](}|j jr$ du s|j j jkr|j  n|j  hddhkr$d |j jr,|   S q r8 fdd| D } | S )ar  Cast a list of losses to a common dtype.

    If any loss is floating-point, they will all be casted to the most-precise
    floating-point loss. Otherwise the losses are not casted. We also skip
    casting losses if there are any complex losses.

    Args:
      losses: A list of losses.

    Returns:
      `losses`, but they have been casted to a common dtype.
    NZbfloat16Zfloat16rP   c                    s   g | ]}t | qS r   )r"   rH   ).0rM   Zhighest_floatr   r   
<listcomp>  s    z/cast_losses_to_common_dtype.<locals>.<listcomp>)rG   rS   rI   Z
is_complex)rE   rM   r   rX   r   cast_losses_to_common_dtypeq  s   rZ   c                 C   s   t | ddS )zReturns Keras mask from tensor.Z_keras_maskN)getattr)y_pr   r   r   get_mask  s   r]   c                 C   sD   |dur t || j}|durt||d\}}}||9 }|S |}|S )z2Applies any mask on predictions to sample weights.Nr:   )r"   rH   rG   rA   )r\   swmaskrT   r   r   r   
apply_mask  s   r`   c                 C   s\   |dur(t || j}|tjtjfv r(t t || j}t |}||| 9 }t| ||S )z;Redistribute sample weights considering only valid entries.N)	r"   rH   rG   r   r   r   rI   rD   r`   )rE   r^   r_   rL   totalZvalidr   r   r   apply_valid_mask  s   	
rb   )r   N)NN)r   Ztensorflow.compat.v2rQ   Zv2r"   Zkerasr   Zkeras.enginer   Zkeras.utilsr   Z tensorflow.python.util.tf_exportr   r   r   rA   rF   rJ   r   rN   rU   rV   rZ   r]   r`   rb   r   r   r   r   <module>   s2   
A

F\

G