Module keras.losses

Built-in loss functions.

Expand source code
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# pylint: disable=g-classes-have-attributes
"""Built-in loss functions."""

import tensorflow.compat.v2 as tf

import abc
import functools
from keras import backend
from keras.utils import losses_utils
from keras.utils import tf_utils
from keras.utils.generic_utils import deserialize_keras_object
from keras.utils.generic_utils import serialize_keras_object
from tensorflow.python.ops.ragged import ragged_map_ops
from tensorflow.python.ops.ragged import ragged_util
from tensorflow.python.util import dispatch
from tensorflow.python.util.tf_export import keras_export
from tensorflow.tools.docs import doc_controls


@keras_export('keras.losses.Loss')
class Loss:
  """Loss base class.

  To be implemented by subclasses:
  * `call()`: Contains the logic for loss calculation using `y_true`, `y_pred`.

  Example subclass implementation:

  ```python
  class MeanSquaredError(Loss):

    def call(self, y_true, y_pred):
      y_pred = tf.convert_to_tensor_v2(y_pred)
      y_true = tf.cast(y_true, y_pred.dtype)
      return tf.reduce_mean(math_ops.square(y_pred - y_true), axis=-1)
  ```

  When used with `tf.distribute.Strategy`, outside of built-in training loops
  such as `tf.keras` `compile` and `fit`, please use 'SUM' or 'NONE' reduction
  types, and reduce losses explicitly in your training loop. Using 'AUTO' or
  'SUM_OVER_BATCH_SIZE' will raise an error.

  Please see this custom training [tutorial](
    https://www.tensorflow.org/tutorials/distribute/custom_training) for more
  details on this.

  You can implement 'SUM_OVER_BATCH_SIZE' using global batch size like:

  ```python
  with strategy.scope():
    loss_obj = tf.keras.losses.CategoricalCrossentropy(
        reduction=tf.keras.losses.Reduction.NONE)
    ....
    loss = (tf.reduce_sum(loss_obj(labels, predictions)) *
            (1. / global_batch_size))
  ```
  """

  def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name=None):
    """Initializes `Loss` class.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance.
    """
    losses_utils.ReductionV2.validate(reduction)
    self.reduction = reduction
    self.name = name
    # SUM_OVER_BATCH is only allowed in losses managed by `fit` or
    # CannedEstimators.
    self._allow_sum_over_batch_size = False
    self._set_name_scope()

  def _set_name_scope(self):
    """Creates a valid `name_scope` name."""
    if self.name is None:
      self._name_scope = self.__class__.__name__
    elif self.name == '<lambda>':
      self._name_scope = 'lambda'
    else:
      # E.g. '_my_loss' => 'my_loss'
      self._name_scope = self.name.strip('_')

  def __call__(self, y_true, y_pred, sample_weight=None):
    """Invokes the `Loss` instance.

    Args:
      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except
        sparse loss functions such as sparse categorical crossentropy where
        shape = `[batch_size, d0, .. dN-1]`
      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
      sample_weight: Optional `sample_weight` acts as a coefficient for the
        loss. If a scalar is provided, then the loss is simply scaled by the
        given value. If `sample_weight` is a tensor of size `[batch_size]`, then
        the total loss for each sample of the batch is rescaled by the
        corresponding element in the `sample_weight` vector. If the shape of
        `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be broadcasted to
        this shape), then each loss element of `y_pred` is scaled
        by the corresponding value of `sample_weight`. (Note on`dN-1`: all loss
          functions reduce by 1 dimension, usually axis=-1.)

    Returns:
      Weighted loss float `Tensor`. If `reduction` is `NONE`, this has
        shape `[batch_size, d0, .. dN-1]`; otherwise, it is scalar. (Note `dN-1`
        because all loss functions reduce by 1 dimension, usually axis=-1.)

    Raises:
      ValueError: If the shape of `sample_weight` is invalid.
    """
    # If we are wrapping a lambda function strip '<>' from the name as it is not
    # accepted in scope name.
    graph_ctx = tf_utils.graph_context_for_symbolic_tensors(
        y_true, y_pred, sample_weight)
    with backend.name_scope(self._name_scope), graph_ctx:
      if tf.executing_eagerly():
        call_fn = self.call
      else:
        call_fn = tf.__internal__.autograph.tf_convert(self.call, tf.__internal__.autograph.control_status_ctx())
      losses = call_fn(y_true, y_pred)
      return losses_utils.compute_weighted_loss(
          losses, sample_weight, reduction=self._get_reduction())

  @classmethod
  def from_config(cls, config):
    """Instantiates a `Loss` from its config (output of `get_config()`).

    Args:
        config: Output of `get_config()`.

    Returns:
        A `Loss` instance.
    """
    return cls(**config)

  def get_config(self):
    """Returns the config dictionary for a `Loss` instance."""
    return {'reduction': self.reduction, 'name': self.name}

  @abc.abstractmethod
  @doc_controls.for_subclass_implementers
  def call(self, y_true, y_pred):
    """Invokes the `Loss` instance.

    Args:
      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except
        sparse loss functions such as sparse categorical crossentropy where
        shape = `[batch_size, d0, .. dN-1]`
      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`

    Returns:
      Loss values with the shape `[batch_size, d0, .. dN-1]`.
    """
    raise NotImplementedError('Must be implemented in subclasses.')

  def _get_reduction(self):
    """Handles `AUTO` reduction cases and returns the reduction value."""
    if (not self._allow_sum_over_batch_size and
        tf.distribute.has_strategy() and
        (self.reduction == losses_utils.ReductionV2.AUTO or
         self.reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE)):
      raise ValueError(
          'Please use `tf.keras.losses.Reduction.SUM` or '
          '`tf.keras.losses.Reduction.NONE` for loss reduction when losses are '
          'used with `tf.distribute.Strategy` outside of the built-in training '
          'loops. You can implement '
          '`tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE` using global batch '
          'size like:\n```\nwith strategy.scope():\n'
          '    loss_obj = tf.keras.losses.CategoricalCrossentropy('
          'reduction=tf.keras.losses.Reduction.NONE)\n....\n'
          '    loss = tf.reduce_sum(loss_obj(labels, predictions)) * '
          '(1. / global_batch_size)\n```\nPlease see '
          'https://www.tensorflow.org/tutorials/distribute/custom_training'
          ' for more details.')

    if self.reduction == losses_utils.ReductionV2.AUTO:
      return losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE
    return self.reduction


class LossFunctionWrapper(Loss):
  """Wraps a loss function in the `Loss` class."""

  def __init__(self,
               fn,
               reduction=losses_utils.ReductionV2.AUTO,
               name=None,
               **kwargs):
    """Initializes `LossFunctionWrapper` class.

    Args:
      fn: The loss function to wrap, with signature `fn(y_true, y_pred,
        **kwargs)`.
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance.
      **kwargs: The keyword arguments that are passed on to `fn`.
    """
    super().__init__(reduction=reduction, name=name)
    self.fn = fn
    self._fn_kwargs = kwargs

  def call(self, y_true, y_pred):
    """Invokes the `LossFunctionWrapper` instance.

    Args:
      y_true: Ground truth values.
      y_pred: The predicted values.

    Returns:
      Loss values per sample.
    """
    if tf.is_tensor(y_pred) and tf.is_tensor(y_true):
      y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(y_pred, y_true)

    ag_fn = tf.__internal__.autograph.tf_convert(self.fn, tf.__internal__.autograph.control_status_ctx())
    return ag_fn(y_true, y_pred, **self._fn_kwargs)

  def get_config(self):
    config = {}
    for k, v in self._fn_kwargs.items():
      config[k] = backend.eval(v) if tf_utils.is_tensor_or_variable(v) else v
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))


@keras_export('keras.losses.MeanSquaredError')
class MeanSquaredError(LossFunctionWrapper):
  """Computes the mean of squares of errors between labels and predictions.

  `loss = square(y_true - y_pred)`

  Standalone usage:

  >>> y_true = [[0., 1.], [0., 0.]]
  >>> y_pred = [[1., 1.], [1., 0.]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> mse = tf.keras.losses.MeanSquaredError()
  >>> mse(y_true, y_pred).numpy()
  0.5

  >>> # Calling with 'sample_weight'.
  >>> mse(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
  0.25

  >>> # Using 'sum' reduction type.
  >>> mse = tf.keras.losses.MeanSquaredError(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> mse(y_true, y_pred).numpy()
  1.0

  >>> # Using 'none' reduction type.
  >>> mse = tf.keras.losses.MeanSquaredError(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> mse(y_true, y_pred).numpy()
  array([0.5, 0.5], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.MeanSquaredError())
  ```
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.AUTO,
               name='mean_squared_error'):
    """Initializes `MeanSquaredError` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to 'mean_squared_error'.
    """
    super().__init__(mean_squared_error, name=name, reduction=reduction)


@keras_export('keras.losses.MeanAbsoluteError')
class MeanAbsoluteError(LossFunctionWrapper):
  """Computes the mean of absolute difference between labels and predictions.

  `loss = abs(y_true - y_pred)`

  Standalone usage:

  >>> y_true = [[0., 1.], [0., 0.]]
  >>> y_pred = [[1., 1.], [1., 0.]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> mae = tf.keras.losses.MeanAbsoluteError()
  >>> mae(y_true, y_pred).numpy()
  0.5

  >>> # Calling with 'sample_weight'.
  >>> mae(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
  0.25

  >>> # Using 'sum' reduction type.
  >>> mae = tf.keras.losses.MeanAbsoluteError(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> mae(y_true, y_pred).numpy()
  1.0

  >>> # Using 'none' reduction type.
  >>> mae = tf.keras.losses.MeanAbsoluteError(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> mae(y_true, y_pred).numpy()
  array([0.5, 0.5], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.MeanAbsoluteError())
  ```
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.AUTO,
               name='mean_absolute_error'):
    """Initializes `MeanAbsoluteError` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to 'mean_absolute_error'.
    """
    super().__init__(mean_absolute_error, name=name, reduction=reduction)


@keras_export('keras.losses.MeanAbsolutePercentageError')
class MeanAbsolutePercentageError(LossFunctionWrapper):
  """Computes the mean absolute percentage error between `y_true` and `y_pred`.

  `loss = 100 * abs(y_true - y_pred) / y_true`

  Standalone usage:

  >>> y_true = [[2., 1.], [2., 3.]]
  >>> y_pred = [[1., 1.], [1., 0.]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> mape = tf.keras.losses.MeanAbsolutePercentageError()
  >>> mape(y_true, y_pred).numpy()
  50.

  >>> # Calling with 'sample_weight'.
  >>> mape(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
  20.

  >>> # Using 'sum' reduction type.
  >>> mape = tf.keras.losses.MeanAbsolutePercentageError(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> mape(y_true, y_pred).numpy()
  100.

  >>> # Using 'none' reduction type.
  >>> mape = tf.keras.losses.MeanAbsolutePercentageError(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> mape(y_true, y_pred).numpy()
  array([25., 75.], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd',
                loss=tf.keras.losses.MeanAbsolutePercentageError())
  ```
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.AUTO,
               name='mean_absolute_percentage_error'):
    """Initializes `MeanAbsolutePercentageError` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to
        'mean_absolute_percentage_error'.
    """
    super().__init__(
        mean_absolute_percentage_error, name=name, reduction=reduction)


@keras_export('keras.losses.MeanSquaredLogarithmicError')
class MeanSquaredLogarithmicError(LossFunctionWrapper):
  """Computes the mean squared logarithmic error between `y_true` and `y_pred`.

  `loss = square(log(y_true + 1.) - log(y_pred + 1.))`

  Standalone usage:

  >>> y_true = [[0., 1.], [0., 0.]]
  >>> y_pred = [[1., 1.], [1., 0.]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> msle = tf.keras.losses.MeanSquaredLogarithmicError()
  >>> msle(y_true, y_pred).numpy()
  0.240

  >>> # Calling with 'sample_weight'.
  >>> msle(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
  0.120

  >>> # Using 'sum' reduction type.
  >>> msle = tf.keras.losses.MeanSquaredLogarithmicError(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> msle(y_true, y_pred).numpy()
  0.480

  >>> # Using 'none' reduction type.
  >>> msle = tf.keras.losses.MeanSquaredLogarithmicError(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> msle(y_true, y_pred).numpy()
  array([0.240, 0.240], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd',
                loss=tf.keras.losses.MeanSquaredLogarithmicError())
  ```
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.AUTO,
               name='mean_squared_logarithmic_error'):
    """Initializes `MeanSquaredLogarithmicError` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to
        'mean_squared_logarithmic_error'.
    """
    super().__init__(
        mean_squared_logarithmic_error, name=name, reduction=reduction)


@keras_export('keras.losses.BinaryCrossentropy')
class BinaryCrossentropy(LossFunctionWrapper):
  """Computes the cross-entropy loss between true labels and predicted labels.

  Use this cross-entropy loss for binary (0 or 1) classification applications.
  The loss function requires the following inputs:

  - `y_true` (true label): This is either 0 or 1.
  - `y_pred` (predicted value): This is the model's prediction, i.e, a single
    floating-point value which either represents a
    [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf]
    when `from_logits=True`) or a probability (i.e, value in [0., 1.] when
    `from_logits=False`).

  **Recommended Usage:** (set `from_logits=True`)

  With `tf.keras` API:

  ```python
  model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    ....
  )
  ```

  As a standalone function:

  >>> # Example 1: (batch_size = 1, number of samples = 4)
  >>> y_true = [0, 1, 0, 0]
  >>> y_pred = [-18.6, 0.51, 2.94, -12.8]
  >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
  >>> bce(y_true, y_pred).numpy()
  0.865

  >>> # Example 2: (batch_size = 2, number of samples = 4)
  >>> y_true = [[0, 1], [0, 0]]
  >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]]
  >>> # Using default 'auto'/'sum_over_batch_size' reduction type.
  >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
  >>> bce(y_true, y_pred).numpy()
  0.865
  >>> # Using 'sample_weight' attribute
  >>> bce(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
  0.243
  >>> # Using 'sum' reduction` type.
  >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> bce(y_true, y_pred).numpy()
  1.730
  >>> # Using 'none' reduction type.
  >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> bce(y_true, y_pred).numpy()
  array([0.235, 1.496], dtype=float32)

  **Default Usage:** (set `from_logits=False`)

  >>> # Make the following updates to the above "Recommended Usage" section
  >>> # 1. Set `from_logits=False`
  >>> tf.keras.losses.BinaryCrossentropy() # OR ...('from_logits=False')
  >>> # 2. Update `y_pred` to use probabilities instead of logits
  >>> y_pred = [0.6, 0.3, 0.2, 0.8] # OR [[0.6, 0.3], [0.2, 0.8]]
  """

  def __init__(self,
               from_logits=False,
               label_smoothing=0,
               axis=-1,
               reduction=losses_utils.ReductionV2.AUTO,
               name='binary_crossentropy'):
    """Initializes `BinaryCrossentropy` instance.

    Args:
      from_logits: Whether to interpret `y_pred` as a tensor of
        [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we
          assume that `y_pred` contains probabilities (i.e., values in [0, 1]).
      label_smoothing: Float in [0, 1]. When 0, no smoothing occurs. When > 0,
        we compute the loss between the predicted labels and a smoothed version
        of the true labels, where the smoothing squeezes the labels towards 0.5.
        Larger values of `label_smoothing` correspond to heavier smoothing.
      axis: The axis along which to compute crossentropy (the features axis).
        Defaults to -1.
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Name for the op. Defaults to 'binary_crossentropy'.
    """
    super().__init__(
        binary_crossentropy,
        name=name,
        reduction=reduction,
        from_logits=from_logits,
        label_smoothing=label_smoothing,
        axis=axis)
    self.from_logits = from_logits


@keras_export('keras.losses.CategoricalCrossentropy')
class CategoricalCrossentropy(LossFunctionWrapper):
  """Computes the crossentropy loss between the labels and predictions.

  Use this crossentropy loss function when there are two or more label classes.
  We expect labels to be provided in a `one_hot` representation. If you want to
  provide labels as integers, please use `SparseCategoricalCrossentropy` loss.
  There should be `# classes` floating point values per feature.

  In the snippet below, there is `# classes` floating pointing values per
  example. The shape of both `y_pred` and `y_true` are
  `[batch_size, num_classes]`.

  Standalone usage:

  >>> y_true = [[0, 1, 0], [0, 0, 1]]
  >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> cce = tf.keras.losses.CategoricalCrossentropy()
  >>> cce(y_true, y_pred).numpy()
  1.177

  >>> # Calling with 'sample_weight'.
  >>> cce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()
  0.814

  >>> # Using 'sum' reduction type.
  >>> cce = tf.keras.losses.CategoricalCrossentropy(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> cce(y_true, y_pred).numpy()
  2.354

  >>> # Using 'none' reduction type.
  >>> cce = tf.keras.losses.CategoricalCrossentropy(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> cce(y_true, y_pred).numpy()
  array([0.0513, 2.303], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.CategoricalCrossentropy())
  ```
  """

  def __init__(self,
               from_logits=False,
               label_smoothing=0,
               axis=-1,
               reduction=losses_utils.ReductionV2.AUTO,
               name='categorical_crossentropy'):
    """Initializes `CategoricalCrossentropy` instance.

    Args:
      from_logits: Whether `y_pred` is expected to be a logits tensor. By
        default, we assume that `y_pred` encodes a probability distribution.
      label_smoothing: Float in [0, 1]. When > 0, label values are smoothed,
        meaning the confidence on label values are relaxed. For example, if
        `0.1`, use `0.1 / num_classes` for non-target labels and
        `0.9 + 0.1 / num_classes` for target labels.
      axis: The axis along which to compute crossentropy (the features axis).
        Defaults to -1.
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance.
        Defaults to 'categorical_crossentropy'.
    """
    super().__init__(
        categorical_crossentropy,
        name=name,
        reduction=reduction,
        from_logits=from_logits,
        label_smoothing=label_smoothing,
        axis=axis)


@keras_export('keras.losses.SparseCategoricalCrossentropy')
class SparseCategoricalCrossentropy(LossFunctionWrapper):
  """Computes the crossentropy loss between the labels and predictions.

  Use this crossentropy loss function when there are two or more label classes.
  We expect labels to be provided as integers. If you want to provide labels
  using `one-hot` representation, please use `CategoricalCrossentropy` loss.
  There should be `# classes` floating point values per feature for `y_pred`
  and a single floating point value per feature for `y_true`.

  In the snippet below, there is a single floating point value per example for
  `y_true` and `# classes` floating pointing values per example for `y_pred`.
  The shape of `y_true` is `[batch_size]` and the shape of `y_pred` is
  `[batch_size, num_classes]`.

  Standalone usage:

  >>> y_true = [1, 2]
  >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> scce = tf.keras.losses.SparseCategoricalCrossentropy()
  >>> scce(y_true, y_pred).numpy()
  1.177

  >>> # Calling with 'sample_weight'.
  >>> scce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()
  0.814

  >>> # Using 'sum' reduction type.
  >>> scce = tf.keras.losses.SparseCategoricalCrossentropy(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> scce(y_true, y_pred).numpy()
  2.354

  >>> # Using 'none' reduction type.
  >>> scce = tf.keras.losses.SparseCategoricalCrossentropy(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> scce(y_true, y_pred).numpy()
  array([0.0513, 2.303], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd',
                loss=tf.keras.losses.SparseCategoricalCrossentropy())
  ```
  """

  def __init__(self,
               from_logits=False,
               reduction=losses_utils.ReductionV2.AUTO,
               name='sparse_categorical_crossentropy'):
    """Initializes `SparseCategoricalCrossentropy` instance.

    Args:
      from_logits: Whether `y_pred` is expected to be a logits tensor. By
        default, we assume that `y_pred` encodes a probability distribution.
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to
        'sparse_categorical_crossentropy'.
    """
    super().__init__(
        sparse_categorical_crossentropy,
        name=name,
        reduction=reduction,
        from_logits=from_logits)


@keras_export('keras.losses.Hinge')
class Hinge(LossFunctionWrapper):
  """Computes the hinge loss between `y_true` and `y_pred`.

  `loss = maximum(1 - y_true * y_pred, 0)`

  `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
  provided we will convert them to -1 or 1.

  Standalone usage:

  >>> y_true = [[0., 1.], [0., 0.]]
  >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> h = tf.keras.losses.Hinge()
  >>> h(y_true, y_pred).numpy()
  1.3

  >>> # Calling with 'sample_weight'.
  >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
  0.55

  >>> # Using 'sum' reduction type.
  >>> h = tf.keras.losses.Hinge(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> h(y_true, y_pred).numpy()
  2.6

  >>> # Using 'none' reduction type.
  >>> h = tf.keras.losses.Hinge(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> h(y_true, y_pred).numpy()
  array([1.1, 1.5], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.Hinge())
  ```
  """

  def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name='hinge'):
    """Initializes `Hinge` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to 'hinge'.
    """
    super().__init__(hinge, name=name, reduction=reduction)


@keras_export('keras.losses.SquaredHinge')
class SquaredHinge(LossFunctionWrapper):
  """Computes the squared hinge loss between `y_true` and `y_pred`.

  `loss = square(maximum(1 - y_true * y_pred, 0))`

  `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
  provided we will convert them to -1 or 1.

  Standalone usage:

  >>> y_true = [[0., 1.], [0., 0.]]
  >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> h = tf.keras.losses.SquaredHinge()
  >>> h(y_true, y_pred).numpy()
  1.86

  >>> # Calling with 'sample_weight'.
  >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
  0.73

  >>> # Using 'sum' reduction type.
  >>> h = tf.keras.losses.SquaredHinge(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> h(y_true, y_pred).numpy()
  3.72

  >>> # Using 'none' reduction type.
  >>> h = tf.keras.losses.SquaredHinge(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> h(y_true, y_pred).numpy()
  array([1.46, 2.26], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.SquaredHinge())
  ```
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.AUTO,
               name='squared_hinge'):
    """Initializes `SquaredHinge` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to 'squared_hinge'.
    """
    super().__init__(squared_hinge, name=name, reduction=reduction)


@keras_export('keras.losses.CategoricalHinge')
class CategoricalHinge(LossFunctionWrapper):
  """Computes the categorical hinge loss between `y_true` and `y_pred`.

  `loss = maximum(neg - pos + 1, 0)`
  where `neg=maximum((1-y_true)*y_pred) and pos=sum(y_true*y_pred)`

  Standalone usage:

  >>> y_true = [[0, 1], [0, 0]]
  >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> h = tf.keras.losses.CategoricalHinge()
  >>> h(y_true, y_pred).numpy()
  1.4

  >>> # Calling with 'sample_weight'.
  >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
  0.6

  >>> # Using 'sum' reduction type.
  >>> h = tf.keras.losses.CategoricalHinge(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> h(y_true, y_pred).numpy()
  2.8

  >>> # Using 'none' reduction type.
  >>> h = tf.keras.losses.CategoricalHinge(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> h(y_true, y_pred).numpy()
  array([1.2, 1.6], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.CategoricalHinge())
  ```
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.AUTO,
               name='categorical_hinge'):
    """Initializes `CategoricalHinge` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to 'categorical_hinge'.
    """
    super().__init__(categorical_hinge, name=name, reduction=reduction)


@keras_export('keras.losses.Poisson')
class Poisson(LossFunctionWrapper):
  """Computes the Poisson loss between `y_true` and `y_pred`.

  `loss = y_pred - y_true * log(y_pred)`

  Standalone usage:

  >>> y_true = [[0., 1.], [0., 0.]]
  >>> y_pred = [[1., 1.], [0., 0.]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> p = tf.keras.losses.Poisson()
  >>> p(y_true, y_pred).numpy()
  0.5

  >>> # Calling with 'sample_weight'.
  >>> p(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
  0.4

  >>> # Using 'sum' reduction type.
  >>> p = tf.keras.losses.Poisson(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> p(y_true, y_pred).numpy()
  0.999

  >>> # Using 'none' reduction type.
  >>> p = tf.keras.losses.Poisson(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> p(y_true, y_pred).numpy()
  array([0.999, 0.], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.Poisson())
  ```
  """

  def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name='poisson'):
    """Initializes `Poisson` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to 'poisson'.
    """
    super().__init__(poisson, name=name, reduction=reduction)


@keras_export('keras.losses.LogCosh')
class LogCosh(LossFunctionWrapper):
  """Computes the logarithm of the hyperbolic cosine of the prediction error.

  `logcosh = log((exp(x) + exp(-x))/2)`,
  where x is the error `y_pred - y_true`.

  Standalone usage:

  >>> y_true = [[0., 1.], [0., 0.]]
  >>> y_pred = [[1., 1.], [0., 0.]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> l = tf.keras.losses.LogCosh()
  >>> l(y_true, y_pred).numpy()
  0.108

  >>> # Calling with 'sample_weight'.
  >>> l(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
  0.087

  >>> # Using 'sum' reduction type.
  >>> l = tf.keras.losses.LogCosh(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> l(y_true, y_pred).numpy()
  0.217

  >>> # Using 'none' reduction type.
  >>> l = tf.keras.losses.LogCosh(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> l(y_true, y_pred).numpy()
  array([0.217, 0.], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.LogCosh())
  ```
  """

  def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name='log_cosh'):
    """Initializes `LogCosh` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to 'log_cosh'.
    """
    super().__init__(log_cosh, name=name, reduction=reduction)


@keras_export('keras.losses.KLDivergence')
class KLDivergence(LossFunctionWrapper):
  """Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`.

  `loss = y_true * log(y_true / y_pred)`

  See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence

  Standalone usage:

  >>> y_true = [[0, 1], [0, 0]]
  >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> kl = tf.keras.losses.KLDivergence()
  >>> kl(y_true, y_pred).numpy()
  0.458

  >>> # Calling with 'sample_weight'.
  >>> kl(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
  0.366

  >>> # Using 'sum' reduction type.
  >>> kl = tf.keras.losses.KLDivergence(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> kl(y_true, y_pred).numpy()
  0.916

  >>> # Using 'none' reduction type.
  >>> kl = tf.keras.losses.KLDivergence(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> kl(y_true, y_pred).numpy()
  array([0.916, -3.08e-06], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.KLDivergence())
  ```
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.AUTO,
               name='kl_divergence'):
    """Initializes `KLDivergence` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to 'kl_divergence'.
    """
    super().__init__(kl_divergence, name=name, reduction=reduction)


@keras_export('keras.losses.Huber')
class Huber(LossFunctionWrapper):
  """Computes the Huber loss between `y_true` and `y_pred`.

  For each value x in `error = y_true - y_pred`:

  ```
  loss = 0.5 * x^2                  if |x| <= d
  loss = 0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```
  where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss

  Standalone usage:

  >>> y_true = [[0, 1], [0, 0]]
  >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> h = tf.keras.losses.Huber()
  >>> h(y_true, y_pred).numpy()
  0.155

  >>> # Calling with 'sample_weight'.
  >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
  0.09

  >>> # Using 'sum' reduction type.
  >>> h = tf.keras.losses.Huber(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> h(y_true, y_pred).numpy()
  0.31

  >>> # Using 'none' reduction type.
  >>> h = tf.keras.losses.Huber(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> h(y_true, y_pred).numpy()
  array([0.18, 0.13], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.Huber())
  ```
  """

  def __init__(self,
               delta=1.0,
               reduction=losses_utils.ReductionV2.AUTO,
               name='huber_loss'):
    """Initializes `Huber` instance.

    Args:
      delta: A float, the point where the Huber loss function changes from a
        quadratic to linear.
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to 'huber_loss'.
    """
    super().__init__(huber, name=name, reduction=reduction, delta=delta)


@keras_export('keras.metrics.mean_squared_error', 'keras.metrics.mse',
              'keras.metrics.MSE', 'keras.losses.mean_squared_error',
              'keras.losses.mse', 'keras.losses.MSE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_squared_error(y_true, y_pred):
  """Computes the mean squared error between labels and predictions.

  After computing the squared distance between the inputs, the mean value over
  the last dimension is returned.

  `loss = mean(square(y_true - y_pred), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.randint(0, 2, size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.mean_squared_error(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> assert np.array_equal(
  ...     loss.numpy(), np.mean(np.square(y_true - y_pred), axis=-1))

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Mean squared error values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)


def _ragged_tensor_apply_loss(loss_fn, y_true, y_pred, y_pred_extra_dim=False):
  """Apply a loss function on a per batch basis.

  Args:
    loss_fn: The loss function
    y_true: truth values (RaggedTensor)
    y_pred: predicted values (RaggedTensor)
    y_pred_extra_dim: whether y_pred has an additional dimension compared to
      y_true

  Returns:
    Loss-function result. A dense tensor if the output has a single dimension
    (per-batch loss value); a ragged tensor otherwise.
  """

  def rt_is_equiv_dense(rt):
    """Returns true if this RaggedTensor has the same row_lenghts across

       all ragged dimensions and thus can be converted to a dense tensor
       without loss of information.

    Args:
      rt: RaggedTensor.
    """
    return tf.reduce_all([
        tf.equal(
            tf.math.reduce_variance(tf.cast(row_lens, backend.floatx())),
            tf.constant([0.])) for row_lens in rt.nested_row_lengths()
    ])

  def _convert_to_dense(inputs):
    return tuple(
        rt.to_tensor() if isinstance(rt, tf.RaggedTensor) else rt
        for rt in inputs)

  def _call_loss(inputs, ragged_output):
    """ Adapt the result to ragged or dense tensor according to the expected

        output type. This is done so that all the return values of the map
        operation have the same type.
    """
    r = loss_fn(*inputs)
    if ragged_output and not isinstance(r, tf.RaggedTensor):
      r = tf.RaggedTensor.from_tensor(r)
    elif not ragged_output and isinstance(r, tf.RaggedTensor):
      r = r.to_tensor()
    return r

  def _wrapper(inputs, ragged_output):
    _, y_pred = inputs
    if isinstance(y_pred, tf.RaggedTensor):
      return tf.cond(
          rt_is_equiv_dense(y_pred),
          lambda: _call_loss(_convert_to_dense(inputs), ragged_output),
          lambda: _call_loss(inputs, ragged_output))

    return loss_fn(*inputs)

  if not isinstance(y_true, tf.RaggedTensor):
    return loss_fn(y_true, y_pred.to_tensor())

  lshape = y_pred.shape.as_list()[1:-1]
  if len(lshape) > 0:
    spec = tf.RaggedTensorSpec(shape=lshape, dtype=y_pred.dtype)
  else:
    spec = tf.TensorSpec(shape=[], dtype=y_pred.dtype)

  nested_splits_list = [rt.nested_row_splits for rt in (y_true, y_pred)]
  if y_pred_extra_dim:
    # The last dimension of a categorical prediction may be ragged or not.
    rdims = [len(slist) for slist in nested_splits_list]
    if rdims[0] == rdims[1] - 1:
      nested_splits_list[1] = nested_splits_list[1][:-1]

  map_fn = functools.partial(_wrapper, ragged_output=len(lshape) > 1)

  assertion_list = ragged_util.assert_splits_match(nested_splits_list)
  with tf.control_dependencies(assertion_list):
    return ragged_map_ops.map_fn(map_fn, elems=(y_true, y_pred), dtype=spec)


@dispatch.dispatch_for_types(mean_squared_error, tf.RaggedTensor)
def _ragged_tensor_mse(y_true, y_pred):
  """Implements support for handling RaggedTensors.

  Args:
    y_true: RaggedTensor truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: RaggedTensor predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Mean squared error values. shape = `[batch_size, d0, .. dN-1]`.
    When the number of dimensions of the batch feature vector [d0, .. dN] is
    greater than one the return value is a RaggedTensor. Otherwise a Dense
    tensor with dimensions [batch_size] is returned.
  """
  return _ragged_tensor_apply_loss(mean_squared_error, y_true, y_pred)


@keras_export('keras.metrics.mean_absolute_error', 'keras.metrics.mae',
              'keras.metrics.MAE', 'keras.losses.mean_absolute_error',
              'keras.losses.mae', 'keras.losses.MAE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_absolute_error(y_true, y_pred):
  """Computes the mean absolute error between labels and predictions.

  `loss = mean(abs(y_true - y_pred), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.randint(0, 2, size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.mean_absolute_error(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> assert np.array_equal(
  ...     loss.numpy(), np.mean(np.abs(y_true - y_pred), axis=-1))

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Mean absolute error values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  return backend.mean(tf.abs(y_pred - y_true), axis=-1)


@dispatch.dispatch_for_types(mean_absolute_error, tf.RaggedTensor)
def _ragged_tensor_mae(y_true, y_pred):
  """RaggedTensor adapter for mean_absolute_error."""
  return _ragged_tensor_apply_loss(mean_absolute_error, y_true, y_pred)


@keras_export('keras.metrics.mean_absolute_percentage_error',
              'keras.metrics.mape', 'keras.metrics.MAPE',
              'keras.losses.mean_absolute_percentage_error',
              'keras.losses.mape', 'keras.losses.MAPE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_absolute_percentage_error(y_true, y_pred):
  """Computes the mean absolute percentage error between `y_true` and `y_pred`.

  `loss = 100 * mean(abs((y_true - y_pred) / y_true), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.random(size=(2, 3))
  >>> y_true = np.maximum(y_true, 1e-7)  # Prevent division by zero
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.mean_absolute_percentage_error(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> assert np.array_equal(
  ...     loss.numpy(),
  ...     100. * np.mean(np.abs((y_true - y_pred) / y_true), axis=-1))

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Mean absolute percentage error values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  diff = tf.abs(
      (y_true - y_pred) / backend.maximum(tf.abs(y_true),
                                          backend.epsilon()))
  return 100. * backend.mean(diff, axis=-1)


@dispatch.dispatch_for_types(mean_absolute_percentage_error,
                             tf.RaggedTensor)
def _ragged_tensor_mape(y_true, y_pred):
  """Support RaggedTensors."""
  return _ragged_tensor_apply_loss(mean_absolute_percentage_error, y_true,
                                   y_pred)


@keras_export('keras.metrics.mean_squared_logarithmic_error',
              'keras.metrics.msle', 'keras.metrics.MSLE',
              'keras.losses.mean_squared_logarithmic_error',
              'keras.losses.msle', 'keras.losses.MSLE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_squared_logarithmic_error(y_true, y_pred):
  """Computes the mean squared logarithmic error between `y_true` and `y_pred`.

  `loss = mean(square(log(y_true + 1) - log(y_pred + 1)), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.randint(0, 2, size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.mean_squared_logarithmic_error(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> y_true = np.maximum(y_true, 1e-7)
  >>> y_pred = np.maximum(y_pred, 1e-7)
  >>> assert np.allclose(
  ...     loss.numpy(),
  ...     np.mean(
  ...         np.square(np.log(y_true + 1.) - np.log(y_pred + 1.)), axis=-1))

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Mean squared logarithmic error values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  first_log = tf.math.log(backend.maximum(y_pred, backend.epsilon()) + 1.)
  second_log = tf.math.log(backend.maximum(y_true, backend.epsilon()) + 1.)
  return backend.mean(
      tf.math.squared_difference(first_log, second_log), axis=-1)


@dispatch.dispatch_for_types(mean_squared_logarithmic_error,
                             tf.RaggedTensor)
def _ragged_tensor_msle(y_true, y_pred):
  """Implements support for handling RaggedTensors."""
  return _ragged_tensor_apply_loss(mean_squared_logarithmic_error, y_true,
                                   y_pred)


def _maybe_convert_labels(y_true):
  """Converts binary labels into -1/1."""
  are_zeros = tf.equal(y_true, 0)
  are_ones = tf.equal(y_true, 1)
  is_binary = tf.reduce_all(tf.logical_or(are_zeros, are_ones))

  def _convert_binary_labels():
    # Convert the binary labels to -1 or 1.
    return 2. * y_true - 1.

  updated_y_true = tf.__internal__.smart_cond.smart_cond(is_binary, _convert_binary_labels,
                                         lambda: y_true)
  return updated_y_true


@keras_export('keras.metrics.squared_hinge', 'keras.losses.squared_hinge')
@tf.__internal__.dispatch.add_dispatch_support
def squared_hinge(y_true, y_pred):
  """Computes the squared hinge loss between `y_true` and `y_pred`.

  `loss = mean(square(maximum(1 - y_true * y_pred, 0)), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.choice([-1, 1], size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.squared_hinge(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> assert np.array_equal(
  ...     loss.numpy(),
  ...     np.mean(np.square(np.maximum(1. - y_true * y_pred, 0.)), axis=-1))

  Args:
    y_true: The ground truth values. `y_true` values are expected to be -1 or 1.
      If binary (0 or 1) labels are provided we will convert them to -1 or 1.
      shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
     Squared hinge loss values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  y_true = _maybe_convert_labels(y_true)
  return backend.mean(
      tf.square(tf.maximum(1. - y_true * y_pred, 0.)), axis=-1)


@keras_export('keras.metrics.hinge', 'keras.losses.hinge')
@tf.__internal__.dispatch.add_dispatch_support
def hinge(y_true, y_pred):
  """Computes the hinge loss between `y_true` and `y_pred`.

  `loss = mean(maximum(1 - y_true * y_pred, 0), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.choice([-1, 1], size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.hinge(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> assert np.array_equal(
  ...     loss.numpy(),
  ...     np.mean(np.maximum(1. - y_true * y_pred, 0.), axis=-1))

  Args:
    y_true: The ground truth values. `y_true` values are expected to be -1 or 1.
      If binary (0 or 1) labels are provided they will be converted to -1 or 1.
      shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Hinge loss values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  y_true = _maybe_convert_labels(y_true)
  return backend.mean(tf.maximum(1. - y_true * y_pred, 0.), axis=-1)


@keras_export('keras.losses.categorical_hinge')
@tf.__internal__.dispatch.add_dispatch_support
def categorical_hinge(y_true, y_pred):
  """Computes the categorical hinge loss between `y_true` and `y_pred`.

  `loss = maximum(neg - pos + 1, 0)`
  where `neg=maximum((1-y_true)*y_pred) and pos=sum(y_true*y_pred)`

  Standalone usage:

  >>> y_true = np.random.randint(0, 3, size=(2,))
  >>> y_true = tf.keras.utils.to_categorical(y_true, num_classes=3)
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.categorical_hinge(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> pos = np.sum(y_true * y_pred, axis=-1)
  >>> neg = np.amax((1. - y_true) * y_pred, axis=-1)
  >>> assert np.array_equal(loss.numpy(), np.maximum(0., neg - pos + 1.))

  Args:
    y_true: The ground truth values. `y_true` values are expected to be
    either `{-1, +1}` or `{0, 1}` (i.e. a one-hot-encoded tensor).
    y_pred: The predicted values.

  Returns:
    Categorical hinge loss values.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  pos = tf.reduce_sum(y_true * y_pred, axis=-1)
  neg = tf.reduce_max((1. - y_true) * y_pred, axis=-1)
  zero = tf.cast(0., y_pred.dtype)
  return tf.maximum(neg - pos + 1., zero)


@keras_export('keras.losses.huber', v1=[])
@tf.__internal__.dispatch.add_dispatch_support
def huber(y_true, y_pred, delta=1.0):
  """Computes Huber loss value.

  For each value x in `error = y_true - y_pred`:

  ```
  loss = 0.5 * x^2                  if |x| <= d
  loss = d * |x| - 0.5 * d^2        if |x| > d
  ```
  where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss

  Args:
    y_true: tensor of true targets.
    y_pred: tensor of predicted targets.
    delta: A float, the point where the Huber loss function changes from a
      quadratic to linear.

  Returns:
    Tensor with one scalar loss entry per sample.
  """
  y_pred = tf.cast(y_pred, dtype=backend.floatx())
  y_true = tf.cast(y_true, dtype=backend.floatx())
  delta = tf.cast(delta, dtype=backend.floatx())
  error = tf.subtract(y_pred, y_true)
  abs_error = tf.abs(error)
  half = tf.convert_to_tensor(0.5, dtype=abs_error.dtype)
  return backend.mean(
      tf.where(abs_error <= delta, half * tf.square(error),
                         delta * abs_error - half * tf.square(delta)),
      axis=-1)


@keras_export('keras.losses.log_cosh', 'keras.losses.logcosh',
              'keras.metrics.log_cosh', 'keras.metrics.logcosh')
@tf.__internal__.dispatch.add_dispatch_support
def log_cosh(y_true, y_pred):
  """Logarithm of the hyperbolic cosine of the prediction error.

  `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and
  to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works mostly
  like the mean squared error, but will not be so strongly affected by the
  occasional wildly incorrect prediction.

  Standalone usage:

  >>> y_true = np.random.random(size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.logcosh(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> x = y_pred - y_true
  >>> assert np.allclose(
  ...     loss.numpy(),
  ...     np.mean(x + np.log(np.exp(-2. * x) + 1.) - math_ops.log(2.), axis=-1),
  ...     atol=1e-5)

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Logcosh error values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)

  def _logcosh(x):
    return x + tf.math.softplus(-2. * x) - tf.cast(
        tf.math.log(2.), x.dtype)

  return backend.mean(_logcosh(y_pred - y_true), axis=-1)


@keras_export('keras.metrics.categorical_crossentropy',
              'keras.losses.categorical_crossentropy')
@tf.__internal__.dispatch.add_dispatch_support
def categorical_crossentropy(y_true,
                             y_pred,
                             from_logits=False,
                             label_smoothing=0,
                             axis=-1):
  """Computes the categorical crossentropy loss.

  Standalone usage:

  >>> y_true = [[0, 1, 0], [0, 0, 1]]
  >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
  >>> loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> loss.numpy()
  array([0.0513, 2.303], dtype=float32)

  Args:
    y_true: Tensor of one-hot true targets.
    y_pred: Tensor of predicted targets.
    from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
      we assume that `y_pred` encodes a probability distribution.
    label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
      example, if `0.1`, use `0.1 / num_classes` for non-target labels
      and `0.9 + 0.1 / num_classes` for target labels.
    axis: Defaults to -1. The dimension along which the entropy is
      computed.

  Returns:
    Categorical crossentropy loss value.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  label_smoothing = tf.convert_to_tensor(
      label_smoothing, dtype=backend.floatx())

  def _smooth_labels():
    num_classes = tf.cast(tf.shape(y_true)[-1], y_pred.dtype)
    return y_true * (1.0 - label_smoothing) + (label_smoothing / num_classes)

  y_true = tf.__internal__.smart_cond.smart_cond(label_smoothing, _smooth_labels,
                                 lambda: y_true)

  return backend.categorical_crossentropy(
      y_true, y_pred, from_logits=from_logits, axis=axis)


@dispatch.dispatch_for_types(categorical_crossentropy,
                             tf.RaggedTensor)
def _ragged_tensor_categorical_crossentropy(y_true,
                                            y_pred,
                                            from_logits=False,
                                            label_smoothing=0,
                                            axis=-1):
  """Implements support for handling RaggedTensors.

  Args:
    y_true: Tensor of one-hot true targets.
    y_pred: Tensor of predicted targets.
    from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
      we assume that `y_pred` encodes a probability distribution.
    label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
      example, if `0.1`, use `0.1 / num_classes` for non-target labels
      and `0.9 + 0.1 / num_classes` for target labels.
    axis: The axis along which to compute crossentropy (the features axis).
        Defaults to -1.

  Returns:
    Categorical crossentropy loss value.

  Expected shape: (batch, sequence_len, n_classes) with sequence_len
  being variable per batch.
  Return shape: (batch, sequence_len).

  When used by CategoricalCrossentropy() with the default reduction
  (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the
  number of elements independent of the batch. E.g. if the RaggedTensor
  has 2 batches with [2, 1] values respectivly the resulting loss is
  the sum of the individual loss values divided by 3.
  """
  fn = functools.partial(
      categorical_crossentropy,
      from_logits=from_logits,
      label_smoothing=label_smoothing,
      axis=axis)
  return _ragged_tensor_apply_loss(fn, y_true, y_pred)


@keras_export('keras.metrics.sparse_categorical_crossentropy',
              'keras.losses.sparse_categorical_crossentropy')
@tf.__internal__.dispatch.add_dispatch_support
def sparse_categorical_crossentropy(y_true, y_pred, from_logits=False, axis=-1):
  """Computes the sparse categorical crossentropy loss.

  Standalone usage:

  >>> y_true = [1, 2]
  >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
  >>> loss = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> loss.numpy()
  array([0.0513, 2.303], dtype=float32)

  Args:
    y_true: Ground truth values.
    y_pred: The predicted values.
    from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
      we assume that `y_pred` encodes a probability distribution.
    axis: Defaults to -1. The dimension along which the entropy is
      computed.

  Returns:
    Sparse categorical crossentropy loss value.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  return backend.sparse_categorical_crossentropy(
      y_true, y_pred, from_logits=from_logits, axis=axis)


@dispatch.dispatch_for_types(sparse_categorical_crossentropy,
                             tf.RaggedTensor)
def _ragged_tensor_sparse_categorical_crossentropy(y_true,
                                                   y_pred,
                                                   from_logits=False,
                                                   axis=-1):
  """ Implements support for handling RaggedTensors.

      Expected y_pred shape: (batch, sequence_len, n_classes) with sequence_len
      being variable per batch.
      Return shape: (batch, sequence_len).

      When used by SparseCategoricalCrossentropy() with the default reduction
      (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the
      number of elements independent of the batch. E.g. if the RaggedTensor
      has 2 batches with [2, 1] values respectively, the resulting loss is
      the sum of the individual loss values divided by 3.
  """
  fn = functools.partial(
      sparse_categorical_crossentropy, from_logits=from_logits, axis=axis)
  return _ragged_tensor_apply_loss(fn, y_true, y_pred, y_pred_extra_dim=True)


@keras_export('keras.metrics.binary_crossentropy',
              'keras.losses.binary_crossentropy')
@tf.__internal__.dispatch.add_dispatch_support
def binary_crossentropy(y_true,
                        y_pred,
                        from_logits=False,
                        label_smoothing=0,
                        axis=-1):
  """Computes the binary crossentropy loss.

  Standalone usage:

  >>> y_true = [[0, 1], [0, 0]]
  >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
  >>> loss = tf.keras.losses.binary_crossentropy(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> loss.numpy()
  array([0.916 , 0.714], dtype=float32)

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
    from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
      we assume that `y_pred` encodes a probability distribution.
    label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by
      squeezing them towards 0.5 That is, using `1. - 0.5 * label_smoothing`
      for the target class and `0.5 * label_smoothing` for the non-target class.
    axis: The axis along which the mean is computed. Defaults to -1.

  Returns:
    Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  label_smoothing = tf.convert_to_tensor(
      label_smoothing, dtype=backend.floatx())

  def _smooth_labels():
    return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing

  y_true = tf.__internal__.smart_cond.smart_cond(label_smoothing, _smooth_labels,
                                 lambda: y_true)

  return backend.mean(
      backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits),
      axis=axis)


@dispatch.dispatch_for_types(binary_crossentropy, tf.RaggedTensor)
def _ragged_tensor_binary_crossentropy(y_true,
                                       y_pred,
                                       from_logits=False,
                                       label_smoothing=0,
                                       axis=-1):
  """Implements support for handling RaggedTensors.

  Args:
    y_true: Tensor of one-hot true targets.
    y_pred: Tensor of predicted targets.
    from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
      we assume that `y_pred` encodes a probability distribution.
    label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
      example, if `0.1`, use `0.1 / num_classes` for non-target labels
      and `0.9 + 0.1 / num_classes` for target labels.
    axis: Axis along which to compute crossentropy.

  Returns:
    Binary crossentropy loss value.

  Expected shape: (batch, sequence_len) with sequence_len being variable
  per batch.
  Return shape: (batch,); returns the per batch mean of the loss values.

  When used by BinaryCrossentropy() with the default reduction
  (SUM_OVER_BATCH_SIZE), the reduction averages the per batch losses over
  the number of batches.
  """
  fn = functools.partial(
      binary_crossentropy,
      from_logits=from_logits,
      label_smoothing=label_smoothing,
      axis=axis)
  return _ragged_tensor_apply_loss(fn, y_true, y_pred)


@keras_export('keras.metrics.kl_divergence',
              'keras.metrics.kullback_leibler_divergence', 'keras.metrics.kld',
              'keras.metrics.KLD', 'keras.losses.kl_divergence',
              'keras.losses.kullback_leibler_divergence', 'keras.losses.kld',
              'keras.losses.KLD')
@tf.__internal__.dispatch.add_dispatch_support
def kl_divergence(y_true, y_pred):
  """Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`.

  `loss = y_true * log(y_true / y_pred)`

  See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence

  Standalone usage:

  >>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float64)
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.kullback_leibler_divergence(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> y_true = tf.keras.backend.clip(y_true, 1e-7, 1)
  >>> y_pred = tf.keras.backend.clip(y_pred, 1e-7, 1)
  >>> assert np.array_equal(
  ...     loss.numpy(), np.sum(y_true * np.log(y_true / y_pred), axis=-1))

  Args:
    y_true: Tensor of true targets.
    y_pred: Tensor of predicted targets.

  Returns:
    A `Tensor` with loss.

  Raises:
    TypeError: If `y_true` cannot be cast to the `y_pred.dtype`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  y_true = backend.clip(y_true, backend.epsilon(), 1)
  y_pred = backend.clip(y_pred, backend.epsilon(), 1)
  return tf.reduce_sum(y_true * tf.math.log(y_true / y_pred), axis=-1)


@keras_export('keras.metrics.poisson', 'keras.losses.poisson')
@tf.__internal__.dispatch.add_dispatch_support
def poisson(y_true, y_pred):
  """Computes the Poisson loss between y_true and y_pred.

  The Poisson loss is the mean of the elements of the `Tensor`
  `y_pred - y_true * log(y_pred)`.

  Standalone usage:

  >>> y_true = np.random.randint(0, 2, size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.poisson(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> y_pred = y_pred + 1e-7
  >>> assert np.allclose(
  ...     loss.numpy(), np.mean(y_pred - y_true * np.log(y_pred), axis=-1),
  ...     atol=1e-5)

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
     Poisson loss value. shape = `[batch_size, d0, .. dN-1]`.

  Raises:
    InvalidArgumentError: If `y_true` and `y_pred` have incompatible shapes.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  return backend.mean(
      y_pred - y_true * tf.math.log(y_pred + backend.epsilon()), axis=-1)


@keras_export(
    'keras.losses.cosine_similarity',
    v1=[
        'keras.metrics.cosine_proximity',
        'keras.metrics.cosine',
        'keras.losses.cosine_proximity',
        'keras.losses.cosine',
        'keras.losses.cosine_similarity',
    ])
@tf.__internal__.dispatch.add_dispatch_support
def cosine_similarity(y_true, y_pred, axis=-1):
  """Computes the cosine similarity between labels and predictions.

  Note that it is a number between -1 and 1. When it is a negative number
  between -1 and 0, 0 indicates orthogonality and values closer to -1
  indicate greater similarity. The values closer to 1 indicate greater
  dissimilarity. This makes it usable as a loss function in a setting
  where you try to maximize the proximity between predictions and
  targets. If either `y_true` or `y_pred` is a zero vector, cosine
  similarity will be 0 regardless of the proximity between predictions
  and targets.

  `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))`

  Standalone usage:

  >>> y_true = [[0., 1.], [1., 1.], [1., 1.]]
  >>> y_pred = [[1., 0.], [1., 1.], [-1., -1.]]
  >>> loss = tf.keras.losses.cosine_similarity(y_true, y_pred, axis=1)
  >>> loss.numpy()
  array([-0., -0.999, 0.999], dtype=float32)

  Args:
    y_true: Tensor of true targets.
    y_pred: Tensor of predicted targets.
    axis: Axis along which to determine similarity.

  Returns:
    Cosine similarity tensor.
  """
  y_true = tf.linalg.l2_normalize(y_true, axis=axis)
  y_pred = tf.linalg.l2_normalize(y_pred, axis=axis)
  return -tf.reduce_sum(y_true * y_pred, axis=axis)


@keras_export('keras.losses.CosineSimilarity')
class CosineSimilarity(LossFunctionWrapper):
  """Computes the cosine similarity between labels and predictions.

  Note that it is a number between -1 and 1. When it is a negative number
  between -1 and 0, 0 indicates orthogonality and values closer to -1
  indicate greater similarity. The values closer to 1 indicate greater
  dissimilarity. This makes it usable as a loss function in a setting
  where you try to maximize the proximity between predictions and targets.
  If either `y_true` or `y_pred` is a zero vector, cosine similarity will be 0
  regardless of the proximity between predictions and targets.

  `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))`

  Standalone usage:

  >>> y_true = [[0., 1.], [1., 1.]]
  >>> y_pred = [[1., 0.], [1., 1.]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1)
  >>> # l2_norm(y_true) = [[0., 1.], [1./1.414, 1./1.414]]
  >>> # l2_norm(y_pred) = [[1., 0.], [1./1.414, 1./1.414]]
  >>> # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]]
  >>> # loss = mean(sum(l2_norm(y_true) . l2_norm(y_pred), axis=1))
  >>> #       = -((0. + 0.) +  (0.5 + 0.5)) / 2
  >>> cosine_loss(y_true, y_pred).numpy()
  -0.5

  >>> # Calling with 'sample_weight'.
  >>> cosine_loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
  -0.0999

  >>> # Using 'sum' reduction type.
  >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> cosine_loss(y_true, y_pred).numpy()
  -0.999

  >>> # Using 'none' reduction type.
  >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> cosine_loss(y_true, y_pred).numpy()
  array([-0., -0.999], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.CosineSimilarity(axis=1))
  ```

  Args:
    axis: The axis along which the cosine similarity is computed
      (the features axis). Defaults to -1.
    reduction: Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `AUTO`. `AUTO` indicates that the reduction option will
      be determined by the usage context. For almost all cases this defaults to
      `SUM_OVER_BATCH_SIZE`. When used with `tf.distribute.Strategy`, outside of
      built-in training loops such as `tf.keras` `compile` and `fit`, using
      `AUTO` or `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
      custom training [tutorial]
      (https://www.tensorflow.org/tutorials/distribute/custom_training) for more
        details.
    name: Optional name for the instance.
  """

  def __init__(self,
               axis=-1,
               reduction=losses_utils.ReductionV2.AUTO,
               name='cosine_similarity'):
    super().__init__(
        cosine_similarity, reduction=reduction, name=name, axis=axis)


# Aliases.

bce = BCE = binary_crossentropy
mse = MSE = mean_squared_error
mae = MAE = mean_absolute_error
mape = MAPE = mean_absolute_percentage_error
msle = MSLE = mean_squared_logarithmic_error
kld = KLD = kullback_leibler_divergence = kl_divergence
logcosh = log_cosh
huber_loss = huber


def is_categorical_crossentropy(loss):
  result = ((isinstance(loss, CategoricalCrossentropy) or
             (isinstance(loss, LossFunctionWrapper) and
              loss.fn == categorical_crossentropy) or
             (hasattr(loss, '__name__') and
              loss.__name__ == 'categorical_crossentropy') or
             (loss == 'categorical_crossentropy')))
  return result


@keras_export('keras.losses.serialize')
def serialize(loss):
  """Serializes loss function or `Loss` instance.

  Args:
    loss: A Keras `Loss` instance or a loss function.

  Returns:
    Loss configuration dictionary.
  """
  return serialize_keras_object(loss)


@keras_export('keras.losses.deserialize')
def deserialize(name, custom_objects=None):
  """Deserializes a serialized loss class/function instance.

  Args:
      name: Loss configuration.
      custom_objects: Optional dictionary mapping names (strings) to custom
        objects (classes and functions) to be considered during deserialization.

  Returns:
      A Keras `Loss` instance or a loss function.
  """
  return deserialize_keras_object(
      name,
      module_objects=globals(),
      custom_objects=custom_objects,
      printable_module_name='loss function')


@keras_export('keras.losses.get')
def get(identifier):
  """Retrieves a Keras loss as a `function`/`Loss` class instance.

  The `identifier` may be the string name of a loss function or `Loss` class.

  >>> loss = tf.keras.losses.get("categorical_crossentropy")
  >>> type(loss)
  <class 'function'>
  >>> loss = tf.keras.losses.get("CategoricalCrossentropy")
  >>> type(loss)
  <class '...keras.losses.CategoricalCrossentropy'>

  You can also specify `config` of the loss to this function by passing dict
  containing `class_name` and `config` as an identifier. Also note that the
  `class_name` must map to a `Loss` class

  >>> identifier = {"class_name": "CategoricalCrossentropy",
  ...               "config": {"from_logits": True}}
  >>> loss = tf.keras.losses.get(identifier)
  >>> type(loss)
  <class '...keras.losses.CategoricalCrossentropy'>

  Args:
    identifier: A loss identifier. One of None or string name of a loss
      function/class or loss configuration dictionary or a loss function or a
      loss class instance.

  Returns:
    A Keras loss as a `function`/ `Loss` class instance.

  Raises:
    ValueError: If `identifier` cannot be interpreted.
  """
  if identifier is None:
    return None
  if isinstance(identifier, str):
    identifier = str(identifier)
    return deserialize(identifier)
  if isinstance(identifier, dict):
    return deserialize(identifier)
  if callable(identifier):
    return identifier
  raise ValueError(
      f'Could not interpret loss function identifier: {identifier}')


LABEL_DTYPES_FOR_LOSSES = {
    tf.compat.v1.losses.sparse_softmax_cross_entropy: 'int32',
    sparse_categorical_crossentropy: 'int32'
}

Functions

def BCE(y_true, y_pred, from_logits=False, label_smoothing=0, axis=-1)

Computes the binary crossentropy loss.

Standalone usage:

>>> y_true = [[0, 1], [0, 0]]
>>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
>>> loss = tf.keras.losses.binary_crossentropy(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> loss.numpy()
array([0.916 , 0.714], dtype=float32)

Args

y_true
Ground truth values. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].
from_logits
Whether y_pred is expected to be a logits tensor. By default, we assume that y_pred encodes a probability distribution.
label_smoothing
Float in [0, 1]. If > 0 then smooth the labels by squeezing them towards 0.5 That is, using 1. - 0.5 * label_smoothing for the target class and 0.5 * label_smoothing for the non-target class.
axis
The axis along which the mean is computed. Defaults to -1.

Returns

Binary crossentropy loss value. shape = [batch_size, d0, .. dN-1].

Expand source code
@keras_export('keras.metrics.binary_crossentropy',
              'keras.losses.binary_crossentropy')
@tf.__internal__.dispatch.add_dispatch_support
def binary_crossentropy(y_true,
                        y_pred,
                        from_logits=False,
                        label_smoothing=0,
                        axis=-1):
  """Computes the binary crossentropy loss.

  Standalone usage:

  >>> y_true = [[0, 1], [0, 0]]
  >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
  >>> loss = tf.keras.losses.binary_crossentropy(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> loss.numpy()
  array([0.916 , 0.714], dtype=float32)

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
    from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
      we assume that `y_pred` encodes a probability distribution.
    label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by
      squeezing them towards 0.5 That is, using `1. - 0.5 * label_smoothing`
      for the target class and `0.5 * label_smoothing` for the non-target class.
    axis: The axis along which the mean is computed. Defaults to -1.

  Returns:
    Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  label_smoothing = tf.convert_to_tensor(
      label_smoothing, dtype=backend.floatx())

  def _smooth_labels():
    return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing

  y_true = tf.__internal__.smart_cond.smart_cond(label_smoothing, _smooth_labels,
                                 lambda: y_true)

  return backend.mean(
      backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits),
      axis=axis)
def KLD(y_true, y_pred)

Computes Kullback-Leibler divergence loss between y_true and y_pred.

loss = y_true * log(y_true / y_pred)

See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence

Standalone usage:

>>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float64)
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.kullback_leibler_divergence(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> y_true = tf.keras.backend.clip(y_true, 1e-7, 1)
>>> y_pred = tf.keras.backend.clip(y_pred, 1e-7, 1)
>>> assert np.array_equal(
...     loss.numpy(), np.sum(y_true * np.log(y_true / y_pred), axis=-1))

Args

y_true
Tensor of true targets.
y_pred
Tensor of predicted targets.

Returns

A Tensor with loss.

Raises

TypeError
If y_true cannot be cast to the y_pred.dtype.
Expand source code
@keras_export('keras.metrics.kl_divergence',
              'keras.metrics.kullback_leibler_divergence', 'keras.metrics.kld',
              'keras.metrics.KLD', 'keras.losses.kl_divergence',
              'keras.losses.kullback_leibler_divergence', 'keras.losses.kld',
              'keras.losses.KLD')
@tf.__internal__.dispatch.add_dispatch_support
def kl_divergence(y_true, y_pred):
  """Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`.

  `loss = y_true * log(y_true / y_pred)`

  See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence

  Standalone usage:

  >>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float64)
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.kullback_leibler_divergence(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> y_true = tf.keras.backend.clip(y_true, 1e-7, 1)
  >>> y_pred = tf.keras.backend.clip(y_pred, 1e-7, 1)
  >>> assert np.array_equal(
  ...     loss.numpy(), np.sum(y_true * np.log(y_true / y_pred), axis=-1))

  Args:
    y_true: Tensor of true targets.
    y_pred: Tensor of predicted targets.

  Returns:
    A `Tensor` with loss.

  Raises:
    TypeError: If `y_true` cannot be cast to the `y_pred.dtype`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  y_true = backend.clip(y_true, backend.epsilon(), 1)
  y_pred = backend.clip(y_pred, backend.epsilon(), 1)
  return tf.reduce_sum(y_true * tf.math.log(y_true / y_pred), axis=-1)
def MAE(y_true, y_pred)

Computes the mean absolute error between labels and predictions.

loss = mean(abs(y_true - y_pred), axis=-1)

Standalone usage:

>>> y_true = np.random.randint(0, 2, size=(2, 3))
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.mean_absolute_error(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> assert np.array_equal(
...     loss.numpy(), np.mean(np.abs(y_true - y_pred), axis=-1))

Args

y_true
Ground truth values. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].

Returns

Mean absolute error values. shape = [batch_size, d0, .. dN-1].

Expand source code
@keras_export('keras.metrics.mean_absolute_error', 'keras.metrics.mae',
              'keras.metrics.MAE', 'keras.losses.mean_absolute_error',
              'keras.losses.mae', 'keras.losses.MAE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_absolute_error(y_true, y_pred):
  """Computes the mean absolute error between labels and predictions.

  `loss = mean(abs(y_true - y_pred), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.randint(0, 2, size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.mean_absolute_error(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> assert np.array_equal(
  ...     loss.numpy(), np.mean(np.abs(y_true - y_pred), axis=-1))

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Mean absolute error values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  return backend.mean(tf.abs(y_pred - y_true), axis=-1)
def MAPE(y_true, y_pred)

Computes the mean absolute percentage error between y_true and y_pred.

loss = 100 * mean(abs((y_true - y_pred) / y_true), axis=-1)

Standalone usage:

>>> y_true = np.random.random(size=(2, 3))
>>> y_true = np.maximum(y_true, 1e-7)  # Prevent division by zero
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.mean_absolute_percentage_error(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> assert np.array_equal(
...     loss.numpy(),
...     100. * np.mean(np.abs((y_true - y_pred) / y_true), axis=-1))

Args

y_true
Ground truth values. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].

Returns

Mean absolute percentage error values. shape = [batch_size, d0, .. dN-1].

Expand source code
@keras_export('keras.metrics.mean_absolute_percentage_error',
              'keras.metrics.mape', 'keras.metrics.MAPE',
              'keras.losses.mean_absolute_percentage_error',
              'keras.losses.mape', 'keras.losses.MAPE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_absolute_percentage_error(y_true, y_pred):
  """Computes the mean absolute percentage error between `y_true` and `y_pred`.

  `loss = 100 * mean(abs((y_true - y_pred) / y_true), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.random(size=(2, 3))
  >>> y_true = np.maximum(y_true, 1e-7)  # Prevent division by zero
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.mean_absolute_percentage_error(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> assert np.array_equal(
  ...     loss.numpy(),
  ...     100. * np.mean(np.abs((y_true - y_pred) / y_true), axis=-1))

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Mean absolute percentage error values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  diff = tf.abs(
      (y_true - y_pred) / backend.maximum(tf.abs(y_true),
                                          backend.epsilon()))
  return 100. * backend.mean(diff, axis=-1)
def MSE(y_true, y_pred)

Computes the mean squared error between labels and predictions.

After computing the squared distance between the inputs, the mean value over the last dimension is returned.

loss = mean(square(y_true - y_pred), axis=-1)

Standalone usage:

>>> y_true = np.random.randint(0, 2, size=(2, 3))
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.mean_squared_error(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> assert np.array_equal(
...     loss.numpy(), np.mean(np.square(y_true - y_pred), axis=-1))

Args

y_true
Ground truth values. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].

Returns

Mean squared error values. shape = [batch_size, d0, .. dN-1].

Expand source code
@keras_export('keras.metrics.mean_squared_error', 'keras.metrics.mse',
              'keras.metrics.MSE', 'keras.losses.mean_squared_error',
              'keras.losses.mse', 'keras.losses.MSE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_squared_error(y_true, y_pred):
  """Computes the mean squared error between labels and predictions.

  After computing the squared distance between the inputs, the mean value over
  the last dimension is returned.

  `loss = mean(square(y_true - y_pred), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.randint(0, 2, size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.mean_squared_error(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> assert np.array_equal(
  ...     loss.numpy(), np.mean(np.square(y_true - y_pred), axis=-1))

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Mean squared error values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)
def MSLE(y_true, y_pred)

Computes the mean squared logarithmic error between y_true and y_pred.

loss = mean(square(log(y_true + 1) - log(y_pred + 1)), axis=-1)

Standalone usage:

>>> y_true = np.random.randint(0, 2, size=(2, 3))
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.mean_squared_logarithmic_error(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> y_true = np.maximum(y_true, 1e-7)
>>> y_pred = np.maximum(y_pred, 1e-7)
>>> assert np.allclose(
...     loss.numpy(),
...     np.mean(
...         np.square(np.log(y_true + 1.) - np.log(y_pred + 1.)), axis=-1))

Args

y_true
Ground truth values. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].

Returns

Mean squared logarithmic error values. shape = [batch_size, d0, .. dN-1].

Expand source code
@keras_export('keras.metrics.mean_squared_logarithmic_error',
              'keras.metrics.msle', 'keras.metrics.MSLE',
              'keras.losses.mean_squared_logarithmic_error',
              'keras.losses.msle', 'keras.losses.MSLE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_squared_logarithmic_error(y_true, y_pred):
  """Computes the mean squared logarithmic error between `y_true` and `y_pred`.

  `loss = mean(square(log(y_true + 1) - log(y_pred + 1)), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.randint(0, 2, size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.mean_squared_logarithmic_error(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> y_true = np.maximum(y_true, 1e-7)
  >>> y_pred = np.maximum(y_pred, 1e-7)
  >>> assert np.allclose(
  ...     loss.numpy(),
  ...     np.mean(
  ...         np.square(np.log(y_true + 1.) - np.log(y_pred + 1.)), axis=-1))

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Mean squared logarithmic error values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  first_log = tf.math.log(backend.maximum(y_pred, backend.epsilon()) + 1.)
  second_log = tf.math.log(backend.maximum(y_true, backend.epsilon()) + 1.)
  return backend.mean(
      tf.math.squared_difference(first_log, second_log), axis=-1)
def bce(y_true, y_pred, from_logits=False, label_smoothing=0, axis=-1)

Computes the binary crossentropy loss.

Standalone usage:

>>> y_true = [[0, 1], [0, 0]]
>>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
>>> loss = tf.keras.losses.binary_crossentropy(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> loss.numpy()
array([0.916 , 0.714], dtype=float32)

Args

y_true
Ground truth values. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].
from_logits
Whether y_pred is expected to be a logits tensor. By default, we assume that y_pred encodes a probability distribution.
label_smoothing
Float in [0, 1]. If > 0 then smooth the labels by squeezing them towards 0.5 That is, using 1. - 0.5 * label_smoothing for the target class and 0.5 * label_smoothing for the non-target class.
axis
The axis along which the mean is computed. Defaults to -1.

Returns

Binary crossentropy loss value. shape = [batch_size, d0, .. dN-1].

Expand source code
@keras_export('keras.metrics.binary_crossentropy',
              'keras.losses.binary_crossentropy')
@tf.__internal__.dispatch.add_dispatch_support
def binary_crossentropy(y_true,
                        y_pred,
                        from_logits=False,
                        label_smoothing=0,
                        axis=-1):
  """Computes the binary crossentropy loss.

  Standalone usage:

  >>> y_true = [[0, 1], [0, 0]]
  >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
  >>> loss = tf.keras.losses.binary_crossentropy(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> loss.numpy()
  array([0.916 , 0.714], dtype=float32)

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
    from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
      we assume that `y_pred` encodes a probability distribution.
    label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by
      squeezing them towards 0.5 That is, using `1. - 0.5 * label_smoothing`
      for the target class and `0.5 * label_smoothing` for the non-target class.
    axis: The axis along which the mean is computed. Defaults to -1.

  Returns:
    Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  label_smoothing = tf.convert_to_tensor(
      label_smoothing, dtype=backend.floatx())

  def _smooth_labels():
    return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing

  y_true = tf.__internal__.smart_cond.smart_cond(label_smoothing, _smooth_labels,
                                 lambda: y_true)

  return backend.mean(
      backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits),
      axis=axis)
def binary_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0, axis=-1)

Computes the binary crossentropy loss.

Standalone usage:

>>> y_true = [[0, 1], [0, 0]]
>>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
>>> loss = tf.keras.losses.binary_crossentropy(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> loss.numpy()
array([0.916 , 0.714], dtype=float32)

Args

y_true
Ground truth values. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].
from_logits
Whether y_pred is expected to be a logits tensor. By default, we assume that y_pred encodes a probability distribution.
label_smoothing
Float in [0, 1]. If > 0 then smooth the labels by squeezing them towards 0.5 That is, using 1. - 0.5 * label_smoothing for the target class and 0.5 * label_smoothing for the non-target class.
axis
The axis along which the mean is computed. Defaults to -1.

Returns

Binary crossentropy loss value. shape = [batch_size, d0, .. dN-1].

Expand source code
@keras_export('keras.metrics.binary_crossentropy',
              'keras.losses.binary_crossentropy')
@tf.__internal__.dispatch.add_dispatch_support
def binary_crossentropy(y_true,
                        y_pred,
                        from_logits=False,
                        label_smoothing=0,
                        axis=-1):
  """Computes the binary crossentropy loss.

  Standalone usage:

  >>> y_true = [[0, 1], [0, 0]]
  >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
  >>> loss = tf.keras.losses.binary_crossentropy(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> loss.numpy()
  array([0.916 , 0.714], dtype=float32)

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
    from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
      we assume that `y_pred` encodes a probability distribution.
    label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by
      squeezing them towards 0.5 That is, using `1. - 0.5 * label_smoothing`
      for the target class and `0.5 * label_smoothing` for the non-target class.
    axis: The axis along which the mean is computed. Defaults to -1.

  Returns:
    Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  label_smoothing = tf.convert_to_tensor(
      label_smoothing, dtype=backend.floatx())

  def _smooth_labels():
    return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing

  y_true = tf.__internal__.smart_cond.smart_cond(label_smoothing, _smooth_labels,
                                 lambda: y_true)

  return backend.mean(
      backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits),
      axis=axis)
def categorical_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0, axis=-1)

Computes the categorical crossentropy loss.

Standalone usage:

>>> y_true = [[0, 1, 0], [0, 0, 1]]
>>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
>>> loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> loss.numpy()
array([0.0513, 2.303], dtype=float32)

Args

y_true
Tensor of one-hot true targets.
y_pred
Tensor of predicted targets.
from_logits
Whether y_pred is expected to be a logits tensor. By default, we assume that y_pred encodes a probability distribution.
label_smoothing
Float in [0, 1]. If > 0 then smooth the labels. For example, if 0.1, use 0.1 / num_classes for non-target labels and 0.9 + 0.1 / num_classes for target labels.
axis
Defaults to -1. The dimension along which the entropy is computed.

Returns

Categorical crossentropy loss value.

Expand source code
@keras_export('keras.metrics.categorical_crossentropy',
              'keras.losses.categorical_crossentropy')
@tf.__internal__.dispatch.add_dispatch_support
def categorical_crossentropy(y_true,
                             y_pred,
                             from_logits=False,
                             label_smoothing=0,
                             axis=-1):
  """Computes the categorical crossentropy loss.

  Standalone usage:

  >>> y_true = [[0, 1, 0], [0, 0, 1]]
  >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
  >>> loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> loss.numpy()
  array([0.0513, 2.303], dtype=float32)

  Args:
    y_true: Tensor of one-hot true targets.
    y_pred: Tensor of predicted targets.
    from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
      we assume that `y_pred` encodes a probability distribution.
    label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
      example, if `0.1`, use `0.1 / num_classes` for non-target labels
      and `0.9 + 0.1 / num_classes` for target labels.
    axis: Defaults to -1. The dimension along which the entropy is
      computed.

  Returns:
    Categorical crossentropy loss value.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  label_smoothing = tf.convert_to_tensor(
      label_smoothing, dtype=backend.floatx())

  def _smooth_labels():
    num_classes = tf.cast(tf.shape(y_true)[-1], y_pred.dtype)
    return y_true * (1.0 - label_smoothing) + (label_smoothing / num_classes)

  y_true = tf.__internal__.smart_cond.smart_cond(label_smoothing, _smooth_labels,
                                 lambda: y_true)

  return backend.categorical_crossentropy(
      y_true, y_pred, from_logits=from_logits, axis=axis)
def categorical_hinge(y_true, y_pred)

Computes the categorical hinge loss between y_true and y_pred.

loss = maximum(neg - pos + 1, 0) where neg=maximum((1-y_true)*y_pred) and pos=sum(y_true*y_pred)

Standalone usage:

>>> y_true = np.random.randint(0, 3, size=(2,))
>>> y_true = tf.keras.utils.to_categorical(y_true, num_classes=3)
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.categorical_hinge(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> pos = np.sum(y_true * y_pred, axis=-1)
>>> neg = np.amax((1. - y_true) * y_pred, axis=-1)
>>> assert np.array_equal(loss.numpy(), np.maximum(0., neg - pos + 1.))

Args

y_true
The ground truth values. y_true values are expected to be
either {-1, +1} or {0, 1} (i.e. a one-hot-encoded tensor).
y_pred
The predicted values.

Returns

Categorical hinge loss values.

Expand source code
@keras_export('keras.losses.categorical_hinge')
@tf.__internal__.dispatch.add_dispatch_support
def categorical_hinge(y_true, y_pred):
  """Computes the categorical hinge loss between `y_true` and `y_pred`.

  `loss = maximum(neg - pos + 1, 0)`
  where `neg=maximum((1-y_true)*y_pred) and pos=sum(y_true*y_pred)`

  Standalone usage:

  >>> y_true = np.random.randint(0, 3, size=(2,))
  >>> y_true = tf.keras.utils.to_categorical(y_true, num_classes=3)
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.categorical_hinge(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> pos = np.sum(y_true * y_pred, axis=-1)
  >>> neg = np.amax((1. - y_true) * y_pred, axis=-1)
  >>> assert np.array_equal(loss.numpy(), np.maximum(0., neg - pos + 1.))

  Args:
    y_true: The ground truth values. `y_true` values are expected to be
    either `{-1, +1}` or `{0, 1}` (i.e. a one-hot-encoded tensor).
    y_pred: The predicted values.

  Returns:
    Categorical hinge loss values.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  pos = tf.reduce_sum(y_true * y_pred, axis=-1)
  neg = tf.reduce_max((1. - y_true) * y_pred, axis=-1)
  zero = tf.cast(0., y_pred.dtype)
  return tf.maximum(neg - pos + 1., zero)
def cosine_similarity(y_true, y_pred, axis=-1)

Computes the cosine similarity between labels and predictions.

Note that it is a number between -1 and 1. When it is a negative number between -1 and 0, 0 indicates orthogonality and values closer to -1 indicate greater similarity. The values closer to 1 indicate greater dissimilarity. This makes it usable as a loss function in a setting where you try to maximize the proximity between predictions and targets. If either y_true or y_pred is a zero vector, cosine similarity will be 0 regardless of the proximity between predictions and targets.

loss = -sum(l2_norm(y_true) * l2_norm(y_pred))

Standalone usage:

>>> y_true = [[0., 1.], [1., 1.], [1., 1.]]
>>> y_pred = [[1., 0.], [1., 1.], [-1., -1.]]
>>> loss = tf.keras.losses.cosine_similarity(y_true, y_pred, axis=1)
>>> loss.numpy()
array([-0., -0.999, 0.999], dtype=float32)

Args

y_true
Tensor of true targets.
y_pred
Tensor of predicted targets.
axis
Axis along which to determine similarity.

Returns

Cosine similarity tensor.

Expand source code
@keras_export(
    'keras.losses.cosine_similarity',
    v1=[
        'keras.metrics.cosine_proximity',
        'keras.metrics.cosine',
        'keras.losses.cosine_proximity',
        'keras.losses.cosine',
        'keras.losses.cosine_similarity',
    ])
@tf.__internal__.dispatch.add_dispatch_support
def cosine_similarity(y_true, y_pred, axis=-1):
  """Computes the cosine similarity between labels and predictions.

  Note that it is a number between -1 and 1. When it is a negative number
  between -1 and 0, 0 indicates orthogonality and values closer to -1
  indicate greater similarity. The values closer to 1 indicate greater
  dissimilarity. This makes it usable as a loss function in a setting
  where you try to maximize the proximity between predictions and
  targets. If either `y_true` or `y_pred` is a zero vector, cosine
  similarity will be 0 regardless of the proximity between predictions
  and targets.

  `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))`

  Standalone usage:

  >>> y_true = [[0., 1.], [1., 1.], [1., 1.]]
  >>> y_pred = [[1., 0.], [1., 1.], [-1., -1.]]
  >>> loss = tf.keras.losses.cosine_similarity(y_true, y_pred, axis=1)
  >>> loss.numpy()
  array([-0., -0.999, 0.999], dtype=float32)

  Args:
    y_true: Tensor of true targets.
    y_pred: Tensor of predicted targets.
    axis: Axis along which to determine similarity.

  Returns:
    Cosine similarity tensor.
  """
  y_true = tf.linalg.l2_normalize(y_true, axis=axis)
  y_pred = tf.linalg.l2_normalize(y_pred, axis=axis)
  return -tf.reduce_sum(y_true * y_pred, axis=axis)
def deserialize(name, custom_objects=None)

Deserializes a serialized loss class/function instance.

Args

name
Loss configuration.
custom_objects
Optional dictionary mapping names (strings) to custom objects (classes and functions) to be considered during deserialization.

Returns

A Keras Loss instance or a loss function.

Expand source code
@keras_export('keras.losses.deserialize')
def deserialize(name, custom_objects=None):
  """Deserializes a serialized loss class/function instance.

  Args:
      name: Loss configuration.
      custom_objects: Optional dictionary mapping names (strings) to custom
        objects (classes and functions) to be considered during deserialization.

  Returns:
      A Keras `Loss` instance or a loss function.
  """
  return deserialize_keras_object(
      name,
      module_objects=globals(),
      custom_objects=custom_objects,
      printable_module_name='loss function')
def get(identifier)

Retrieves a Keras loss as a function/Loss class instance.

The identifier may be the string name of a loss function or Loss class.

>>> loss = tf.keras.losses.get("categorical_crossentropy")
>>> type(loss)
<class 'function'>
>>> loss = tf.keras.losses.get("CategoricalCrossentropy")
>>> type(loss)
<class '...keras.losses.CategoricalCrossentropy'>

You can also specify config of the loss to this function by passing dict containing class_name and config as an identifier. Also note that the class_name must map to a Loss class

>>> identifier = {"class_name": "CategoricalCrossentropy",
...               "config": {"from_logits": True}}
>>> loss = tf.keras.losses.get(identifier)
>>> type(loss)
<class '...keras.losses.CategoricalCrossentropy'>

Args

identifier
A loss identifier. One of None or string name of a loss function/class or loss configuration dictionary or a loss function or a loss class instance.

Returns

A Keras loss as a function/ Loss class instance.

Raises

ValueError
If identifier cannot be interpreted.
Expand source code
@keras_export('keras.losses.get')
def get(identifier):
  """Retrieves a Keras loss as a `function`/`Loss` class instance.

  The `identifier` may be the string name of a loss function or `Loss` class.

  >>> loss = tf.keras.losses.get("categorical_crossentropy")
  >>> type(loss)
  <class 'function'>
  >>> loss = tf.keras.losses.get("CategoricalCrossentropy")
  >>> type(loss)
  <class '...keras.losses.CategoricalCrossentropy'>

  You can also specify `config` of the loss to this function by passing dict
  containing `class_name` and `config` as an identifier. Also note that the
  `class_name` must map to a `Loss` class

  >>> identifier = {"class_name": "CategoricalCrossentropy",
  ...               "config": {"from_logits": True}}
  >>> loss = tf.keras.losses.get(identifier)
  >>> type(loss)
  <class '...keras.losses.CategoricalCrossentropy'>

  Args:
    identifier: A loss identifier. One of None or string name of a loss
      function/class or loss configuration dictionary or a loss function or a
      loss class instance.

  Returns:
    A Keras loss as a `function`/ `Loss` class instance.

  Raises:
    ValueError: If `identifier` cannot be interpreted.
  """
  if identifier is None:
    return None
  if isinstance(identifier, str):
    identifier = str(identifier)
    return deserialize(identifier)
  if isinstance(identifier, dict):
    return deserialize(identifier)
  if callable(identifier):
    return identifier
  raise ValueError(
      f'Could not interpret loss function identifier: {identifier}')
def hinge(y_true, y_pred)

Computes the hinge loss between y_true and y_pred.

loss = mean(maximum(1 - y_true * y_pred, 0), axis=-1)

Standalone usage:

>>> y_true = np.random.choice([-1, 1], size=(2, 3))
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.hinge(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> assert np.array_equal(
...     loss.numpy(),
...     np.mean(np.maximum(1. - y_true * y_pred, 0.), axis=-1))

Args

y_true
The ground truth values. y_true values are expected to be -1 or 1. If binary (0 or 1) labels are provided they will be converted to -1 or 1. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].

Returns

Hinge loss values. shape = [batch_size, d0, .. dN-1].

Expand source code
@keras_export('keras.metrics.hinge', 'keras.losses.hinge')
@tf.__internal__.dispatch.add_dispatch_support
def hinge(y_true, y_pred):
  """Computes the hinge loss between `y_true` and `y_pred`.

  `loss = mean(maximum(1 - y_true * y_pred, 0), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.choice([-1, 1], size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.hinge(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> assert np.array_equal(
  ...     loss.numpy(),
  ...     np.mean(np.maximum(1. - y_true * y_pred, 0.), axis=-1))

  Args:
    y_true: The ground truth values. `y_true` values are expected to be -1 or 1.
      If binary (0 or 1) labels are provided they will be converted to -1 or 1.
      shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Hinge loss values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  y_true = _maybe_convert_labels(y_true)
  return backend.mean(tf.maximum(1. - y_true * y_pred, 0.), axis=-1)
def huber(y_true, y_pred, delta=1.0)

Computes Huber loss value.

For each value x in error = y_true - y_pred:

loss = 0.5 * x^2                  if |x| <= d
loss = d * |x| - 0.5 * d^2        if |x| > d

where d is delta. See: https://en.wikipedia.org/wiki/Huber_loss

Args

y_true
tensor of true targets.
y_pred
tensor of predicted targets.
delta
A float, the point where the Huber loss function changes from a quadratic to linear.

Returns

Tensor with one scalar loss entry per sample.

Expand source code
@keras_export('keras.losses.huber', v1=[])
@tf.__internal__.dispatch.add_dispatch_support
def huber(y_true, y_pred, delta=1.0):
  """Computes Huber loss value.

  For each value x in `error = y_true - y_pred`:

  ```
  loss = 0.5 * x^2                  if |x| <= d
  loss = d * |x| - 0.5 * d^2        if |x| > d
  ```
  where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss

  Args:
    y_true: tensor of true targets.
    y_pred: tensor of predicted targets.
    delta: A float, the point where the Huber loss function changes from a
      quadratic to linear.

  Returns:
    Tensor with one scalar loss entry per sample.
  """
  y_pred = tf.cast(y_pred, dtype=backend.floatx())
  y_true = tf.cast(y_true, dtype=backend.floatx())
  delta = tf.cast(delta, dtype=backend.floatx())
  error = tf.subtract(y_pred, y_true)
  abs_error = tf.abs(error)
  half = tf.convert_to_tensor(0.5, dtype=abs_error.dtype)
  return backend.mean(
      tf.where(abs_error <= delta, half * tf.square(error),
                         delta * abs_error - half * tf.square(delta)),
      axis=-1)
def huber_loss(y_true, y_pred, delta=1.0)

Computes Huber loss value.

For each value x in error = y_true - y_pred:

loss = 0.5 * x^2                  if |x| <= d
loss = d * |x| - 0.5 * d^2        if |x| > d

where d is delta. See: https://en.wikipedia.org/wiki/Huber_loss

Args

y_true
tensor of true targets.
y_pred
tensor of predicted targets.
delta
A float, the point where the Huber loss function changes from a quadratic to linear.

Returns

Tensor with one scalar loss entry per sample.

Expand source code
@keras_export('keras.losses.huber', v1=[])
@tf.__internal__.dispatch.add_dispatch_support
def huber(y_true, y_pred, delta=1.0):
  """Computes Huber loss value.

  For each value x in `error = y_true - y_pred`:

  ```
  loss = 0.5 * x^2                  if |x| <= d
  loss = d * |x| - 0.5 * d^2        if |x| > d
  ```
  where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss

  Args:
    y_true: tensor of true targets.
    y_pred: tensor of predicted targets.
    delta: A float, the point where the Huber loss function changes from a
      quadratic to linear.

  Returns:
    Tensor with one scalar loss entry per sample.
  """
  y_pred = tf.cast(y_pred, dtype=backend.floatx())
  y_true = tf.cast(y_true, dtype=backend.floatx())
  delta = tf.cast(delta, dtype=backend.floatx())
  error = tf.subtract(y_pred, y_true)
  abs_error = tf.abs(error)
  half = tf.convert_to_tensor(0.5, dtype=abs_error.dtype)
  return backend.mean(
      tf.where(abs_error <= delta, half * tf.square(error),
                         delta * abs_error - half * tf.square(delta)),
      axis=-1)
def is_categorical_crossentropy(loss)
Expand source code
def is_categorical_crossentropy(loss):
  result = ((isinstance(loss, CategoricalCrossentropy) or
             (isinstance(loss, LossFunctionWrapper) and
              loss.fn == categorical_crossentropy) or
             (hasattr(loss, '__name__') and
              loss.__name__ == 'categorical_crossentropy') or
             (loss == 'categorical_crossentropy')))
  return result
def kl_divergence(y_true, y_pred)

Computes Kullback-Leibler divergence loss between y_true and y_pred.

loss = y_true * log(y_true / y_pred)

See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence

Standalone usage:

>>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float64)
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.kullback_leibler_divergence(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> y_true = tf.keras.backend.clip(y_true, 1e-7, 1)
>>> y_pred = tf.keras.backend.clip(y_pred, 1e-7, 1)
>>> assert np.array_equal(
...     loss.numpy(), np.sum(y_true * np.log(y_true / y_pred), axis=-1))

Args

y_true
Tensor of true targets.
y_pred
Tensor of predicted targets.

Returns

A Tensor with loss.

Raises

TypeError
If y_true cannot be cast to the y_pred.dtype.
Expand source code
@keras_export('keras.metrics.kl_divergence',
              'keras.metrics.kullback_leibler_divergence', 'keras.metrics.kld',
              'keras.metrics.KLD', 'keras.losses.kl_divergence',
              'keras.losses.kullback_leibler_divergence', 'keras.losses.kld',
              'keras.losses.KLD')
@tf.__internal__.dispatch.add_dispatch_support
def kl_divergence(y_true, y_pred):
  """Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`.

  `loss = y_true * log(y_true / y_pred)`

  See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence

  Standalone usage:

  >>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float64)
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.kullback_leibler_divergence(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> y_true = tf.keras.backend.clip(y_true, 1e-7, 1)
  >>> y_pred = tf.keras.backend.clip(y_pred, 1e-7, 1)
  >>> assert np.array_equal(
  ...     loss.numpy(), np.sum(y_true * np.log(y_true / y_pred), axis=-1))

  Args:
    y_true: Tensor of true targets.
    y_pred: Tensor of predicted targets.

  Returns:
    A `Tensor` with loss.

  Raises:
    TypeError: If `y_true` cannot be cast to the `y_pred.dtype`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  y_true = backend.clip(y_true, backend.epsilon(), 1)
  y_pred = backend.clip(y_pred, backend.epsilon(), 1)
  return tf.reduce_sum(y_true * tf.math.log(y_true / y_pred), axis=-1)
def kld(y_true, y_pred)

Computes Kullback-Leibler divergence loss between y_true and y_pred.

loss = y_true * log(y_true / y_pred)

See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence

Standalone usage:

>>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float64)
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.kullback_leibler_divergence(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> y_true = tf.keras.backend.clip(y_true, 1e-7, 1)
>>> y_pred = tf.keras.backend.clip(y_pred, 1e-7, 1)
>>> assert np.array_equal(
...     loss.numpy(), np.sum(y_true * np.log(y_true / y_pred), axis=-1))

Args

y_true
Tensor of true targets.
y_pred
Tensor of predicted targets.

Returns

A Tensor with loss.

Raises

TypeError
If y_true cannot be cast to the y_pred.dtype.
Expand source code
@keras_export('keras.metrics.kl_divergence',
              'keras.metrics.kullback_leibler_divergence', 'keras.metrics.kld',
              'keras.metrics.KLD', 'keras.losses.kl_divergence',
              'keras.losses.kullback_leibler_divergence', 'keras.losses.kld',
              'keras.losses.KLD')
@tf.__internal__.dispatch.add_dispatch_support
def kl_divergence(y_true, y_pred):
  """Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`.

  `loss = y_true * log(y_true / y_pred)`

  See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence

  Standalone usage:

  >>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float64)
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.kullback_leibler_divergence(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> y_true = tf.keras.backend.clip(y_true, 1e-7, 1)
  >>> y_pred = tf.keras.backend.clip(y_pred, 1e-7, 1)
  >>> assert np.array_equal(
  ...     loss.numpy(), np.sum(y_true * np.log(y_true / y_pred), axis=-1))

  Args:
    y_true: Tensor of true targets.
    y_pred: Tensor of predicted targets.

  Returns:
    A `Tensor` with loss.

  Raises:
    TypeError: If `y_true` cannot be cast to the `y_pred.dtype`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  y_true = backend.clip(y_true, backend.epsilon(), 1)
  y_pred = backend.clip(y_pred, backend.epsilon(), 1)
  return tf.reduce_sum(y_true * tf.math.log(y_true / y_pred), axis=-1)
def kullback_leibler_divergence(y_true, y_pred)

Computes Kullback-Leibler divergence loss between y_true and y_pred.

loss = y_true * log(y_true / y_pred)

See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence

Standalone usage:

>>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float64)
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.kullback_leibler_divergence(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> y_true = tf.keras.backend.clip(y_true, 1e-7, 1)
>>> y_pred = tf.keras.backend.clip(y_pred, 1e-7, 1)
>>> assert np.array_equal(
...     loss.numpy(), np.sum(y_true * np.log(y_true / y_pred), axis=-1))

Args

y_true
Tensor of true targets.
y_pred
Tensor of predicted targets.

Returns

A Tensor with loss.

Raises

TypeError
If y_true cannot be cast to the y_pred.dtype.
Expand source code
@keras_export('keras.metrics.kl_divergence',
              'keras.metrics.kullback_leibler_divergence', 'keras.metrics.kld',
              'keras.metrics.KLD', 'keras.losses.kl_divergence',
              'keras.losses.kullback_leibler_divergence', 'keras.losses.kld',
              'keras.losses.KLD')
@tf.__internal__.dispatch.add_dispatch_support
def kl_divergence(y_true, y_pred):
  """Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`.

  `loss = y_true * log(y_true / y_pred)`

  See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence

  Standalone usage:

  >>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float64)
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.kullback_leibler_divergence(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> y_true = tf.keras.backend.clip(y_true, 1e-7, 1)
  >>> y_pred = tf.keras.backend.clip(y_pred, 1e-7, 1)
  >>> assert np.array_equal(
  ...     loss.numpy(), np.sum(y_true * np.log(y_true / y_pred), axis=-1))

  Args:
    y_true: Tensor of true targets.
    y_pred: Tensor of predicted targets.

  Returns:
    A `Tensor` with loss.

  Raises:
    TypeError: If `y_true` cannot be cast to the `y_pred.dtype`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  y_true = backend.clip(y_true, backend.epsilon(), 1)
  y_pred = backend.clip(y_pred, backend.epsilon(), 1)
  return tf.reduce_sum(y_true * tf.math.log(y_true / y_pred), axis=-1)
def log_cosh(y_true, y_pred)

Logarithm of the hyperbolic cosine of the prediction error.

log(cosh(x)) is approximately equal to (x ** 2) / 2 for small x and to abs(x) - log(2) for large x. This means that 'logcosh' works mostly like the mean squared error, but will not be so strongly affected by the occasional wildly incorrect prediction.

Standalone usage:

>>> y_true = np.random.random(size=(2, 3))
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.logcosh(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> x = y_pred - y_true
>>> assert np.allclose(
...     loss.numpy(),
...     np.mean(x + np.log(np.exp(-2. * x) + 1.) - math_ops.log(2.), axis=-1),
...     atol=1e-5)

Args

y_true
Ground truth values. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].

Returns

Logcosh error values. shape = [batch_size, d0, .. dN-1].

Expand source code
@keras_export('keras.losses.log_cosh', 'keras.losses.logcosh',
              'keras.metrics.log_cosh', 'keras.metrics.logcosh')
@tf.__internal__.dispatch.add_dispatch_support
def log_cosh(y_true, y_pred):
  """Logarithm of the hyperbolic cosine of the prediction error.

  `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and
  to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works mostly
  like the mean squared error, but will not be so strongly affected by the
  occasional wildly incorrect prediction.

  Standalone usage:

  >>> y_true = np.random.random(size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.logcosh(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> x = y_pred - y_true
  >>> assert np.allclose(
  ...     loss.numpy(),
  ...     np.mean(x + np.log(np.exp(-2. * x) + 1.) - math_ops.log(2.), axis=-1),
  ...     atol=1e-5)

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Logcosh error values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)

  def _logcosh(x):
    return x + tf.math.softplus(-2. * x) - tf.cast(
        tf.math.log(2.), x.dtype)

  return backend.mean(_logcosh(y_pred - y_true), axis=-1)
def logcosh(y_true, y_pred)

Logarithm of the hyperbolic cosine of the prediction error.

log(cosh(x)) is approximately equal to (x ** 2) / 2 for small x and to abs(x) - log(2) for large x. This means that 'logcosh' works mostly like the mean squared error, but will not be so strongly affected by the occasional wildly incorrect prediction.

Standalone usage:

>>> y_true = np.random.random(size=(2, 3))
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.logcosh(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> x = y_pred - y_true
>>> assert np.allclose(
...     loss.numpy(),
...     np.mean(x + np.log(np.exp(-2. * x) + 1.) - math_ops.log(2.), axis=-1),
...     atol=1e-5)

Args

y_true
Ground truth values. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].

Returns

Logcosh error values. shape = [batch_size, d0, .. dN-1].

Expand source code
@keras_export('keras.losses.log_cosh', 'keras.losses.logcosh',
              'keras.metrics.log_cosh', 'keras.metrics.logcosh')
@tf.__internal__.dispatch.add_dispatch_support
def log_cosh(y_true, y_pred):
  """Logarithm of the hyperbolic cosine of the prediction error.

  `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and
  to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works mostly
  like the mean squared error, but will not be so strongly affected by the
  occasional wildly incorrect prediction.

  Standalone usage:

  >>> y_true = np.random.random(size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.logcosh(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> x = y_pred - y_true
  >>> assert np.allclose(
  ...     loss.numpy(),
  ...     np.mean(x + np.log(np.exp(-2. * x) + 1.) - math_ops.log(2.), axis=-1),
  ...     atol=1e-5)

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Logcosh error values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)

  def _logcosh(x):
    return x + tf.math.softplus(-2. * x) - tf.cast(
        tf.math.log(2.), x.dtype)

  return backend.mean(_logcosh(y_pred - y_true), axis=-1)
def mae(y_true, y_pred)

Computes the mean absolute error between labels and predictions.

loss = mean(abs(y_true - y_pred), axis=-1)

Standalone usage:

>>> y_true = np.random.randint(0, 2, size=(2, 3))
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.mean_absolute_error(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> assert np.array_equal(
...     loss.numpy(), np.mean(np.abs(y_true - y_pred), axis=-1))

Args

y_true
Ground truth values. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].

Returns

Mean absolute error values. shape = [batch_size, d0, .. dN-1].

Expand source code
@keras_export('keras.metrics.mean_absolute_error', 'keras.metrics.mae',
              'keras.metrics.MAE', 'keras.losses.mean_absolute_error',
              'keras.losses.mae', 'keras.losses.MAE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_absolute_error(y_true, y_pred):
  """Computes the mean absolute error between labels and predictions.

  `loss = mean(abs(y_true - y_pred), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.randint(0, 2, size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.mean_absolute_error(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> assert np.array_equal(
  ...     loss.numpy(), np.mean(np.abs(y_true - y_pred), axis=-1))

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Mean absolute error values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  return backend.mean(tf.abs(y_pred - y_true), axis=-1)
def mape(y_true, y_pred)

Computes the mean absolute percentage error between y_true and y_pred.

loss = 100 * mean(abs((y_true - y_pred) / y_true), axis=-1)

Standalone usage:

>>> y_true = np.random.random(size=(2, 3))
>>> y_true = np.maximum(y_true, 1e-7)  # Prevent division by zero
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.mean_absolute_percentage_error(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> assert np.array_equal(
...     loss.numpy(),
...     100. * np.mean(np.abs((y_true - y_pred) / y_true), axis=-1))

Args

y_true
Ground truth values. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].

Returns

Mean absolute percentage error values. shape = [batch_size, d0, .. dN-1].

Expand source code
@keras_export('keras.metrics.mean_absolute_percentage_error',
              'keras.metrics.mape', 'keras.metrics.MAPE',
              'keras.losses.mean_absolute_percentage_error',
              'keras.losses.mape', 'keras.losses.MAPE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_absolute_percentage_error(y_true, y_pred):
  """Computes the mean absolute percentage error between `y_true` and `y_pred`.

  `loss = 100 * mean(abs((y_true - y_pred) / y_true), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.random(size=(2, 3))
  >>> y_true = np.maximum(y_true, 1e-7)  # Prevent division by zero
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.mean_absolute_percentage_error(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> assert np.array_equal(
  ...     loss.numpy(),
  ...     100. * np.mean(np.abs((y_true - y_pred) / y_true), axis=-1))

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Mean absolute percentage error values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  diff = tf.abs(
      (y_true - y_pred) / backend.maximum(tf.abs(y_true),
                                          backend.epsilon()))
  return 100. * backend.mean(diff, axis=-1)
def mean_absolute_error(y_true, y_pred)

Computes the mean absolute error between labels and predictions.

loss = mean(abs(y_true - y_pred), axis=-1)

Standalone usage:

>>> y_true = np.random.randint(0, 2, size=(2, 3))
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.mean_absolute_error(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> assert np.array_equal(
...     loss.numpy(), np.mean(np.abs(y_true - y_pred), axis=-1))

Args

y_true
Ground truth values. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].

Returns

Mean absolute error values. shape = [batch_size, d0, .. dN-1].

Expand source code
@keras_export('keras.metrics.mean_absolute_error', 'keras.metrics.mae',
              'keras.metrics.MAE', 'keras.losses.mean_absolute_error',
              'keras.losses.mae', 'keras.losses.MAE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_absolute_error(y_true, y_pred):
  """Computes the mean absolute error between labels and predictions.

  `loss = mean(abs(y_true - y_pred), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.randint(0, 2, size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.mean_absolute_error(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> assert np.array_equal(
  ...     loss.numpy(), np.mean(np.abs(y_true - y_pred), axis=-1))

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Mean absolute error values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  return backend.mean(tf.abs(y_pred - y_true), axis=-1)
def mean_absolute_percentage_error(y_true, y_pred)

Computes the mean absolute percentage error between y_true and y_pred.

loss = 100 * mean(abs((y_true - y_pred) / y_true), axis=-1)

Standalone usage:

>>> y_true = np.random.random(size=(2, 3))
>>> y_true = np.maximum(y_true, 1e-7)  # Prevent division by zero
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.mean_absolute_percentage_error(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> assert np.array_equal(
...     loss.numpy(),
...     100. * np.mean(np.abs((y_true - y_pred) / y_true), axis=-1))

Args

y_true
Ground truth values. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].

Returns

Mean absolute percentage error values. shape = [batch_size, d0, .. dN-1].

Expand source code
@keras_export('keras.metrics.mean_absolute_percentage_error',
              'keras.metrics.mape', 'keras.metrics.MAPE',
              'keras.losses.mean_absolute_percentage_error',
              'keras.losses.mape', 'keras.losses.MAPE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_absolute_percentage_error(y_true, y_pred):
  """Computes the mean absolute percentage error between `y_true` and `y_pred`.

  `loss = 100 * mean(abs((y_true - y_pred) / y_true), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.random(size=(2, 3))
  >>> y_true = np.maximum(y_true, 1e-7)  # Prevent division by zero
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.mean_absolute_percentage_error(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> assert np.array_equal(
  ...     loss.numpy(),
  ...     100. * np.mean(np.abs((y_true - y_pred) / y_true), axis=-1))

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Mean absolute percentage error values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  diff = tf.abs(
      (y_true - y_pred) / backend.maximum(tf.abs(y_true),
                                          backend.epsilon()))
  return 100. * backend.mean(diff, axis=-1)
def mean_squared_error(y_true, y_pred)

Computes the mean squared error between labels and predictions.

After computing the squared distance between the inputs, the mean value over the last dimension is returned.

loss = mean(square(y_true - y_pred), axis=-1)

Standalone usage:

>>> y_true = np.random.randint(0, 2, size=(2, 3))
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.mean_squared_error(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> assert np.array_equal(
...     loss.numpy(), np.mean(np.square(y_true - y_pred), axis=-1))

Args

y_true
Ground truth values. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].

Returns

Mean squared error values. shape = [batch_size, d0, .. dN-1].

Expand source code
@keras_export('keras.metrics.mean_squared_error', 'keras.metrics.mse',
              'keras.metrics.MSE', 'keras.losses.mean_squared_error',
              'keras.losses.mse', 'keras.losses.MSE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_squared_error(y_true, y_pred):
  """Computes the mean squared error between labels and predictions.

  After computing the squared distance between the inputs, the mean value over
  the last dimension is returned.

  `loss = mean(square(y_true - y_pred), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.randint(0, 2, size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.mean_squared_error(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> assert np.array_equal(
  ...     loss.numpy(), np.mean(np.square(y_true - y_pred), axis=-1))

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Mean squared error values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)
def mean_squared_logarithmic_error(y_true, y_pred)

Computes the mean squared logarithmic error between y_true and y_pred.

loss = mean(square(log(y_true + 1) - log(y_pred + 1)), axis=-1)

Standalone usage:

>>> y_true = np.random.randint(0, 2, size=(2, 3))
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.mean_squared_logarithmic_error(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> y_true = np.maximum(y_true, 1e-7)
>>> y_pred = np.maximum(y_pred, 1e-7)
>>> assert np.allclose(
...     loss.numpy(),
...     np.mean(
...         np.square(np.log(y_true + 1.) - np.log(y_pred + 1.)), axis=-1))

Args

y_true
Ground truth values. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].

Returns

Mean squared logarithmic error values. shape = [batch_size, d0, .. dN-1].

Expand source code
@keras_export('keras.metrics.mean_squared_logarithmic_error',
              'keras.metrics.msle', 'keras.metrics.MSLE',
              'keras.losses.mean_squared_logarithmic_error',
              'keras.losses.msle', 'keras.losses.MSLE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_squared_logarithmic_error(y_true, y_pred):
  """Computes the mean squared logarithmic error between `y_true` and `y_pred`.

  `loss = mean(square(log(y_true + 1) - log(y_pred + 1)), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.randint(0, 2, size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.mean_squared_logarithmic_error(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> y_true = np.maximum(y_true, 1e-7)
  >>> y_pred = np.maximum(y_pred, 1e-7)
  >>> assert np.allclose(
  ...     loss.numpy(),
  ...     np.mean(
  ...         np.square(np.log(y_true + 1.) - np.log(y_pred + 1.)), axis=-1))

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Mean squared logarithmic error values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  first_log = tf.math.log(backend.maximum(y_pred, backend.epsilon()) + 1.)
  second_log = tf.math.log(backend.maximum(y_true, backend.epsilon()) + 1.)
  return backend.mean(
      tf.math.squared_difference(first_log, second_log), axis=-1)
def mse(y_true, y_pred)

Computes the mean squared error between labels and predictions.

After computing the squared distance between the inputs, the mean value over the last dimension is returned.

loss = mean(square(y_true - y_pred), axis=-1)

Standalone usage:

>>> y_true = np.random.randint(0, 2, size=(2, 3))
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.mean_squared_error(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> assert np.array_equal(
...     loss.numpy(), np.mean(np.square(y_true - y_pred), axis=-1))

Args

y_true
Ground truth values. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].

Returns

Mean squared error values. shape = [batch_size, d0, .. dN-1].

Expand source code
@keras_export('keras.metrics.mean_squared_error', 'keras.metrics.mse',
              'keras.metrics.MSE', 'keras.losses.mean_squared_error',
              'keras.losses.mse', 'keras.losses.MSE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_squared_error(y_true, y_pred):
  """Computes the mean squared error between labels and predictions.

  After computing the squared distance between the inputs, the mean value over
  the last dimension is returned.

  `loss = mean(square(y_true - y_pred), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.randint(0, 2, size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.mean_squared_error(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> assert np.array_equal(
  ...     loss.numpy(), np.mean(np.square(y_true - y_pred), axis=-1))

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Mean squared error values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)
def msle(y_true, y_pred)

Computes the mean squared logarithmic error between y_true and y_pred.

loss = mean(square(log(y_true + 1) - log(y_pred + 1)), axis=-1)

Standalone usage:

>>> y_true = np.random.randint(0, 2, size=(2, 3))
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.mean_squared_logarithmic_error(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> y_true = np.maximum(y_true, 1e-7)
>>> y_pred = np.maximum(y_pred, 1e-7)
>>> assert np.allclose(
...     loss.numpy(),
...     np.mean(
...         np.square(np.log(y_true + 1.) - np.log(y_pred + 1.)), axis=-1))

Args

y_true
Ground truth values. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].

Returns

Mean squared logarithmic error values. shape = [batch_size, d0, .. dN-1].

Expand source code
@keras_export('keras.metrics.mean_squared_logarithmic_error',
              'keras.metrics.msle', 'keras.metrics.MSLE',
              'keras.losses.mean_squared_logarithmic_error',
              'keras.losses.msle', 'keras.losses.MSLE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_squared_logarithmic_error(y_true, y_pred):
  """Computes the mean squared logarithmic error between `y_true` and `y_pred`.

  `loss = mean(square(log(y_true + 1) - log(y_pred + 1)), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.randint(0, 2, size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.mean_squared_logarithmic_error(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> y_true = np.maximum(y_true, 1e-7)
  >>> y_pred = np.maximum(y_pred, 1e-7)
  >>> assert np.allclose(
  ...     loss.numpy(),
  ...     np.mean(
  ...         np.square(np.log(y_true + 1.) - np.log(y_pred + 1.)), axis=-1))

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
    Mean squared logarithmic error values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  first_log = tf.math.log(backend.maximum(y_pred, backend.epsilon()) + 1.)
  second_log = tf.math.log(backend.maximum(y_true, backend.epsilon()) + 1.)
  return backend.mean(
      tf.math.squared_difference(first_log, second_log), axis=-1)
def poisson(y_true, y_pred)

Computes the Poisson loss between y_true and y_pred.

The Poisson loss is the mean of the elements of the Tensor y_pred - y_true * log(y_pred).

Standalone usage:

>>> y_true = np.random.randint(0, 2, size=(2, 3))
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.poisson(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> y_pred = y_pred + 1e-7
>>> assert np.allclose(
...     loss.numpy(), np.mean(y_pred - y_true * np.log(y_pred), axis=-1),
...     atol=1e-5)

Args

y_true
Ground truth values. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].

Returns

Poisson loss value. shape = [batch_size, d0, .. dN-1].

Raises

InvalidArgumentError
If y_true and y_pred have incompatible shapes.
Expand source code
@keras_export('keras.metrics.poisson', 'keras.losses.poisson')
@tf.__internal__.dispatch.add_dispatch_support
def poisson(y_true, y_pred):
  """Computes the Poisson loss between y_true and y_pred.

  The Poisson loss is the mean of the elements of the `Tensor`
  `y_pred - y_true * log(y_pred)`.

  Standalone usage:

  >>> y_true = np.random.randint(0, 2, size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.poisson(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> y_pred = y_pred + 1e-7
  >>> assert np.allclose(
  ...     loss.numpy(), np.mean(y_pred - y_true * np.log(y_pred), axis=-1),
  ...     atol=1e-5)

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
     Poisson loss value. shape = `[batch_size, d0, .. dN-1]`.

  Raises:
    InvalidArgumentError: If `y_true` and `y_pred` have incompatible shapes.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  return backend.mean(
      y_pred - y_true * tf.math.log(y_pred + backend.epsilon()), axis=-1)
def serialize(loss)

Serializes loss function or Loss instance.

Args

loss
A Keras Loss instance or a loss function.

Returns

Loss configuration dictionary.

Expand source code
@keras_export('keras.losses.serialize')
def serialize(loss):
  """Serializes loss function or `Loss` instance.

  Args:
    loss: A Keras `Loss` instance or a loss function.

  Returns:
    Loss configuration dictionary.
  """
  return serialize_keras_object(loss)
def sparse_categorical_crossentropy(y_true, y_pred, from_logits=False, axis=-1)

Computes the sparse categorical crossentropy loss.

Standalone usage:

>>> y_true = [1, 2]
>>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
>>> loss = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> loss.numpy()
array([0.0513, 2.303], dtype=float32)

Args

y_true
Ground truth values.
y_pred
The predicted values.
from_logits
Whether y_pred is expected to be a logits tensor. By default, we assume that y_pred encodes a probability distribution.
axis
Defaults to -1. The dimension along which the entropy is computed.

Returns

Sparse categorical crossentropy loss value.

Expand source code
@keras_export('keras.metrics.sparse_categorical_crossentropy',
              'keras.losses.sparse_categorical_crossentropy')
@tf.__internal__.dispatch.add_dispatch_support
def sparse_categorical_crossentropy(y_true, y_pred, from_logits=False, axis=-1):
  """Computes the sparse categorical crossentropy loss.

  Standalone usage:

  >>> y_true = [1, 2]
  >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
  >>> loss = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> loss.numpy()
  array([0.0513, 2.303], dtype=float32)

  Args:
    y_true: Ground truth values.
    y_pred: The predicted values.
    from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
      we assume that `y_pred` encodes a probability distribution.
    axis: Defaults to -1. The dimension along which the entropy is
      computed.

  Returns:
    Sparse categorical crossentropy loss value.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  return backend.sparse_categorical_crossentropy(
      y_true, y_pred, from_logits=from_logits, axis=axis)
def squared_hinge(y_true, y_pred)

Computes the squared hinge loss between y_true and y_pred.

loss = mean(square(maximum(1 - y_true * y_pred, 0)), axis=-1)

Standalone usage:

>>> y_true = np.random.choice([-1, 1], size=(2, 3))
>>> y_pred = np.random.random(size=(2, 3))
>>> loss = tf.keras.losses.squared_hinge(y_true, y_pred)
>>> assert loss.shape == (2,)
>>> assert np.array_equal(
...     loss.numpy(),
...     np.mean(np.square(np.maximum(1. - y_true * y_pred, 0.)), axis=-1))

Args

y_true
The ground truth values. y_true values are expected to be -1 or 1. If binary (0 or 1) labels are provided we will convert them to -1 or 1. shape = [batch_size, d0, .. dN].
y_pred
The predicted values. shape = [batch_size, d0, .. dN].

Returns

Squared hinge loss values. shape = [batch_size, d0, .. dN-1].

Expand source code
@keras_export('keras.metrics.squared_hinge', 'keras.losses.squared_hinge')
@tf.__internal__.dispatch.add_dispatch_support
def squared_hinge(y_true, y_pred):
  """Computes the squared hinge loss between `y_true` and `y_pred`.

  `loss = mean(square(maximum(1 - y_true * y_pred, 0)), axis=-1)`

  Standalone usage:

  >>> y_true = np.random.choice([-1, 1], size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = tf.keras.losses.squared_hinge(y_true, y_pred)
  >>> assert loss.shape == (2,)
  >>> assert np.array_equal(
  ...     loss.numpy(),
  ...     np.mean(np.square(np.maximum(1. - y_true * y_pred, 0.)), axis=-1))

  Args:
    y_true: The ground truth values. `y_true` values are expected to be -1 or 1.
      If binary (0 or 1) labels are provided we will convert them to -1 or 1.
      shape = `[batch_size, d0, .. dN]`.
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

  Returns:
     Squared hinge loss values. shape = `[batch_size, d0, .. dN-1]`.
  """
  y_pred = tf.convert_to_tensor(y_pred)
  y_true = tf.cast(y_true, y_pred.dtype)
  y_true = _maybe_convert_labels(y_true)
  return backend.mean(
      tf.square(tf.maximum(1. - y_true * y_pred, 0.)), axis=-1)

Classes

class BinaryCrossentropy (from_logits=False, label_smoothing=0, axis=-1, reduction='auto', name='binary_crossentropy')

Computes the cross-entropy loss between true labels and predicted labels.

Use this cross-entropy loss for binary (0 or 1) classification applications. The loss function requires the following inputs:

  • y_true (true label): This is either 0 or 1.
  • y_pred (predicted value): This is the model's prediction, i.e, a single floating-point value which either represents a logit, (i.e, value in [-inf, inf] when from_logits=True) or a probability (i.e, value in [0., 1.] when from_logits=False).

Recommended Usage: (set from_logits=True)

With tf.keras API:

model.compile(
  loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
  ....
)

As a standalone function:

>>> # Example 1: (batch_size = 1, number of samples = 4)
>>> y_true = [0, 1, 0, 0]
>>> y_pred = [-18.6, 0.51, 2.94, -12.8]
>>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
>>> bce(y_true, y_pred).numpy()
0.865
>>> # Example 2: (batch_size = 2, number of samples = 4)
>>> y_true = [[0, 1], [0, 0]]
>>> y_pred = [[-18.6, 0.51], [2.94, -12.8]]
>>> # Using default 'auto'/'sum_over_batch_size' reduction type.
>>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
>>> bce(y_true, y_pred).numpy()
0.865
>>> # Using 'sample_weight' attribute
>>> bce(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
0.243
>>> # Using 'sum' reduction` type.
>>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
...     reduction=tf.keras.losses.Reduction.SUM)
>>> bce(y_true, y_pred).numpy()
1.730
>>> # Using 'none' reduction type.
>>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
...     reduction=tf.keras.losses.Reduction.NONE)
>>> bce(y_true, y_pred).numpy()
array([0.235, 1.496], dtype=float32)

Default Usage: (set from_logits=False)

>>> # Make the following updates to the above "Recommended Usage" section
>>> # 1. Set `from_logits=False`
>>> tf.keras.losses.BinaryCrossentropy() # OR ...('from_logits=False')
>>> # 2. Update <code>y\_pred</code> to use probabilities instead of logits
>>> y_pred = [0.6, 0.3, 0.2, 0.8] # OR [[0.6, 0.3], [0.2, 0.8]]

Initializes BinaryCrossentropy instance.

Args

from_logits
Whether to interpret y_pred as a tensor of logit values. By default, we assume that y_pred contains probabilities (i.e., values in [0, 1]).
label_smoothing
Float in [0, 1]. When 0, no smoothing occurs. When > 0, we compute the loss between the predicted labels and a smoothed version of the true labels, where the smoothing squeezes the labels towards 0.5. Larger values of label_smoothing correspond to heavier smoothing.
axis
The axis along which to compute crossentropy (the features axis). Defaults to -1.
reduction
Type of tf.keras.losses.Reduction to apply to loss. Default value is AUTO. AUTO indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to SUM_OVER_BATCH_SIZE. When used with tf.distribute.Strategy, outside of built-in training loops such as tf.keras compile and fit, using AUTO or SUM_OVER_BATCH_SIZE will raise an error. Please see this custom training tutorial for more details.
name
Name for the op. Defaults to 'binary_crossentropy'.
Expand source code
class BinaryCrossentropy(LossFunctionWrapper):
  """Computes the cross-entropy loss between true labels and predicted labels.

  Use this cross-entropy loss for binary (0 or 1) classification applications.
  The loss function requires the following inputs:

  - `y_true` (true label): This is either 0 or 1.
  - `y_pred` (predicted value): This is the model's prediction, i.e, a single
    floating-point value which either represents a
    [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf]
    when `from_logits=True`) or a probability (i.e, value in [0., 1.] when
    `from_logits=False`).

  **Recommended Usage:** (set `from_logits=True`)

  With `tf.keras` API:

  ```python
  model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    ....
  )
  ```

  As a standalone function:

  >>> # Example 1: (batch_size = 1, number of samples = 4)
  >>> y_true = [0, 1, 0, 0]
  >>> y_pred = [-18.6, 0.51, 2.94, -12.8]
  >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
  >>> bce(y_true, y_pred).numpy()
  0.865

  >>> # Example 2: (batch_size = 2, number of samples = 4)
  >>> y_true = [[0, 1], [0, 0]]
  >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]]
  >>> # Using default 'auto'/'sum_over_batch_size' reduction type.
  >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
  >>> bce(y_true, y_pred).numpy()
  0.865
  >>> # Using 'sample_weight' attribute
  >>> bce(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
  0.243
  >>> # Using 'sum' reduction` type.
  >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> bce(y_true, y_pred).numpy()
  1.730
  >>> # Using 'none' reduction type.
  >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> bce(y_true, y_pred).numpy()
  array([0.235, 1.496], dtype=float32)

  **Default Usage:** (set `from_logits=False`)

  >>> # Make the following updates to the above "Recommended Usage" section
  >>> # 1. Set `from_logits=False`
  >>> tf.keras.losses.BinaryCrossentropy() # OR ...('from_logits=False')
  >>> # 2. Update `y_pred` to use probabilities instead of logits
  >>> y_pred = [0.6, 0.3, 0.2, 0.8] # OR [[0.6, 0.3], [0.2, 0.8]]
  """

  def __init__(self,
               from_logits=False,
               label_smoothing=0,
               axis=-1,
               reduction=losses_utils.ReductionV2.AUTO,
               name='binary_crossentropy'):
    """Initializes `BinaryCrossentropy` instance.

    Args:
      from_logits: Whether to interpret `y_pred` as a tensor of
        [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we
          assume that `y_pred` contains probabilities (i.e., values in [0, 1]).
      label_smoothing: Float in [0, 1]. When 0, no smoothing occurs. When > 0,
        we compute the loss between the predicted labels and a smoothed version
        of the true labels, where the smoothing squeezes the labels towards 0.5.
        Larger values of `label_smoothing` correspond to heavier smoothing.
      axis: The axis along which to compute crossentropy (the features axis).
        Defaults to -1.
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Name for the op. Defaults to 'binary_crossentropy'.
    """
    super().__init__(
        binary_crossentropy,
        name=name,
        reduction=reduction,
        from_logits=from_logits,
        label_smoothing=label_smoothing,
        axis=axis)
    self.from_logits = from_logits

Ancestors

Inherited members

class CategoricalCrossentropy (from_logits=False, label_smoothing=0, axis=-1, reduction='auto', name='categorical_crossentropy')

Computes the crossentropy loss between the labels and predictions.

Use this crossentropy loss function when there are two or more label classes. We expect labels to be provided in a one_hot representation. If you want to provide labels as integers, please use SparseCategoricalCrossentropy loss. There should be # classes floating point values per feature.

In the snippet below, there is # classes floating pointing values per example. The shape of both y_pred and y_true are [batch_size, num_classes].

Standalone usage:

>>> y_true = [[0, 1, 0], [0, 0, 1]]
>>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
>>> # Using 'auto'/'sum_over_batch_size' reduction type.
>>> cce = tf.keras.losses.CategoricalCrossentropy()
>>> cce(y_true, y_pred).numpy()
1.177
>>> # Calling with 'sample_weight'.
>>> cce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()
0.814
>>> # Using 'sum' reduction type.
>>> cce = tf.keras.losses.CategoricalCrossentropy(
...     reduction=tf.keras.losses.Reduction.SUM)
>>> cce(y_true, y_pred).numpy()
2.354
>>> # Using 'none' reduction type.
>>> cce = tf.keras.losses.CategoricalCrossentropy(
...     reduction=tf.keras.losses.Reduction.NONE)
>>> cce(y_true, y_pred).numpy()
array([0.0513, 2.303], dtype=float32)

Usage with the compile() API:

model.compile(optimizer='sgd', loss=tf.keras.losses.CategoricalCrossentropy())

Initializes CategoricalCrossentropy instance.

Args

from_logits
Whether y_pred is expected to be a logits tensor. By default, we assume that y_pred encodes a probability distribution.
label_smoothing
Float in [0, 1]. When > 0, label values are smoothed, meaning the confidence on label values are relaxed. For example, if 0.1, use 0.1 / num_classes for non-target labels and 0.9 + 0.1 / num_classes for target labels.
axis
The axis along which to compute crossentropy (the features axis). Defaults to -1.
reduction
Type of tf.keras.losses.Reduction to apply to loss. Default value is AUTO. AUTO indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to SUM_OVER_BATCH_SIZE. When used with tf.distribute.Strategy, outside of built-in training loops such as tf.keras compile and fit, using AUTO or SUM_OVER_BATCH_SIZE will raise an error. Please see this custom training tutorial for more details.
name
Optional name for the instance. Defaults to 'categorical_crossentropy'.
Expand source code
class CategoricalCrossentropy(LossFunctionWrapper):
  """Computes the crossentropy loss between the labels and predictions.

  Use this crossentropy loss function when there are two or more label classes.
  We expect labels to be provided in a `one_hot` representation. If you want to
  provide labels as integers, please use `SparseCategoricalCrossentropy` loss.
  There should be `# classes` floating point values per feature.

  In the snippet below, there is `# classes` floating pointing values per
  example. The shape of both `y_pred` and `y_true` are
  `[batch_size, num_classes]`.

  Standalone usage:

  >>> y_true = [[0, 1, 0], [0, 0, 1]]
  >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> cce = tf.keras.losses.CategoricalCrossentropy()
  >>> cce(y_true, y_pred).numpy()
  1.177

  >>> # Calling with 'sample_weight'.
  >>> cce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()
  0.814

  >>> # Using 'sum' reduction type.
  >>> cce = tf.keras.losses.CategoricalCrossentropy(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> cce(y_true, y_pred).numpy()
  2.354

  >>> # Using 'none' reduction type.
  >>> cce = tf.keras.losses.CategoricalCrossentropy(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> cce(y_true, y_pred).numpy()
  array([0.0513, 2.303], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.CategoricalCrossentropy())
  ```
  """

  def __init__(self,
               from_logits=False,
               label_smoothing=0,
               axis=-1,
               reduction=losses_utils.ReductionV2.AUTO,
               name='categorical_crossentropy'):
    """Initializes `CategoricalCrossentropy` instance.

    Args:
      from_logits: Whether `y_pred` is expected to be a logits tensor. By
        default, we assume that `y_pred` encodes a probability distribution.
      label_smoothing: Float in [0, 1]. When > 0, label values are smoothed,
        meaning the confidence on label values are relaxed. For example, if
        `0.1`, use `0.1 / num_classes` for non-target labels and
        `0.9 + 0.1 / num_classes` for target labels.
      axis: The axis along which to compute crossentropy (the features axis).
        Defaults to -1.
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance.
        Defaults to 'categorical_crossentropy'.
    """
    super().__init__(
        categorical_crossentropy,
        name=name,
        reduction=reduction,
        from_logits=from_logits,
        label_smoothing=label_smoothing,
        axis=axis)

Ancestors

Inherited members

class CategoricalHinge (reduction='auto', name='categorical_hinge')

Computes the categorical hinge loss between y_true and y_pred.

loss = maximum(neg - pos + 1, 0) where neg=maximum((1-y_true)*y_pred) and pos=sum(y_true*y_pred)

Standalone usage:

>>> y_true = [[0, 1], [0, 0]]
>>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
>>> # Using 'auto'/'sum_over_batch_size' reduction type.
>>> h = tf.keras.losses.CategoricalHinge()
>>> h(y_true, y_pred).numpy()
1.4
>>> # Calling with 'sample_weight'.
>>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
0.6
>>> # Using 'sum' reduction type.
>>> h = tf.keras.losses.CategoricalHinge(
...     reduction=tf.keras.losses.Reduction.SUM)
>>> h(y_true, y_pred).numpy()
2.8
>>> # Using 'none' reduction type.
>>> h = tf.keras.losses.CategoricalHinge(
...     reduction=tf.keras.losses.Reduction.NONE)
>>> h(y_true, y_pred).numpy()
array([1.2, 1.6], dtype=float32)

Usage with the compile() API:

model.compile(optimizer='sgd', loss=tf.keras.losses.CategoricalHinge())

Initializes CategoricalHinge instance.

Args

reduction
Type of tf.keras.losses.Reduction to apply to loss. Default value is AUTO. AUTO indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to SUM_OVER_BATCH_SIZE. When used with tf.distribute.Strategy, outside of built-in training loops such as tf.keras compile and fit, using AUTO or SUM_OVER_BATCH_SIZE will raise an error. Please see this custom training tutorial for more details.
name
Optional name for the instance. Defaults to 'categorical_hinge'.
Expand source code
class CategoricalHinge(LossFunctionWrapper):
  """Computes the categorical hinge loss between `y_true` and `y_pred`.

  `loss = maximum(neg - pos + 1, 0)`
  where `neg=maximum((1-y_true)*y_pred) and pos=sum(y_true*y_pred)`

  Standalone usage:

  >>> y_true = [[0, 1], [0, 0]]
  >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> h = tf.keras.losses.CategoricalHinge()
  >>> h(y_true, y_pred).numpy()
  1.4

  >>> # Calling with 'sample_weight'.
  >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
  0.6

  >>> # Using 'sum' reduction type.
  >>> h = tf.keras.losses.CategoricalHinge(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> h(y_true, y_pred).numpy()
  2.8

  >>> # Using 'none' reduction type.
  >>> h = tf.keras.losses.CategoricalHinge(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> h(y_true, y_pred).numpy()
  array([1.2, 1.6], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.CategoricalHinge())
  ```
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.AUTO,
               name='categorical_hinge'):
    """Initializes `CategoricalHinge` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to 'categorical_hinge'.
    """
    super().__init__(categorical_hinge, name=name, reduction=reduction)

Ancestors

Inherited members

class CosineSimilarity (axis=-1, reduction='auto', name='cosine_similarity')

Computes the cosine similarity between labels and predictions.

Note that it is a number between -1 and 1. When it is a negative number between -1 and 0, 0 indicates orthogonality and values closer to -1 indicate greater similarity. The values closer to 1 indicate greater dissimilarity. This makes it usable as a loss function in a setting where you try to maximize the proximity between predictions and targets. If either y_true or y_pred is a zero vector, cosine similarity will be 0 regardless of the proximity between predictions and targets.

loss = -sum(l2_norm(y_true) * l2_norm(y_pred))

Standalone usage:

>>> y_true = [[0., 1.], [1., 1.]]
>>> y_pred = [[1., 0.], [1., 1.]]
>>> # Using 'auto'/'sum_over_batch_size' reduction type.
>>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1)
>>> # l2_norm(y_true) = [[0., 1.], [1./1.414, 1./1.414]]
>>> # l2_norm(y_pred) = [[1., 0.], [1./1.414, 1./1.414]]
>>> # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]]
>>> # loss = mean(sum(l2_norm(y_true) . l2_norm(y_pred), axis=1))
>>> #       = -((0. + 0.) +  (0.5 + 0.5)) / 2
>>> cosine_loss(y_true, y_pred).numpy()
-0.5
>>> # Calling with 'sample_weight'.
>>> cosine_loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
-0.0999
>>> # Using 'sum' reduction type.
>>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
...     reduction=tf.keras.losses.Reduction.SUM)
>>> cosine_loss(y_true, y_pred).numpy()
-0.999
>>> # Using 'none' reduction type.
>>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
...     reduction=tf.keras.losses.Reduction.NONE)
>>> cosine_loss(y_true, y_pred).numpy()
array([-0., -0.999], dtype=float32)

Usage with the compile() API:

model.compile(optimizer='sgd', loss=tf.keras.losses.CosineSimilarity(axis=1))

Args

axis
The axis along which the cosine similarity is computed (the features axis). Defaults to -1.
reduction
Type of tf.keras.losses.Reduction to apply to loss. Default value is AUTO. AUTO indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to SUM_OVER_BATCH_SIZE. When used with tf.distribute.Strategy, outside of built-in training loops such as tf.keras compile and fit, using AUTO or SUM_OVER_BATCH_SIZE will raise an error. Please see this custom training [tutorial] (https://www.tensorflow.org/tutorials/distribute/custom_training) for more details.
name
Optional name for the instance.

Initializes LossFunctionWrapper class.

Args

fn
The loss function to wrap, with signature fn(y_true, y_pred, **kwargs).
reduction
Type of tf.keras.losses.Reduction to apply to loss. Default value is AUTO. AUTO indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to SUM_OVER_BATCH_SIZE. When used with tf.distribute.Strategy, outside of built-in training loops such as tf.keras compile and fit, using AUTO or SUM_OVER_BATCH_SIZE will raise an error. Please see this custom training tutorial for more details.
name
Optional name for the instance.
**kwargs
The keyword arguments that are passed on to fn.
Expand source code
class CosineSimilarity(LossFunctionWrapper):
  """Computes the cosine similarity between labels and predictions.

  Note that it is a number between -1 and 1. When it is a negative number
  between -1 and 0, 0 indicates orthogonality and values closer to -1
  indicate greater similarity. The values closer to 1 indicate greater
  dissimilarity. This makes it usable as a loss function in a setting
  where you try to maximize the proximity between predictions and targets.
  If either `y_true` or `y_pred` is a zero vector, cosine similarity will be 0
  regardless of the proximity between predictions and targets.

  `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))`

  Standalone usage:

  >>> y_true = [[0., 1.], [1., 1.]]
  >>> y_pred = [[1., 0.], [1., 1.]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1)
  >>> # l2_norm(y_true) = [[0., 1.], [1./1.414, 1./1.414]]
  >>> # l2_norm(y_pred) = [[1., 0.], [1./1.414, 1./1.414]]
  >>> # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]]
  >>> # loss = mean(sum(l2_norm(y_true) . l2_norm(y_pred), axis=1))
  >>> #       = -((0. + 0.) +  (0.5 + 0.5)) / 2
  >>> cosine_loss(y_true, y_pred).numpy()
  -0.5

  >>> # Calling with 'sample_weight'.
  >>> cosine_loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
  -0.0999

  >>> # Using 'sum' reduction type.
  >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> cosine_loss(y_true, y_pred).numpy()
  -0.999

  >>> # Using 'none' reduction type.
  >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> cosine_loss(y_true, y_pred).numpy()
  array([-0., -0.999], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.CosineSimilarity(axis=1))
  ```

  Args:
    axis: The axis along which the cosine similarity is computed
      (the features axis). Defaults to -1.
    reduction: Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `AUTO`. `AUTO` indicates that the reduction option will
      be determined by the usage context. For almost all cases this defaults to
      `SUM_OVER_BATCH_SIZE`. When used with `tf.distribute.Strategy`, outside of
      built-in training loops such as `tf.keras` `compile` and `fit`, using
      `AUTO` or `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
      custom training [tutorial]
      (https://www.tensorflow.org/tutorials/distribute/custom_training) for more
        details.
    name: Optional name for the instance.
  """

  def __init__(self,
               axis=-1,
               reduction=losses_utils.ReductionV2.AUTO,
               name='cosine_similarity'):
    super().__init__(
        cosine_similarity, reduction=reduction, name=name, axis=axis)

Ancestors

Inherited members

class Hinge (reduction='auto', name='hinge')

Computes the hinge loss between y_true and y_pred.

loss = maximum(1 - y_true * y_pred, 0)

y_true values are expected to be -1 or 1. If binary (0 or 1) labels are provided we will convert them to -1 or 1.

Standalone usage:

>>> y_true = [[0., 1.], [0., 0.]]
>>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
>>> # Using 'auto'/'sum_over_batch_size' reduction type.
>>> h = tf.keras.losses.Hinge()
>>> h(y_true, y_pred).numpy()
1.3
>>> # Calling with 'sample_weight'.
>>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
0.55
>>> # Using 'sum' reduction type.
>>> h = tf.keras.losses.Hinge(
...     reduction=tf.keras.losses.Reduction.SUM)
>>> h(y_true, y_pred).numpy()
2.6
>>> # Using 'none' reduction type.
>>> h = tf.keras.losses.Hinge(
...     reduction=tf.keras.losses.Reduction.NONE)
>>> h(y_true, y_pred).numpy()
array([1.1, 1.5], dtype=float32)

Usage with the compile() API:

model.compile(optimizer='sgd', loss=tf.keras.losses.Hinge())

Initializes Hinge instance.

Args

reduction
Type of tf.keras.losses.Reduction to apply to loss. Default value is AUTO. AUTO indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to SUM_OVER_BATCH_SIZE. When used with tf.distribute.Strategy, outside of built-in training loops such as tf.keras compile and fit, using AUTO or SUM_OVER_BATCH_SIZE will raise an error. Please see this custom training tutorial for more details.
name
Optional name for the instance. Defaults to 'hinge'.
Expand source code
class Hinge(LossFunctionWrapper):
  """Computes the hinge loss between `y_true` and `y_pred`.

  `loss = maximum(1 - y_true * y_pred, 0)`

  `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
  provided we will convert them to -1 or 1.

  Standalone usage:

  >>> y_true = [[0., 1.], [0., 0.]]
  >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> h = tf.keras.losses.Hinge()
  >>> h(y_true, y_pred).numpy()
  1.3

  >>> # Calling with 'sample_weight'.
  >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
  0.55

  >>> # Using 'sum' reduction type.
  >>> h = tf.keras.losses.Hinge(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> h(y_true, y_pred).numpy()
  2.6

  >>> # Using 'none' reduction type.
  >>> h = tf.keras.losses.Hinge(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> h(y_true, y_pred).numpy()
  array([1.1, 1.5], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.Hinge())
  ```
  """

  def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name='hinge'):
    """Initializes `Hinge` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to 'hinge'.
    """
    super().__init__(hinge, name=name, reduction=reduction)

Ancestors

Inherited members

class Huber (delta=1.0, reduction='auto', name='huber_loss')

Computes the Huber loss between y_true and y_pred.

For each value x in error = y_true - y_pred:

loss = 0.5 * x^2                  if |x| <= d
loss = 0.5 * d^2 + d * (|x| - d)  if |x| > d

where d is delta. See: https://en.wikipedia.org/wiki/Huber_loss

Standalone usage:

>>> y_true = [[0, 1], [0, 0]]
>>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
>>> # Using 'auto'/'sum_over_batch_size' reduction type.
>>> h = tf.keras.losses.Huber()
>>> h(y_true, y_pred).numpy()
0.155
>>> # Calling with 'sample_weight'.
>>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
0.09
>>> # Using 'sum' reduction type.
>>> h = tf.keras.losses.Huber(
...     reduction=tf.keras.losses.Reduction.SUM)
>>> h(y_true, y_pred).numpy()
0.31
>>> # Using 'none' reduction type.
>>> h = tf.keras.losses.Huber(
...     reduction=tf.keras.losses.Reduction.NONE)
>>> h(y_true, y_pred).numpy()
array([0.18, 0.13], dtype=float32)

Usage with the compile() API:

model.compile(optimizer='sgd', loss=tf.keras.losses.Huber())

Initializes Huber instance.

Args

delta
A float, the point where the Huber loss function changes from a quadratic to linear.
reduction
Type of tf.keras.losses.Reduction to apply to loss. Default value is AUTO. AUTO indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to SUM_OVER_BATCH_SIZE. When used with tf.distribute.Strategy, outside of built-in training loops such as tf.keras compile and fit, using AUTO or SUM_OVER_BATCH_SIZE will raise an error. Please see this custom training tutorial for more details.
name
Optional name for the instance. Defaults to 'huber_loss'.
Expand source code
class Huber(LossFunctionWrapper):
  """Computes the Huber loss between `y_true` and `y_pred`.

  For each value x in `error = y_true - y_pred`:

  ```
  loss = 0.5 * x^2                  if |x| <= d
  loss = 0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```
  where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss

  Standalone usage:

  >>> y_true = [[0, 1], [0, 0]]
  >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> h = tf.keras.losses.Huber()
  >>> h(y_true, y_pred).numpy()
  0.155

  >>> # Calling with 'sample_weight'.
  >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
  0.09

  >>> # Using 'sum' reduction type.
  >>> h = tf.keras.losses.Huber(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> h(y_true, y_pred).numpy()
  0.31

  >>> # Using 'none' reduction type.
  >>> h = tf.keras.losses.Huber(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> h(y_true, y_pred).numpy()
  array([0.18, 0.13], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.Huber())
  ```
  """

  def __init__(self,
               delta=1.0,
               reduction=losses_utils.ReductionV2.AUTO,
               name='huber_loss'):
    """Initializes `Huber` instance.

    Args:
      delta: A float, the point where the Huber loss function changes from a
        quadratic to linear.
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to 'huber_loss'.
    """
    super().__init__(huber, name=name, reduction=reduction, delta=delta)

Ancestors

Inherited members

class KLDivergence (reduction='auto', name='kl_divergence')

Computes Kullback-Leibler divergence loss between y_true and y_pred.

loss = y_true * log(y_true / y_pred)

See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence

Standalone usage:

>>> y_true = [[0, 1], [0, 0]]
>>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
>>> # Using 'auto'/'sum_over_batch_size' reduction type.
>>> kl = tf.keras.losses.KLDivergence()
>>> kl(y_true, y_pred).numpy()
0.458
>>> # Calling with 'sample_weight'.
>>> kl(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
0.366
>>> # Using 'sum' reduction type.
>>> kl = tf.keras.losses.KLDivergence(
...     reduction=tf.keras.losses.Reduction.SUM)
>>> kl(y_true, y_pred).numpy()
0.916
>>> # Using 'none' reduction type.
>>> kl = tf.keras.losses.KLDivergence(
...     reduction=tf.keras.losses.Reduction.NONE)
>>> kl(y_true, y_pred).numpy()
array([0.916, -3.08e-06], dtype=float32)

Usage with the compile() API:

model.compile(optimizer='sgd', loss=tf.keras.losses.KLDivergence())

Initializes KLDivergence instance.

Args

reduction
Type of tf.keras.losses.Reduction to apply to loss. Default value is AUTO. AUTO indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to SUM_OVER_BATCH_SIZE. When used with tf.distribute.Strategy, outside of built-in training loops such as tf.keras compile and fit, using AUTO or SUM_OVER_BATCH_SIZE will raise an error. Please see this custom training tutorial for more details.
name
Optional name for the instance. Defaults to 'kl_divergence'.
Expand source code
class KLDivergence(LossFunctionWrapper):
  """Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`.

  `loss = y_true * log(y_true / y_pred)`

  See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence

  Standalone usage:

  >>> y_true = [[0, 1], [0, 0]]
  >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> kl = tf.keras.losses.KLDivergence()
  >>> kl(y_true, y_pred).numpy()
  0.458

  >>> # Calling with 'sample_weight'.
  >>> kl(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
  0.366

  >>> # Using 'sum' reduction type.
  >>> kl = tf.keras.losses.KLDivergence(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> kl(y_true, y_pred).numpy()
  0.916

  >>> # Using 'none' reduction type.
  >>> kl = tf.keras.losses.KLDivergence(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> kl(y_true, y_pred).numpy()
  array([0.916, -3.08e-06], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.KLDivergence())
  ```
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.AUTO,
               name='kl_divergence'):
    """Initializes `KLDivergence` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to 'kl_divergence'.
    """
    super().__init__(kl_divergence, name=name, reduction=reduction)

Ancestors

Inherited members

class LogCosh (reduction='auto', name='log_cosh')

Computes the logarithm of the hyperbolic cosine of the prediction error.

logcosh = log((exp(x) + exp(-x))/2), where x is the error y_pred - y_true.

Standalone usage:

>>> y_true = [[0., 1.], [0., 0.]]
>>> y_pred = [[1., 1.], [0., 0.]]
>>> # Using 'auto'/'sum_over_batch_size' reduction type.
>>> l = tf.keras.losses.LogCosh()
>>> l(y_true, y_pred).numpy()
0.108
>>> # Calling with 'sample_weight'.
>>> l(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
0.087
>>> # Using 'sum' reduction type.
>>> l = tf.keras.losses.LogCosh(
...     reduction=tf.keras.losses.Reduction.SUM)
>>> l(y_true, y_pred).numpy()
0.217
>>> # Using 'none' reduction type.
>>> l = tf.keras.losses.LogCosh(
...     reduction=tf.keras.losses.Reduction.NONE)
>>> l(y_true, y_pred).numpy()
array([0.217, 0.], dtype=float32)

Usage with the compile() API:

model.compile(optimizer='sgd', loss=tf.keras.losses.LogCosh())

Initializes LogCosh instance.

Args

reduction
Type of tf.keras.losses.Reduction to apply to loss. Default value is AUTO. AUTO indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to SUM_OVER_BATCH_SIZE. When used with tf.distribute.Strategy, outside of built-in training loops such as tf.keras compile and fit, using AUTO or SUM_OVER_BATCH_SIZE will raise an error. Please see this custom training tutorial for more details.
name
Optional name for the instance. Defaults to 'log_cosh'.
Expand source code
class LogCosh(LossFunctionWrapper):
  """Computes the logarithm of the hyperbolic cosine of the prediction error.

  `logcosh = log((exp(x) + exp(-x))/2)`,
  where x is the error `y_pred - y_true`.

  Standalone usage:

  >>> y_true = [[0., 1.], [0., 0.]]
  >>> y_pred = [[1., 1.], [0., 0.]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> l = tf.keras.losses.LogCosh()
  >>> l(y_true, y_pred).numpy()
  0.108

  >>> # Calling with 'sample_weight'.
  >>> l(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
  0.087

  >>> # Using 'sum' reduction type.
  >>> l = tf.keras.losses.LogCosh(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> l(y_true, y_pred).numpy()
  0.217

  >>> # Using 'none' reduction type.
  >>> l = tf.keras.losses.LogCosh(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> l(y_true, y_pred).numpy()
  array([0.217, 0.], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.LogCosh())
  ```
  """

  def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name='log_cosh'):
    """Initializes `LogCosh` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to 'log_cosh'.
    """
    super().__init__(log_cosh, name=name, reduction=reduction)

Ancestors

Inherited members

class Loss (reduction='auto', name=None)

Loss base class.

To be implemented by subclasses: * call(): Contains the logic for loss calculation using y_true, y_pred.

Example subclass implementation:

class MeanSquaredError(Loss):

  def call(self, y_true, y_pred):
    y_pred = tf.convert_to_tensor_v2(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)
    return tf.reduce_mean(math_ops.square(y_pred - y_true), axis=-1)

When used with tf.distribute.Strategy, outside of built-in training loops such as tf.keras compile and fit, please use 'SUM' or 'NONE' reduction types, and reduce losses explicitly in your training loop. Using 'AUTO' or 'SUM_OVER_BATCH_SIZE' will raise an error.

Please see this custom training tutorial for more details on this.

You can implement 'SUM_OVER_BATCH_SIZE' using global batch size like:

with strategy.scope():
  loss_obj = tf.keras.losses.CategoricalCrossentropy(
      reduction=tf.keras.losses.Reduction.NONE)
  ....
  loss = (tf.reduce_sum(loss_obj(labels, predictions)) *
          (1. / global_batch_size))

Initializes Loss class.

Args

reduction
Type of tf.keras.losses.Reduction to apply to loss. Default value is AUTO. AUTO indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to SUM_OVER_BATCH_SIZE. When used with tf.distribute.Strategy, outside of built-in training loops such as tf.keras compile and fit, using AUTO or SUM_OVER_BATCH_SIZE will raise an error. Please see this custom training tutorial for more details.
name
Optional name for the instance.
Expand source code
class Loss:
  """Loss base class.

  To be implemented by subclasses:
  * `call()`: Contains the logic for loss calculation using `y_true`, `y_pred`.

  Example subclass implementation:

  ```python
  class MeanSquaredError(Loss):

    def call(self, y_true, y_pred):
      y_pred = tf.convert_to_tensor_v2(y_pred)
      y_true = tf.cast(y_true, y_pred.dtype)
      return tf.reduce_mean(math_ops.square(y_pred - y_true), axis=-1)
  ```

  When used with `tf.distribute.Strategy`, outside of built-in training loops
  such as `tf.keras` `compile` and `fit`, please use 'SUM' or 'NONE' reduction
  types, and reduce losses explicitly in your training loop. Using 'AUTO' or
  'SUM_OVER_BATCH_SIZE' will raise an error.

  Please see this custom training [tutorial](
    https://www.tensorflow.org/tutorials/distribute/custom_training) for more
  details on this.

  You can implement 'SUM_OVER_BATCH_SIZE' using global batch size like:

  ```python
  with strategy.scope():
    loss_obj = tf.keras.losses.CategoricalCrossentropy(
        reduction=tf.keras.losses.Reduction.NONE)
    ....
    loss = (tf.reduce_sum(loss_obj(labels, predictions)) *
            (1. / global_batch_size))
  ```
  """

  def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name=None):
    """Initializes `Loss` class.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance.
    """
    losses_utils.ReductionV2.validate(reduction)
    self.reduction = reduction
    self.name = name
    # SUM_OVER_BATCH is only allowed in losses managed by `fit` or
    # CannedEstimators.
    self._allow_sum_over_batch_size = False
    self._set_name_scope()

  def _set_name_scope(self):
    """Creates a valid `name_scope` name."""
    if self.name is None:
      self._name_scope = self.__class__.__name__
    elif self.name == '<lambda>':
      self._name_scope = 'lambda'
    else:
      # E.g. '_my_loss' => 'my_loss'
      self._name_scope = self.name.strip('_')

  def __call__(self, y_true, y_pred, sample_weight=None):
    """Invokes the `Loss` instance.

    Args:
      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except
        sparse loss functions such as sparse categorical crossentropy where
        shape = `[batch_size, d0, .. dN-1]`
      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
      sample_weight: Optional `sample_weight` acts as a coefficient for the
        loss. If a scalar is provided, then the loss is simply scaled by the
        given value. If `sample_weight` is a tensor of size `[batch_size]`, then
        the total loss for each sample of the batch is rescaled by the
        corresponding element in the `sample_weight` vector. If the shape of
        `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be broadcasted to
        this shape), then each loss element of `y_pred` is scaled
        by the corresponding value of `sample_weight`. (Note on`dN-1`: all loss
          functions reduce by 1 dimension, usually axis=-1.)

    Returns:
      Weighted loss float `Tensor`. If `reduction` is `NONE`, this has
        shape `[batch_size, d0, .. dN-1]`; otherwise, it is scalar. (Note `dN-1`
        because all loss functions reduce by 1 dimension, usually axis=-1.)

    Raises:
      ValueError: If the shape of `sample_weight` is invalid.
    """
    # If we are wrapping a lambda function strip '<>' from the name as it is not
    # accepted in scope name.
    graph_ctx = tf_utils.graph_context_for_symbolic_tensors(
        y_true, y_pred, sample_weight)
    with backend.name_scope(self._name_scope), graph_ctx:
      if tf.executing_eagerly():
        call_fn = self.call
      else:
        call_fn = tf.__internal__.autograph.tf_convert(self.call, tf.__internal__.autograph.control_status_ctx())
      losses = call_fn(y_true, y_pred)
      return losses_utils.compute_weighted_loss(
          losses, sample_weight, reduction=self._get_reduction())

  @classmethod
  def from_config(cls, config):
    """Instantiates a `Loss` from its config (output of `get_config()`).

    Args:
        config: Output of `get_config()`.

    Returns:
        A `Loss` instance.
    """
    return cls(**config)

  def get_config(self):
    """Returns the config dictionary for a `Loss` instance."""
    return {'reduction': self.reduction, 'name': self.name}

  @abc.abstractmethod
  @doc_controls.for_subclass_implementers
  def call(self, y_true, y_pred):
    """Invokes the `Loss` instance.

    Args:
      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except
        sparse loss functions such as sparse categorical crossentropy where
        shape = `[batch_size, d0, .. dN-1]`
      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`

    Returns:
      Loss values with the shape `[batch_size, d0, .. dN-1]`.
    """
    raise NotImplementedError('Must be implemented in subclasses.')

  def _get_reduction(self):
    """Handles `AUTO` reduction cases and returns the reduction value."""
    if (not self._allow_sum_over_batch_size and
        tf.distribute.has_strategy() and
        (self.reduction == losses_utils.ReductionV2.AUTO or
         self.reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE)):
      raise ValueError(
          'Please use `tf.keras.losses.Reduction.SUM` or '
          '`tf.keras.losses.Reduction.NONE` for loss reduction when losses are '
          'used with `tf.distribute.Strategy` outside of the built-in training '
          'loops. You can implement '
          '`tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE` using global batch '
          'size like:\n```\nwith strategy.scope():\n'
          '    loss_obj = tf.keras.losses.CategoricalCrossentropy('
          'reduction=tf.keras.losses.Reduction.NONE)\n....\n'
          '    loss = tf.reduce_sum(loss_obj(labels, predictions)) * '
          '(1. / global_batch_size)\n```\nPlease see '
          'https://www.tensorflow.org/tutorials/distribute/custom_training'
          ' for more details.')

    if self.reduction == losses_utils.ReductionV2.AUTO:
      return losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE
    return self.reduction

Subclasses

Static methods

def from_config(config)

Instantiates a Loss from its config (output of get_config()).

Args

config
Output of get_config().

Returns

A Loss instance.

Expand source code
@classmethod
def from_config(cls, config):
  """Instantiates a `Loss` from its config (output of `get_config()`).

  Args:
      config: Output of `get_config()`.

  Returns:
      A `Loss` instance.
  """
  return cls(**config)

Methods

def call(self, y_true, y_pred)

Invokes the Loss instance.

Args

y_true
Ground truth values. shape = [batch_size, d0, .. dN], except sparse loss functions such as sparse categorical crossentropy where shape = [batch_size, d0, .. dN-1]
y_pred
The predicted values. shape = [batch_size, d0, .. dN]

Returns

Loss values with the shape [batch_size, d0, .. dN-1].

Expand source code
@abc.abstractmethod
@doc_controls.for_subclass_implementers
def call(self, y_true, y_pred):
  """Invokes the `Loss` instance.

  Args:
    y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except
      sparse loss functions such as sparse categorical crossentropy where
      shape = `[batch_size, d0, .. dN-1]`
    y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`

  Returns:
    Loss values with the shape `[batch_size, d0, .. dN-1]`.
  """
  raise NotImplementedError('Must be implemented in subclasses.')
def get_config(self)

Returns the config dictionary for a Loss instance.

Expand source code
def get_config(self):
  """Returns the config dictionary for a `Loss` instance."""
  return {'reduction': self.reduction, 'name': self.name}
class LossFunctionWrapper (fn, reduction='auto', name=None, **kwargs)

Wraps a loss function in the Loss class.

Initializes LossFunctionWrapper class.

Args

fn
The loss function to wrap, with signature fn(y_true, y_pred, **kwargs).
reduction
Type of tf.keras.losses.Reduction to apply to loss. Default value is AUTO. AUTO indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to SUM_OVER_BATCH_SIZE. When used with tf.distribute.Strategy, outside of built-in training loops such as tf.keras compile and fit, using AUTO or SUM_OVER_BATCH_SIZE will raise an error. Please see this custom training tutorial for more details.
name
Optional name for the instance.
**kwargs
The keyword arguments that are passed on to fn.
Expand source code
class LossFunctionWrapper(Loss):
  """Wraps a loss function in the `Loss` class."""

  def __init__(self,
               fn,
               reduction=losses_utils.ReductionV2.AUTO,
               name=None,
               **kwargs):
    """Initializes `LossFunctionWrapper` class.

    Args:
      fn: The loss function to wrap, with signature `fn(y_true, y_pred,
        **kwargs)`.
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance.
      **kwargs: The keyword arguments that are passed on to `fn`.
    """
    super().__init__(reduction=reduction, name=name)
    self.fn = fn
    self._fn_kwargs = kwargs

  def call(self, y_true, y_pred):
    """Invokes the `LossFunctionWrapper` instance.

    Args:
      y_true: Ground truth values.
      y_pred: The predicted values.

    Returns:
      Loss values per sample.
    """
    if tf.is_tensor(y_pred) and tf.is_tensor(y_true):
      y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(y_pred, y_true)

    ag_fn = tf.__internal__.autograph.tf_convert(self.fn, tf.__internal__.autograph.control_status_ctx())
    return ag_fn(y_true, y_pred, **self._fn_kwargs)

  def get_config(self):
    config = {}
    for k, v in self._fn_kwargs.items():
      config[k] = backend.eval(v) if tf_utils.is_tensor_or_variable(v) else v
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))

Ancestors

Subclasses

Methods

def call(self, y_true, y_pred)

Invokes the LossFunctionWrapper instance.

Args

y_true
Ground truth values.
y_pred
The predicted values.

Returns

Loss values per sample.

Expand source code
def call(self, y_true, y_pred):
  """Invokes the `LossFunctionWrapper` instance.

  Args:
    y_true: Ground truth values.
    y_pred: The predicted values.

  Returns:
    Loss values per sample.
  """
  if tf.is_tensor(y_pred) and tf.is_tensor(y_true):
    y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(y_pred, y_true)

  ag_fn = tf.__internal__.autograph.tf_convert(self.fn, tf.__internal__.autograph.control_status_ctx())
  return ag_fn(y_true, y_pred, **self._fn_kwargs)

Inherited members

class MeanAbsoluteError (reduction='auto', name='mean_absolute_error')

Computes the mean of absolute difference between labels and predictions.

loss = abs(y_true - y_pred)

Standalone usage:

>>> y_true = [[0., 1.], [0., 0.]]
>>> y_pred = [[1., 1.], [1., 0.]]
>>> # Using 'auto'/'sum_over_batch_size' reduction type.
>>> mae = tf.keras.losses.MeanAbsoluteError()
>>> mae(y_true, y_pred).numpy()
0.5
>>> # Calling with 'sample_weight'.
>>> mae(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
0.25
>>> # Using 'sum' reduction type.
>>> mae = tf.keras.losses.MeanAbsoluteError(
...     reduction=tf.keras.losses.Reduction.SUM)
>>> mae(y_true, y_pred).numpy()
1.0
>>> # Using 'none' reduction type.
>>> mae = tf.keras.losses.MeanAbsoluteError(
...     reduction=tf.keras.losses.Reduction.NONE)
>>> mae(y_true, y_pred).numpy()
array([0.5, 0.5], dtype=float32)

Usage with the compile() API:

model.compile(optimizer='sgd', loss=tf.keras.losses.MeanAbsoluteError())

Initializes MeanAbsoluteError instance.

Args

reduction
Type of tf.keras.losses.Reduction to apply to loss. Default value is AUTO. AUTO indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to SUM_OVER_BATCH_SIZE. When used with tf.distribute.Strategy, outside of built-in training loops such as tf.keras compile and fit, using AUTO or SUM_OVER_BATCH_SIZE will raise an error. Please see this custom training tutorial for more details.
name
Optional name for the instance. Defaults to 'mean_absolute_error'.
Expand source code
class MeanAbsoluteError(LossFunctionWrapper):
  """Computes the mean of absolute difference between labels and predictions.

  `loss = abs(y_true - y_pred)`

  Standalone usage:

  >>> y_true = [[0., 1.], [0., 0.]]
  >>> y_pred = [[1., 1.], [1., 0.]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> mae = tf.keras.losses.MeanAbsoluteError()
  >>> mae(y_true, y_pred).numpy()
  0.5

  >>> # Calling with 'sample_weight'.
  >>> mae(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
  0.25

  >>> # Using 'sum' reduction type.
  >>> mae = tf.keras.losses.MeanAbsoluteError(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> mae(y_true, y_pred).numpy()
  1.0

  >>> # Using 'none' reduction type.
  >>> mae = tf.keras.losses.MeanAbsoluteError(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> mae(y_true, y_pred).numpy()
  array([0.5, 0.5], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.MeanAbsoluteError())
  ```
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.AUTO,
               name='mean_absolute_error'):
    """Initializes `MeanAbsoluteError` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to 'mean_absolute_error'.
    """
    super().__init__(mean_absolute_error, name=name, reduction=reduction)

Ancestors

Inherited members

class MeanAbsolutePercentageError (reduction='auto', name='mean_absolute_percentage_error')

Computes the mean absolute percentage error between y_true and y_pred.

loss = 100 * abs(y_true - y_pred) / y_true

Standalone usage:

>>> y_true = [[2., 1.], [2., 3.]]
>>> y_pred = [[1., 1.], [1., 0.]]
>>> # Using 'auto'/'sum_over_batch_size' reduction type.
>>> mape = tf.keras.losses.MeanAbsolutePercentageError()
>>> mape(y_true, y_pred).numpy()
50.
>>> # Calling with 'sample_weight'.
>>> mape(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
20.
>>> # Using 'sum' reduction type.
>>> mape = tf.keras.losses.MeanAbsolutePercentageError(
...     reduction=tf.keras.losses.Reduction.SUM)
>>> mape(y_true, y_pred).numpy()
100.
>>> # Using 'none' reduction type.
>>> mape = tf.keras.losses.MeanAbsolutePercentageError(
...     reduction=tf.keras.losses.Reduction.NONE)
>>> mape(y_true, y_pred).numpy()
array([25., 75.], dtype=float32)

Usage with the compile() API:

model.compile(optimizer='sgd',
              loss=tf.keras.losses.MeanAbsolutePercentageError())

Initializes MeanAbsolutePercentageError instance.

Args

reduction
Type of tf.keras.losses.Reduction to apply to loss. Default value is AUTO. AUTO indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to SUM_OVER_BATCH_SIZE. When used with tf.distribute.Strategy, outside of built-in training loops such as tf.keras compile and fit, using AUTO or SUM_OVER_BATCH_SIZE will raise an error. Please see this custom training tutorial for more details.
name
Optional name for the instance. Defaults to 'mean_absolute_percentage_error'.
Expand source code
class MeanAbsolutePercentageError(LossFunctionWrapper):
  """Computes the mean absolute percentage error between `y_true` and `y_pred`.

  `loss = 100 * abs(y_true - y_pred) / y_true`

  Standalone usage:

  >>> y_true = [[2., 1.], [2., 3.]]
  >>> y_pred = [[1., 1.], [1., 0.]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> mape = tf.keras.losses.MeanAbsolutePercentageError()
  >>> mape(y_true, y_pred).numpy()
  50.

  >>> # Calling with 'sample_weight'.
  >>> mape(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
  20.

  >>> # Using 'sum' reduction type.
  >>> mape = tf.keras.losses.MeanAbsolutePercentageError(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> mape(y_true, y_pred).numpy()
  100.

  >>> # Using 'none' reduction type.
  >>> mape = tf.keras.losses.MeanAbsolutePercentageError(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> mape(y_true, y_pred).numpy()
  array([25., 75.], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd',
                loss=tf.keras.losses.MeanAbsolutePercentageError())
  ```
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.AUTO,
               name='mean_absolute_percentage_error'):
    """Initializes `MeanAbsolutePercentageError` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to
        'mean_absolute_percentage_error'.
    """
    super().__init__(
        mean_absolute_percentage_error, name=name, reduction=reduction)

Ancestors

Inherited members

class MeanSquaredError (reduction='auto', name='mean_squared_error')

Computes the mean of squares of errors between labels and predictions.

loss = square(y_true - y_pred)

Standalone usage:

>>> y_true = [[0., 1.], [0., 0.]]
>>> y_pred = [[1., 1.], [1., 0.]]
>>> # Using 'auto'/'sum_over_batch_size' reduction type.
>>> mse = tf.keras.losses.MeanSquaredError()
>>> mse(y_true, y_pred).numpy()
0.5
>>> # Calling with 'sample_weight'.
>>> mse(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
0.25
>>> # Using 'sum' reduction type.
>>> mse = tf.keras.losses.MeanSquaredError(
...     reduction=tf.keras.losses.Reduction.SUM)
>>> mse(y_true, y_pred).numpy()
1.0
>>> # Using 'none' reduction type.
>>> mse = tf.keras.losses.MeanSquaredError(
...     reduction=tf.keras.losses.Reduction.NONE)
>>> mse(y_true, y_pred).numpy()
array([0.5, 0.5], dtype=float32)

Usage with the compile() API:

model.compile(optimizer='sgd', loss=tf.keras.losses.MeanSquaredError())

Initializes MeanSquaredError instance.

Args

reduction
Type of tf.keras.losses.Reduction to apply to loss. Default value is AUTO. AUTO indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to SUM_OVER_BATCH_SIZE. When used with tf.distribute.Strategy, outside of built-in training loops such as tf.keras compile and fit, using AUTO or SUM_OVER_BATCH_SIZE will raise an error. Please see this custom training tutorial for more details.
name
Optional name for the instance. Defaults to 'mean_squared_error'.
Expand source code
class MeanSquaredError(LossFunctionWrapper):
  """Computes the mean of squares of errors between labels and predictions.

  `loss = square(y_true - y_pred)`

  Standalone usage:

  >>> y_true = [[0., 1.], [0., 0.]]
  >>> y_pred = [[1., 1.], [1., 0.]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> mse = tf.keras.losses.MeanSquaredError()
  >>> mse(y_true, y_pred).numpy()
  0.5

  >>> # Calling with 'sample_weight'.
  >>> mse(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
  0.25

  >>> # Using 'sum' reduction type.
  >>> mse = tf.keras.losses.MeanSquaredError(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> mse(y_true, y_pred).numpy()
  1.0

  >>> # Using 'none' reduction type.
  >>> mse = tf.keras.losses.MeanSquaredError(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> mse(y_true, y_pred).numpy()
  array([0.5, 0.5], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.MeanSquaredError())
  ```
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.AUTO,
               name='mean_squared_error'):
    """Initializes `MeanSquaredError` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to 'mean_squared_error'.
    """
    super().__init__(mean_squared_error, name=name, reduction=reduction)

Ancestors

Inherited members

class MeanSquaredLogarithmicError (reduction='auto', name='mean_squared_logarithmic_error')

Computes the mean squared logarithmic error between y_true and y_pred.

loss = square(log(y_true + 1.) - log(y_pred + 1.))

Standalone usage:

>>> y_true = [[0., 1.], [0., 0.]]
>>> y_pred = [[1., 1.], [1., 0.]]
>>> # Using 'auto'/'sum_over_batch_size' reduction type.
>>> msle = tf.keras.losses.MeanSquaredLogarithmicError()
>>> msle(y_true, y_pred).numpy()
0.240
>>> # Calling with 'sample_weight'.
>>> msle(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
0.120
>>> # Using 'sum' reduction type.
>>> msle = tf.keras.losses.MeanSquaredLogarithmicError(
...     reduction=tf.keras.losses.Reduction.SUM)
>>> msle(y_true, y_pred).numpy()
0.480
>>> # Using 'none' reduction type.
>>> msle = tf.keras.losses.MeanSquaredLogarithmicError(
...     reduction=tf.keras.losses.Reduction.NONE)
>>> msle(y_true, y_pred).numpy()
array([0.240, 0.240], dtype=float32)

Usage with the compile() API:

model.compile(optimizer='sgd',
              loss=tf.keras.losses.MeanSquaredLogarithmicError())

Initializes MeanSquaredLogarithmicError instance.

Args

reduction
Type of tf.keras.losses.Reduction to apply to loss. Default value is AUTO. AUTO indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to SUM_OVER_BATCH_SIZE. When used with tf.distribute.Strategy, outside of built-in training loops such as tf.keras compile and fit, using AUTO or SUM_OVER_BATCH_SIZE will raise an error. Please see this custom training tutorial for more details.
name
Optional name for the instance. Defaults to 'mean_squared_logarithmic_error'.
Expand source code
class MeanSquaredLogarithmicError(LossFunctionWrapper):
  """Computes the mean squared logarithmic error between `y_true` and `y_pred`.

  `loss = square(log(y_true + 1.) - log(y_pred + 1.))`

  Standalone usage:

  >>> y_true = [[0., 1.], [0., 0.]]
  >>> y_pred = [[1., 1.], [1., 0.]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> msle = tf.keras.losses.MeanSquaredLogarithmicError()
  >>> msle(y_true, y_pred).numpy()
  0.240

  >>> # Calling with 'sample_weight'.
  >>> msle(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
  0.120

  >>> # Using 'sum' reduction type.
  >>> msle = tf.keras.losses.MeanSquaredLogarithmicError(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> msle(y_true, y_pred).numpy()
  0.480

  >>> # Using 'none' reduction type.
  >>> msle = tf.keras.losses.MeanSquaredLogarithmicError(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> msle(y_true, y_pred).numpy()
  array([0.240, 0.240], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd',
                loss=tf.keras.losses.MeanSquaredLogarithmicError())
  ```
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.AUTO,
               name='mean_squared_logarithmic_error'):
    """Initializes `MeanSquaredLogarithmicError` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to
        'mean_squared_logarithmic_error'.
    """
    super().__init__(
        mean_squared_logarithmic_error, name=name, reduction=reduction)

Ancestors

Inherited members

class Poisson (reduction='auto', name='poisson')

Computes the Poisson loss between y_true and y_pred.

loss = y_pred - y_true * log(y_pred)

Standalone usage:

>>> y_true = [[0., 1.], [0., 0.]]
>>> y_pred = [[1., 1.], [0., 0.]]
>>> # Using 'auto'/'sum_over_batch_size' reduction type.
>>> p = tf.keras.losses.Poisson()
>>> p(y_true, y_pred).numpy()
0.5
>>> # Calling with 'sample_weight'.
>>> p(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
0.4
>>> # Using 'sum' reduction type.
>>> p = tf.keras.losses.Poisson(
...     reduction=tf.keras.losses.Reduction.SUM)
>>> p(y_true, y_pred).numpy()
0.999
>>> # Using 'none' reduction type.
>>> p = tf.keras.losses.Poisson(
...     reduction=tf.keras.losses.Reduction.NONE)
>>> p(y_true, y_pred).numpy()
array([0.999, 0.], dtype=float32)

Usage with the compile() API:

model.compile(optimizer='sgd', loss=tf.keras.losses.Poisson())

Initializes Poisson instance.

Args

reduction
Type of tf.keras.losses.Reduction to apply to loss. Default value is AUTO. AUTO indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to SUM_OVER_BATCH_SIZE. When used with tf.distribute.Strategy, outside of built-in training loops such as tf.keras compile and fit, using AUTO or SUM_OVER_BATCH_SIZE will raise an error. Please see this custom training tutorial for more details.
name
Optional name for the instance. Defaults to 'poisson'.
Expand source code
class Poisson(LossFunctionWrapper):
  """Computes the Poisson loss between `y_true` and `y_pred`.

  `loss = y_pred - y_true * log(y_pred)`

  Standalone usage:

  >>> y_true = [[0., 1.], [0., 0.]]
  >>> y_pred = [[1., 1.], [0., 0.]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> p = tf.keras.losses.Poisson()
  >>> p(y_true, y_pred).numpy()
  0.5

  >>> # Calling with 'sample_weight'.
  >>> p(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
  0.4

  >>> # Using 'sum' reduction type.
  >>> p = tf.keras.losses.Poisson(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> p(y_true, y_pred).numpy()
  0.999

  >>> # Using 'none' reduction type.
  >>> p = tf.keras.losses.Poisson(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> p(y_true, y_pred).numpy()
  array([0.999, 0.], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.Poisson())
  ```
  """

  def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name='poisson'):
    """Initializes `Poisson` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to 'poisson'.
    """
    super().__init__(poisson, name=name, reduction=reduction)

Ancestors

Inherited members

class SparseCategoricalCrossentropy (from_logits=False, reduction='auto', name='sparse_categorical_crossentropy')

Computes the crossentropy loss between the labels and predictions.

Use this crossentropy loss function when there are two or more label classes. We expect labels to be provided as integers. If you want to provide labels using one-hot representation, please use CategoricalCrossentropy loss. There should be # classes floating point values per feature for y_pred and a single floating point value per feature for y_true.

In the snippet below, there is a single floating point value per example for y_true and # classes floating pointing values per example for y_pred. The shape of y_true is [batch_size] and the shape of y_pred is [batch_size, num_classes].

Standalone usage:

>>> y_true = [1, 2]
>>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
>>> # Using 'auto'/'sum_over_batch_size' reduction type.
>>> scce = tf.keras.losses.SparseCategoricalCrossentropy()
>>> scce(y_true, y_pred).numpy()
1.177
>>> # Calling with 'sample_weight'.
>>> scce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()
0.814
>>> # Using 'sum' reduction type.
>>> scce = tf.keras.losses.SparseCategoricalCrossentropy(
...     reduction=tf.keras.losses.Reduction.SUM)
>>> scce(y_true, y_pred).numpy()
2.354
>>> # Using 'none' reduction type.
>>> scce = tf.keras.losses.SparseCategoricalCrossentropy(
...     reduction=tf.keras.losses.Reduction.NONE)
>>> scce(y_true, y_pred).numpy()
array([0.0513, 2.303], dtype=float32)

Usage with the compile() API:

model.compile(optimizer='sgd',
              loss=tf.keras.losses.SparseCategoricalCrossentropy())

Initializes SparseCategoricalCrossentropy instance.

Args

from_logits
Whether y_pred is expected to be a logits tensor. By default, we assume that y_pred encodes a probability distribution.
reduction
Type of tf.keras.losses.Reduction to apply to loss. Default value is AUTO. AUTO indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to SUM_OVER_BATCH_SIZE. When used with tf.distribute.Strategy, outside of built-in training loops such as tf.keras compile and fit, using AUTO or SUM_OVER_BATCH_SIZE will raise an error. Please see this custom training tutorial for more details.
name
Optional name for the instance. Defaults to 'sparse_categorical_crossentropy'.
Expand source code
class SparseCategoricalCrossentropy(LossFunctionWrapper):
  """Computes the crossentropy loss between the labels and predictions.

  Use this crossentropy loss function when there are two or more label classes.
  We expect labels to be provided as integers. If you want to provide labels
  using `one-hot` representation, please use `CategoricalCrossentropy` loss.
  There should be `# classes` floating point values per feature for `y_pred`
  and a single floating point value per feature for `y_true`.

  In the snippet below, there is a single floating point value per example for
  `y_true` and `# classes` floating pointing values per example for `y_pred`.
  The shape of `y_true` is `[batch_size]` and the shape of `y_pred` is
  `[batch_size, num_classes]`.

  Standalone usage:

  >>> y_true = [1, 2]
  >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> scce = tf.keras.losses.SparseCategoricalCrossentropy()
  >>> scce(y_true, y_pred).numpy()
  1.177

  >>> # Calling with 'sample_weight'.
  >>> scce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()
  0.814

  >>> # Using 'sum' reduction type.
  >>> scce = tf.keras.losses.SparseCategoricalCrossentropy(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> scce(y_true, y_pred).numpy()
  2.354

  >>> # Using 'none' reduction type.
  >>> scce = tf.keras.losses.SparseCategoricalCrossentropy(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> scce(y_true, y_pred).numpy()
  array([0.0513, 2.303], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd',
                loss=tf.keras.losses.SparseCategoricalCrossentropy())
  ```
  """

  def __init__(self,
               from_logits=False,
               reduction=losses_utils.ReductionV2.AUTO,
               name='sparse_categorical_crossentropy'):
    """Initializes `SparseCategoricalCrossentropy` instance.

    Args:
      from_logits: Whether `y_pred` is expected to be a logits tensor. By
        default, we assume that `y_pred` encodes a probability distribution.
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to
        'sparse_categorical_crossentropy'.
    """
    super().__init__(
        sparse_categorical_crossentropy,
        name=name,
        reduction=reduction,
        from_logits=from_logits)

Ancestors

Inherited members

class SquaredHinge (reduction='auto', name='squared_hinge')

Computes the squared hinge loss between y_true and y_pred.

loss = square(maximum(1 - y_true * y_pred, 0))

y_true values are expected to be -1 or 1. If binary (0 or 1) labels are provided we will convert them to -1 or 1.

Standalone usage:

>>> y_true = [[0., 1.], [0., 0.]]
>>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
>>> # Using 'auto'/'sum_over_batch_size' reduction type.
>>> h = tf.keras.losses.SquaredHinge()
>>> h(y_true, y_pred).numpy()
1.86
>>> # Calling with 'sample_weight'.
>>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
0.73
>>> # Using 'sum' reduction type.
>>> h = tf.keras.losses.SquaredHinge(
...     reduction=tf.keras.losses.Reduction.SUM)
>>> h(y_true, y_pred).numpy()
3.72
>>> # Using 'none' reduction type.
>>> h = tf.keras.losses.SquaredHinge(
...     reduction=tf.keras.losses.Reduction.NONE)
>>> h(y_true, y_pred).numpy()
array([1.46, 2.26], dtype=float32)

Usage with the compile() API:

model.compile(optimizer='sgd', loss=tf.keras.losses.SquaredHinge())

Initializes SquaredHinge instance.

Args

reduction
Type of tf.keras.losses.Reduction to apply to loss. Default value is AUTO. AUTO indicates that the reduction option will be determined by the usage context. For almost all cases this defaults to SUM_OVER_BATCH_SIZE. When used with tf.distribute.Strategy, outside of built-in training loops such as tf.keras compile and fit, using AUTO or SUM_OVER_BATCH_SIZE will raise an error. Please see this custom training tutorial for more details.
name
Optional name for the instance. Defaults to 'squared_hinge'.
Expand source code
class SquaredHinge(LossFunctionWrapper):
  """Computes the squared hinge loss between `y_true` and `y_pred`.

  `loss = square(maximum(1 - y_true * y_pred, 0))`

  `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
  provided we will convert them to -1 or 1.

  Standalone usage:

  >>> y_true = [[0., 1.], [0., 0.]]
  >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
  >>> h = tf.keras.losses.SquaredHinge()
  >>> h(y_true, y_pred).numpy()
  1.86

  >>> # Calling with 'sample_weight'.
  >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
  0.73

  >>> # Using 'sum' reduction type.
  >>> h = tf.keras.losses.SquaredHinge(
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> h(y_true, y_pred).numpy()
  3.72

  >>> # Using 'none' reduction type.
  >>> h = tf.keras.losses.SquaredHinge(
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> h(y_true, y_pred).numpy()
  array([1.46, 2.26], dtype=float32)

  Usage with the `compile()` API:

  ```python
  model.compile(optimizer='sgd', loss=tf.keras.losses.SquaredHinge())
  ```
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.AUTO,
               name='squared_hinge'):
    """Initializes `SquaredHinge` instance.

    Args:
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
      name: Optional name for the instance. Defaults to 'squared_hinge'.
    """
    super().__init__(squared_hinge, name=name, reduction=reduction)

Ancestors

Inherited members