Module keras.utils.losses_utils
Utilities related to loss functions.
Source code
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# pylint: disable=protected-access
"""Utilities related to loss functions."""
import tensorflow.compat.v2 as tf
from keras import backend
from keras.engine import keras_tensor
from tensorflow.python.util.tf_export import keras_export
@keras_export('keras.losses.Reduction', v1=[])
class ReductionV2(object):
"""Types of loss reduction.
Contains the following values:
* `AUTO`: Indicates that the reduction option will be determined by the usage
context. For almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
used with `tf.distribute.Strategy`, outside of built-in training loops such
as `tf.keras` `compile` and `fit`, we expect reduction value to be
`SUM` or `NONE`. Using `AUTO` in that case will raise an error.
* `NONE`: No **additional** reduction is applied to the output of the wrapped
loss function. When non-scalar losses are returned to Keras functions like
`fit`/`evaluate`, the unreduced vector loss is passed to the optimizer
but the reported loss will be a scalar value.
Caution: **Verify the shape of the outputs when using** `Reduction.NONE`.
The builtin loss functions wrapped by the loss classes reduce
one dimension (`axis=-1`, or `axis` if specified by loss function).
`Reduction.NONE` just means that no **additional** reduction is applied by
the class wrapper. For categorical losses with an example input shape of
`[batch, W, H, n_classes]` the `n_classes` dimension is reduced. For
pointwise losses you must include a dummy axis so that `[batch, W, H, 1]`
is reduced to `[batch, W, H]`. Without the dummy axis `[batch, W, H]`
will be incorrectly reduced to `[batch, W]`.
* `SUM`: Scalar sum of weighted losses.
* `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in losses.
This reduction type is not supported when used with
`tf.distribute.Strategy` outside of built-in training loops like `tf.keras`
`compile`/`fit`.
You can implement 'SUM_OVER_BATCH_SIZE' using global batch size like:
```
with strategy.scope():
loss_obj = tf.keras.losses.CategoricalCrossentropy(
reduction=tf.keras.losses.Reduction.NONE)
....
loss = tf.reduce_sum(loss_obj(labels, predictions)) * (1. / global_batch_size)
```
Please see the [custom training guide](
https://www.tensorflow.org/tutorials/distribute/custom_training) for more
details on this.
"""
AUTO = 'auto'
NONE = 'none'
SUM = 'sum'
SUM_OVER_BATCH_SIZE = 'sum_over_batch_size'
@classmethod
def all(cls):
return (cls.AUTO, cls.NONE, cls.SUM, cls.SUM_OVER_BATCH_SIZE)
@classmethod
def validate(cls, key):
if key not in cls.all():
raise ValueError('Invalid Reduction Key %s.' % key)
def remove_squeezable_dimensions(
labels, predictions, expected_rank_diff=0, name=None):
"""Squeeze last dim if ranks differ from expected by exactly 1.
In the common case where we expect shapes to match, `expected_rank_diff`
defaults to 0, and we squeeze the last dimension of the larger rank if they
differ by 1.
But, for example, if `labels` contains class IDs and `predictions` contains 1
probability per class, we expect `predictions` to have 1 more dimension than
`labels`, so `expected_rank_diff` would be 1. In this case, we'd squeeze
`labels` if `rank(predictions) - rank(labels) == 0`, and
`predictions` if `rank(predictions) - rank(labels) == 2`.
This will use static shape if available. Otherwise, it will add graph
operations, which could result in a performance hit.
Args:
labels: Label values, a `Tensor` whose dimensions match `predictions`.
predictions: Predicted values, a `Tensor` of arbitrary dimensions.
expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.
name: Name of the op.
Returns:
Tuple of `labels` and `predictions`, possibly with last dim squeezed.
"""
with backend.name_scope(name or 'remove_squeezable_dimensions'):
if not isinstance(predictions, tf.RaggedTensor):
predictions = tf.convert_to_tensor(predictions)
if not isinstance(labels, tf.RaggedTensor):
labels = tf.convert_to_tensor(labels)
predictions_shape = predictions.shape
predictions_rank = predictions_shape.ndims
labels_shape = labels.shape
labels_rank = labels_shape.ndims
if (labels_rank is not None) and (predictions_rank is not None):
# Use static rank.
rank_diff = predictions_rank - labels_rank
if (rank_diff == expected_rank_diff + 1 and
predictions_shape.dims[-1].is_compatible_with(1)):
predictions = tf.squeeze(predictions, [-1])
elif (rank_diff == expected_rank_diff - 1 and
labels_shape.dims[-1].is_compatible_with(1)):
labels = tf.squeeze(labels, [-1])
return labels, predictions
# Use dynamic rank.
rank_diff = tf.rank(predictions) - tf.rank(labels)
if (predictions_rank is None) or (
predictions_shape.dims[-1].is_compatible_with(1)):
predictions = tf.cond(
tf.equal(expected_rank_diff + 1, rank_diff),
lambda: tf.squeeze(predictions, [-1]),
lambda: predictions)
if (labels_rank is None) or (
labels_shape.dims[-1].is_compatible_with(1)):
labels = tf.cond(
tf.equal(expected_rank_diff - 1, rank_diff),
lambda: tf.squeeze(labels, [-1]),
lambda: labels)
return labels, predictions
def squeeze_or_expand_dimensions(y_pred, y_true=None, sample_weight=None):
"""Squeeze or expand last dimension if needed.
1. Squeezes last dim of `y_pred` or `y_true` if their rank differs by 1
(using `remove_squeezable_dimensions`).
2. Squeezes or expands last dim of `sample_weight` if its rank differs by 1
from the new rank of `y_pred`.
If `sample_weight` is scalar, it is kept scalar.
This will use static shape if available. Otherwise, it will add graph
operations, which could result in a performance hit.
Args:
y_pred: Predicted values, a `Tensor` of arbitrary dimensions.
y_true: Optional label `Tensor` whose dimensions match `y_pred`.
sample_weight: Optional weight scalar or `Tensor` whose dimensions match
`y_pred`.
Returns:
Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them possibly has
the last dimension squeezed,
`sample_weight` could be extended by one dimension.
If `sample_weight` is None, (y_pred, y_true) is returned.
"""
y_pred_shape = y_pred.shape
y_pred_rank = y_pred_shape.ndims
if y_true is not None:
# If sparse matrix is provided as `y_true`, the last dimension in `y_pred`
# may be > 1. Eg: y_true = [0, 1, 2] (shape=(3,)),
# y_pred = [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]] (shape=(3, 3))
# In this case, we should not try to remove squeezable dimension.
y_true_shape = y_true.shape
y_true_rank = y_true_shape.ndims
if (y_true_rank is not None) and (y_pred_rank is not None):
# Use static rank for `y_true` and `y_pred`.
if (y_pred_rank - y_true_rank != 1) or y_pred_shape[-1] == 1:
y_true, y_pred = remove_squeezable_dimensions(
y_true, y_pred)
else:
# Use dynamic rank.
rank_diff = tf.rank(y_pred) - tf.rank(y_true)
squeeze_dims = lambda: remove_squeezable_dimensions( # pylint: disable=g-long-lambda
y_true, y_pred)
is_last_dim_1 = tf.equal(1, tf.shape(y_pred)[-1])
maybe_squeeze_dims = lambda: tf.cond( # pylint: disable=g-long-lambda
is_last_dim_1, squeeze_dims, lambda: (y_true, y_pred))
y_true, y_pred = tf.cond(
tf.equal(1, rank_diff), maybe_squeeze_dims, squeeze_dims)
if sample_weight is None:
return y_pred, y_true
weights_shape = sample_weight.shape
weights_rank = weights_shape.ndims
if weights_rank == 0: # If weights is scalar, do nothing.
return y_pred, y_true, sample_weight
if (y_pred_rank is not None) and (weights_rank is not None):
# Use static rank.
if weights_rank - y_pred_rank == 1:
sample_weight = tf.squeeze(sample_weight, [-1])
elif y_pred_rank - weights_rank == 1:
sample_weight = tf.expand_dims(sample_weight, [-1])
return y_pred, y_true, sample_weight
# Use dynamic rank.
weights_rank_tensor = tf.rank(sample_weight)
rank_diff = weights_rank_tensor - tf.rank(y_pred)
maybe_squeeze_weights = lambda: tf.squeeze(sample_weight, [-1])
def _maybe_expand_weights():
expand_weights = lambda: tf.expand_dims(sample_weight, [-1])
return tf.cond(
tf.equal(rank_diff, -1), expand_weights, lambda: sample_weight)
def _maybe_adjust_weights():
return tf.cond(
tf.equal(rank_diff, 1), maybe_squeeze_weights,
_maybe_expand_weights)
# squeeze or expand last dim of `sample_weight` if its rank differs by 1
# from the new rank of `y_pred`.
sample_weight = tf.cond(
tf.equal(weights_rank_tensor, 0), lambda: sample_weight,
_maybe_adjust_weights)
return y_pred, y_true, sample_weight
def _safe_mean(losses, num_present):
"""Computes a safe mean of the losses.
Args:
losses: `Tensor` whose elements contain individual loss measurements.
num_present: The number of measurable elements in `losses`.
Returns:
A scalar representing the mean of `losses`. If `num_present` is zero,
then zero is returned.
"""
total_loss = tf.reduce_sum(losses)
return tf.math.divide_no_nan(total_loss, num_present, name='value')
def _num_elements(losses):
"""Computes the number of elements in `losses` tensor."""
with backend.name_scope('num_elements') as scope:
return tf.cast(tf.size(losses, name=scope), dtype=losses.dtype)
def reduce_weighted_loss(weighted_losses,
reduction=ReductionV2.SUM_OVER_BATCH_SIZE):
"""Reduces the individual weighted loss measurements."""
if reduction == ReductionV2.NONE:
loss = weighted_losses
else:
loss = tf.reduce_sum(weighted_losses)
if reduction == ReductionV2.SUM_OVER_BATCH_SIZE:
loss = _safe_mean(loss, _num_elements(weighted_losses))
return loss
@keras_export('keras.__internal__.losses.compute_weighted_loss', v1=[])
def compute_weighted_loss(losses,
sample_weight=None,
reduction=ReductionV2.SUM_OVER_BATCH_SIZE,
name=None):
"""Computes the weighted loss.
Args:
losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as
`losses`, or be broadcastable to `losses`.
reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
Default value is `SUM_OVER_BATCH_SIZE`.
name: Optional name for the op.
Raises:
ValueError: If the shape of `sample_weight` is not compatible with `losses`.
Returns:
Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
`NONE`, this has the same shape as `losses`; otherwise, it is scalar.
"""
ReductionV2.validate(reduction)
# If this function is called directly, then we just default 'AUTO' to
# 'SUM_OVER_BATCH_SIZE'. Eg. Canned estimator use cases.
if reduction == ReductionV2.AUTO:
reduction = ReductionV2.SUM_OVER_BATCH_SIZE
if sample_weight is None:
sample_weight = 1.0
with backend.name_scope(name or 'weighted_loss'):
# Save the `reduction` argument for loss normalization when distributing
# to multiple replicas. Used only for estimator + v1 optimizer flow.
tf.compat.v1.get_default_graph()._last_loss_reduction = reduction # pylint: disable=protected-access
if not isinstance(losses,
(keras_tensor.KerasTensor, tf.RaggedTensor)):
losses = tf.convert_to_tensor(losses)
input_dtype = losses.dtype
if not isinstance(sample_weight, keras_tensor.KerasTensor):
sample_weight = tf.convert_to_tensor(sample_weight)
# TODO(psv): Handle casting here in a better way, eg. if losses is float64
# we do not want to lose precision.
losses = tf.cast(losses, 'float32')
sample_weight = tf.cast(sample_weight, 'float32')
# Update dimensions of `sample_weight` to match with `losses` if possible.
losses, _, sample_weight = squeeze_or_expand_dimensions( # pylint: disable=unbalanced-tuple-unpacking
losses, None, sample_weight)
weighted_losses = tf.multiply(losses, sample_weight)
# Apply reduction function to the individual weighted losses.
loss = reduce_weighted_loss(weighted_losses, reduction)
# Convert the result back to the input type.
loss = tf.cast(loss, input_dtype)
return loss
def scale_loss_for_distribution(loss_value):
"""Scales and returns the given loss value by the number of replicas."""
num_replicas = (
tf.distribute.get_strategy().num_replicas_in_sync)
if num_replicas > 1:
loss_value *= (1. / num_replicas)
return loss_value
def cast_losses_to_common_dtype(losses):
"""Cast a list of losses to a common dtype.
If any loss is floating-point, they will all be cast to the most-precise
floating-point loss. Otherwise the losses are not cast. We also skip casting
losses if there are any complex losses.
Args:
losses: A list of losses.
Returns:
`losses`, but they have been cast to a common dtype.
"""
highest_float = None
for loss in losses:
if loss.dtype.is_floating:
if highest_float is None or loss.dtype.size > highest_float.size:
highest_float = loss.dtype
elif {loss.dtype, highest_float} == {'bfloat16', 'float16'}:
highest_float = 'float32'
if loss.dtype.is_complex:
return losses # If we find any complex losses, do not cast any losses
if highest_float:
losses = [tf.cast(loss, highest_float) for loss in losses]
return losses
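The helpers above compose into the weighting-and-reduction pipeline that `compute_weighted_loss` implements. Below is a minimal sketch of that flow with illustrative values; it assumes TensorFlow 2.x and that the module imports as `keras.utils.losses_utils`, as in the header above, and is a simplified walk-through rather than the exact internal call sequence.
```
import tensorflow as tf
from keras.utils import losses_utils

losses = tf.constant([0.5, 1.0, 2.0, 0.5])         # per-sample losses
sample_weight = tf.constant([1.0, 1.0, 0.0, 2.0])  # per-sample weights

# 1. Align the ranks of the losses and the weights (a no-op here, both rank 1).
losses, _, sample_weight = losses_utils.squeeze_or_expand_dimensions(
    losses, None, sample_weight)

# 2. Weight the individual losses.
weighted = losses * sample_weight                   # [0.5, 1.0, 0.0, 1.0]

# 3. Reduce. SUM_OVER_BATCH_SIZE divides the sum by the element count.
loss = losses_utils.reduce_weighted_loss(
    weighted, reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE)
print(float(loss))  # 2.5 / 4 = 0.625
```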
Functions
def cast_losses_to_common_dtype(losses)
Cast a list of losses to a common dtype.
If any loss is floating-point, they will all be cast to the most-precise
floating-point dtype among them. Otherwise the losses are not cast. Casting is
also skipped if there are any complex losses.
Args:
  losses: A list of losses.
Returns:
  `losses`, but cast to a common dtype.
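A rough illustration of the promotion rule, assuming the module imports as `keras.utils.losses_utils`; the dtypes are chosen purely for demonstration.
```
import tensorflow as tf
from keras.utils import losses_utils

# Mixed-precision losses are promoted to the widest float dtype present.
mixed = [tf.constant(1.0, dtype=tf.float16),
         tf.constant(2.0, dtype=tf.float32)]
print([loss.dtype for loss in losses_utils.cast_losses_to_common_dtype(mixed)])
# -> [tf.float32, tf.float32]

# If any loss is complex, the list is returned unchanged.
with_complex = [tf.constant(1.0, dtype=tf.float32),
                tf.constant(1 + 2j, dtype=tf.complex64)]
print([loss.dtype for loss in
       losses_utils.cast_losses_to_common_dtype(with_complex)])
# -> [tf.float32, tf.complex64]
```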
def compute_weighted_loss(losses, sample_weight=None, reduction='sum_over_batch_size', name=None)
Computes the weighted loss.
Args:
  losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
  sample_weight: Optional `Tensor` whose rank is either 0, the same rank as
    `losses`, or broadcastable to `losses`.
  reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to the
    loss. Default value is `SUM_OVER_BATCH_SIZE`.
  name: Optional name for the op.
Raises:
  ValueError: If the shape of `sample_weight` is not compatible with `losses`.
Returns:
  Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
  `NONE`, this has the same shape as `losses`; otherwise, it is scalar.
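A short sketch of how the `sample_weight` rank and the `reduction` argument interact; values are illustrative and the import path is assumed from the module header.
```
import tensorflow as tf
from keras.utils import losses_utils

losses = tf.constant([[0.3, 0.7], [1.0, 2.0]])   # shape [2, 2]
weights = tf.constant([[1.0], [0.5]])            # shape [2, 1], broadcasts

# NONE keeps the element-wise weighted losses.
per_element = losses_utils.compute_weighted_loss(
    losses, sample_weight=weights,
    reduction=losses_utils.ReductionV2.NONE)
print(per_element.numpy())   # [[0.3 0.7] [0.5 1.0]]

# The default SUM_OVER_BATCH_SIZE divides the weighted sum by the number of
# elements in `losses` (4 here), not by the batch size alone.
mean = losses_utils.compute_weighted_loss(losses, sample_weight=weights)
print(float(mean))           # (0.3 + 0.7 + 0.5 + 1.0) / 4 = 0.625
```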
def reduce_weighted_loss(weighted_losses, reduction='sum_over_batch_size')
Reduces the individual weighted loss measurements.
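For instance, with purely illustrative values:
```
import tensorflow as tf
from keras.utils.losses_utils import ReductionV2, reduce_weighted_loss

weighted = tf.constant([1.0, 2.0, 3.0, 0.0])

print(float(reduce_weighted_loss(weighted, ReductionV2.SUM)))  # 6.0
print(float(reduce_weighted_loss(
    weighted, ReductionV2.SUM_OVER_BATCH_SIZE)))               # 6.0 / 4 = 1.5
print(reduce_weighted_loss(weighted, ReductionV2.NONE).numpy())  # [1. 2. 3. 0.]
```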
def remove_squeezable_dimensions(labels, predictions, expected_rank_diff=0, name=None)
Squeeze last dim if ranks differ from expected by exactly 1.
In the common case where we expect shapes to match, `expected_rank_diff`
defaults to 0, and we squeeze the last dimension of the larger rank if they
differ by 1.
But, for example, if `labels` contains class IDs and `predictions` contains 1
probability per class, we expect `predictions` to have 1 more dimension than
`labels`, so `expected_rank_diff` would be 1. In this case, we'd squeeze
`labels` if `rank(predictions) - rank(labels) == 0`, and `predictions` if
`rank(predictions) - rank(labels) == 2`.
This will use static shape if available. Otherwise, it will add graph
operations, which could result in a performance hit.
Args:
  labels: Label values, a `Tensor` whose dimensions match `predictions`.
  predictions: Predicted values, a `Tensor` of arbitrary dimensions.
  expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.
  name: Name of the op.
Returns:
  Tuple of `labels` and `predictions`, possibly with last dim squeezed.
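A brief sketch of both behaviours with made-up shapes; the import path is assumed from the module header.
```
import tensorflow as tf
from keras.utils.losses_utils import remove_squeezable_dimensions

# Labels carry a trailing size-1 axis; predictions do not, so labels are squeezed.
labels = tf.constant([[1], [0], [2]])        # shape [3, 1]
predictions = tf.constant([0.1, 0.8, 0.3])   # shape [3]
labels, predictions = remove_squeezable_dimensions(labels, predictions)
print(labels.shape, predictions.shape)       # (3,) (3,)

# With class-ID labels and per-class probabilities, predictions are expected to
# have one more axis than labels, so expected_rank_diff=1 leaves both unchanged.
class_ids = tf.constant([1, 0, 2])           # shape [3]
probs = tf.constant([[0.1, 0.8, 0.1],
                     [0.7, 0.2, 0.1],
                     [0.2, 0.2, 0.6]])       # shape [3, 3]
class_ids, probs = remove_squeezable_dimensions(
    class_ids, probs, expected_rank_diff=1)
print(class_ids.shape, probs.shape)          # (3,) (3, 3)
```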
def scale_loss_for_distribution(loss_value)
Scales and returns the given loss value by the number of replicas.
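Outside a `tf.distribute.Strategy` scope the default strategy reports a single replica, so this is a no-op; under a multi-replica strategy the loss is divided by the replica count. A sketch follows; `MirroredStrategy` is used only for illustration and the actual replica count depends on the machine's visible devices.
```
import tensorflow as tf
from keras.utils.losses_utils import scale_loss_for_distribution

# Default strategy: one replica, so the loss is returned unchanged.
print(float(scale_loss_for_distribution(tf.constant(4.0))))  # 4.0

# Inside a strategy scope the loss is scaled by 1 / num_replicas_in_sync,
# because per-replica losses are summed across replicas when reduced.
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    scaled = scale_loss_for_distribution(tf.constant(4.0))
print(float(scaled))  # 4.0 / strategy.num_replicas_in_sync
```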
def squeeze_or_expand_dimensions(y_pred, y_true=None, sample_weight=None)
Squeeze or expand last dimension if needed.
1. Squeezes last dim of `y_pred` or `y_true` if their rank differs by 1
   (using `remove_squeezable_dimensions`).
2. Squeezes or expands last dim of `sample_weight` if its rank differs by 1
   from the new rank of `y_pred`. If `sample_weight` is scalar, it is kept
   scalar.
This will use static shape if available. Otherwise, it will add graph
operations, which could result in a performance hit.
Args:
  y_pred: Predicted values, a `Tensor` of arbitrary dimensions.
  y_true: Optional label `Tensor` whose dimensions match `y_pred`.
  sample_weight: Optional weight scalar or `Tensor` whose dimensions match
    `y_pred`.
Returns:
  Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them possibly has
  the last dimension squeezed, and `sample_weight` could be extended by one
  dimension. If `sample_weight` is None, (y_pred, y_true) is returned.
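Two illustrative cases, assuming the import path from the module header and arbitrary example values.
```
import tensorflow as tf
from keras.utils.losses_utils import squeeze_or_expand_dimensions

# Case 1: the weights gain a trailing size-1 axis to match y_pred's rank.
y_true = tf.constant([[1.0], [0.0]])      # shape [2, 1]
y_pred = tf.constant([[0.9], [0.2]])      # shape [2, 1]
sample_weight = tf.constant([1.0, 0.5])   # shape [2]
y_pred, y_true, sample_weight = squeeze_or_expand_dimensions(
    y_pred, y_true, sample_weight)
print(y_pred.shape, y_true.shape, sample_weight.shape)  # (2, 1) (2, 1) (2, 1)

# Case 2: y_pred's trailing size-1 axis is squeezed to match 1-D labels.
# With sample_weight=None, only (y_pred, y_true) is returned.
y_true2 = tf.constant([1.0, 0.0])         # shape [2]
y_pred2 = tf.constant([[0.9], [0.2]])     # shape [2, 1]
y_pred2, y_true2 = squeeze_or_expand_dimensions(y_pred2, y_true2)
print(y_pred2.shape, y_true2.shape)       # (2,) (2,)
```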
Classes
class ReductionV2
Types of loss reduction.
Contains the following values:
* `AUTO`: Indicates that the reduction option will be determined by the usage
  context. For almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
  used with `tf.distribute.Strategy`, outside of built-in training loops such
  as `tf.keras` `compile` and `fit`, we expect the reduction value to be `SUM`
  or `NONE`. Using `AUTO` in that case will raise an error.
* `NONE`: No additional reduction is applied to the output of the wrapped loss
  function. When non-scalar losses are returned to Keras functions like
  `fit`/`evaluate`, the unreduced vector loss is passed to the optimizer but
  the reported loss will be a scalar value.
  Caution: Verify the shape of the outputs when using `Reduction.NONE`. The
  builtin loss functions wrapped by the loss classes reduce one dimension
  (`axis=-1`, or `axis` if specified by the loss function). `Reduction.NONE`
  just means that no additional reduction is applied by the class wrapper. For
  categorical losses with an example input shape of `[batch, W, H, n_classes]`
  the `n_classes` dimension is reduced. For pointwise losses you must include
  a dummy axis so that `[batch, W, H, 1]` is reduced to `[batch, W, H]`.
  Without the dummy axis `[batch, W, H]` will be incorrectly reduced to
  `[batch, W]`.
* `SUM`: Scalar sum of weighted losses.
* `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by the number of elements in the
  losses. This reduction type is not supported when used with
  `tf.distribute.Strategy` outside of built-in training loops like `tf.keras`
  `compile`/`fit`.
You can implement `SUM_OVER_BATCH_SIZE` using the global batch size like:
    with strategy.scope():
      loss_obj = tf.keras.losses.CategoricalCrossentropy(
          reduction=tf.keras.losses.Reduction.NONE)
      ....
      loss = tf.reduce_sum(loss_obj(labels, predictions)) * (
          1. / global_batch_size)
Please see the [custom training guide](
https://www.tensorflow.org/tutorials/distribute/custom_training) for more
details on this.
Class variables
var AUTO
var NONE
var SUM
var SUM_OVER_BATCH_SIZE
Class methods
def all()
Returns a tuple of all reduction keys: `(AUTO, NONE, SUM, SUM_OVER_BATCH_SIZE)`.
def validate(key)
Raises `ValueError` if `key` is not one of the keys returned by `all()`.
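A small usage sketch of the reduction keys; `tf.keras.losses.MeanSquaredError` is used here purely for illustration, and the values are made up.
```
import tensorflow as tf
from keras.utils.losses_utils import ReductionV2

print(ReductionV2.all())   # ('auto', 'none', 'sum', 'sum_over_batch_size')
try:
    ReductionV2.validate('mean')
except ValueError as e:
    print(e)               # Invalid Reduction Key mean.

# The same keys are exposed publicly as `tf.keras.losses.Reduction`. Note that
# the wrapped loss still reduces the last axis even with Reduction.NONE, which
# is why pointwise losses need the dummy trailing axis described above.
y_true = tf.constant([[0.0], [1.0]])   # [batch, 1]
y_pred = tf.constant([[1.0], [1.0]])
mse_none = tf.keras.losses.MeanSquaredError(
    reduction=tf.keras.losses.Reduction.NONE)
print(mse_none(y_true, y_pred).numpy())  # [1. 0.]: one value per example
```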