Module keras.mixed_precision.test_util
Contains testing utilities related to mixed precision.
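These helpers are typically composed into small models whose forward pass checks tensor dtypes and whose backward pass checks gradient values. Below is a minimal illustrative sketch (not part of the module itself), assuming eager execution and that the module is importable as `keras.mixed_precision.test_util`:
import tensorflow.compat.v2 as tf
from keras.mixed_precision import test_util  # assumed import path

tf.keras.mixed_precision.set_global_policy('mixed_float16')

inp = tf.keras.Input(shape=(1,))
out = test_util.MultiplyLayer(assert_type=tf.float16)(inp)
# The loss below is a plain sum, so the gradient reaching this identity
# should be 1.0 for every element.
out = tf.keras.layers.Lambda(
    test_util.create_identity_with_grad_check_fn(expected_gradient=1.0))(out)
model = tf.keras.Model(inp, out)

with tf.GradientTape() as tape:
  loss = tf.reduce_sum(tf.cast(model(tf.ones((2, 1))), tf.float32))
grads = tape.gradient(loss, model.trainable_variables)

tf.keras.mixed_precision.set_global_policy('float32')  # restore the default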
Expand source code
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains testing utilities related to mixed precision."""
import tensorflow.compat.v2 as tf
from keras import regularizers
from keras.engine import base_layer
def create_identity_with_grad_check_fn(expected_gradient, expected_dtype=None):
"""Returns a function that asserts it's gradient has a certain value.
This serves as a hook to assert intermediate gradients have a certain value.
This returns an identity function. The identity's gradient function is also
the identity function, except it asserts that the gradient equals
`expected_gradient` and has dtype `expected_dtype`.
Args:
expected_gradient: The gradient function asserts that the gradient is this
value.
expected_dtype: The gradient function asserts the gradient has this dtype.
Returns:
An identity function whose gradient function asserts the gradient has a
certain value.
"""
@tf.custom_gradient
def _identity_with_grad_check(x):
"""Function that asserts it's gradient has a certain value."""
x = tf.identity(x)
def grad(dx):
"""Gradient function that asserts the gradient has a certain value."""
if expected_dtype:
assert dx.dtype == expected_dtype, (
'dx.dtype should be %s but is: %s' % (expected_dtype, dx.dtype))
expected_tensor = tf.convert_to_tensor(
expected_gradient, dtype=dx.dtype, name='expected_gradient')
# Control dependency is to ensure input is available. It's possible the
# dataset will throw a StopIteration to indicate there is no more data, in
# which case we don't want to run the assertion.
with tf.control_dependencies([x]):
assert_op = tf.compat.v1.assert_equal(dx, expected_tensor)
with tf.control_dependencies([assert_op]):
dx = tf.identity(dx)
return dx
return x, grad
# Keras sometimes has trouble serializing Lambda layers with a decorated
# function. So we define and return a non-decorated function.
def identity_with_grad_check(x):
return _identity_with_grad_check(x)
return identity_with_grad_check
def create_identity_with_nan_gradients_fn(have_nan_gradients):
"""Returns a function that optionally has NaN gradients.
This serves as a hook to introduce NaN gradients to a model. This returns an
identity function. The identity's gradient function will check if the boolean
tensor `have_nan_gradients` is True. If so, the gradient will be NaN.
Otherwise, the gradient will also be the identity.
Args:
have_nan_gradients: A scalar boolean tensor. If True, gradients will be NaN.
Otherwise, the gradient function is the identity function.
Returns:
An identity function whose gradient function will return NaNs, if
`have_nan_gradients` is True.
"""
@tf.custom_gradient
def _identity_with_nan_gradients(x):
"""Function whose gradient is NaN iff `have_nan_gradients` is True."""
x = tf.identity(x)
def grad(dx):
return tf.cond(
have_nan_gradients,
lambda: dx * float('NaN'),
lambda: dx
)
return x, grad
# Keras sometimes has trouble serializing Lambda layers with a decorated
# function. So we define and return a non-decorated function.
def identity_with_nan_gradients(x):
return _identity_with_nan_gradients(x)
return identity_with_nan_gradients
class AssertTypeLayer(base_layer.Layer):
"""A layer which asserts it's inputs are a certain type."""
def __init__(self, assert_type=None, **kwargs):
self._assert_type = (tf.as_dtype(assert_type).name if assert_type
else None)
super(AssertTypeLayer, self).__init__(**kwargs)
def assert_input_types(self, inputs):
"""Asserts `inputs` are of the correct type. Should be called in call()."""
if self._assert_type:
inputs_flattened = tf.nest.flatten(inputs)
for inp in inputs_flattened:
assert inp.dtype.base_dtype == self._assert_type, (
'Input tensor has type %s which does not match assert type %s' %
(inp.dtype.name, self._assert_type))
class MultiplyLayer(AssertTypeLayer):
"""A layer which multiplies its input by a scalar variable."""
def __init__(self,
regularizer=None,
activity_regularizer=None,
use_operator=False,
var_name='v',
**kwargs):
"""Initializes the MultiplyLayer.
Args:
regularizer: The weight regularizer on the scalar variable.
activity_regularizer: The activity regularizer.
use_operator: If True, multiply using the * operator. If False, multiply
using tf.multiply.
var_name: The name of the variable. It can be useful to pass a name other
than 'v', to test having the attribute name (self.v) being different
from the variable name.
**kwargs: Passed to AssertTypeLayer constructor.
"""
self._regularizer = regularizer
if isinstance(regularizer, dict):
self._regularizer = regularizers.deserialize(regularizer,
custom_objects=globals())
self._activity_regularizer = activity_regularizer
if isinstance(activity_regularizer, dict):
self._activity_regularizer = regularizers.deserialize(
activity_regularizer, custom_objects=globals())
self._use_operator = use_operator
self._var_name = var_name
super(MultiplyLayer, self).__init__(
activity_regularizer=self._activity_regularizer, **kwargs)
def build(self, _):
self.v = self.add_weight(
self._var_name, (), initializer='ones', regularizer=self._regularizer)
self.built = True
def call(self, inputs):
self.assert_input_types(inputs)
return self._multiply(inputs, self.v)
def _multiply(self, x, y):
if self._use_operator:
return x * y
else:
return tf.multiply(x, y)
def get_config(self):
config = super(MultiplyLayer, self).get_config()
config['regularizer'] = regularizers.serialize(self._regularizer)
config['activity_regularizer'] = regularizers.serialize(
self._activity_regularizer)
config['use_operator'] = self._use_operator
config['var_name'] = self._var_name
config['assert_type'] = self._assert_type
return config
class MultiplyLayerWithoutAutoCast(MultiplyLayer):
"""Same as MultiplyLayer, but does not use AutoCastVariables."""
def build(self, _):
dtype = self.dtype
if dtype in ('float16', 'bfloat16'):
dtype = 'float32'
self.v = self.add_weight(
'v', (),
initializer='ones',
dtype=dtype,
experimental_autocast=False,
regularizer=self._regularizer)
self.built = True
def call(self, inputs):
self.assert_input_types(inputs)
assert self.v.dtype in (tf.float32, tf.float64)
return self._multiply(inputs, tf.cast(self.v, inputs.dtype))
class IdentityRegularizer(regularizers.Regularizer):
def __call__(self, x):
assert x.dtype == tf.float32
return tf.identity(x)
def get_config(self):
return {}
class ReduceSumRegularizer(regularizers.Regularizer):
def __call__(self, x):
return tf.reduce_sum(x)
def get_config(self):
return {}
Functions
def create_identity_with_grad_check_fn(expected_gradient, expected_dtype=None)
- Returns a function that asserts its gradient has a certain value.
This serves as a hook to assert intermediate gradients have a certain value. This returns an identity function. The identity's gradient function is also the identity function, except it asserts that the gradient equals `expected_gradient` and has dtype `expected_dtype`.
Args
expected_gradient
- The gradient function asserts that the gradient is this value.
expected_dtype
- The gradient function asserts the gradient has this dtype.
Returns
An identity function whose gradient function asserts the gradient has a certain value.
Expand source code
def create_identity_with_grad_check_fn(expected_gradient, expected_dtype=None):
  """Returns a function that asserts its gradient has a certain value.

  This serves as a hook to assert intermediate gradients have a certain value.
  This returns an identity function. The identity's gradient function is also
  the identity function, except it asserts that the gradient equals
  `expected_gradient` and has dtype `expected_dtype`.

  Args:
    expected_gradient: The gradient function asserts that the gradient is this
      value.
    expected_dtype: The gradient function asserts the gradient has this dtype.

  Returns:
    An identity function whose gradient function asserts the gradient has a
    certain value.
  """
  @tf.custom_gradient
  def _identity_with_grad_check(x):
    """Function that asserts its gradient has a certain value."""
    x = tf.identity(x)

    def grad(dx):
      """Gradient function that asserts the gradient has a certain value."""
      if expected_dtype:
        assert dx.dtype == expected_dtype, (
            'dx.dtype should be %s but is: %s' % (expected_dtype, dx.dtype))
      expected_tensor = tf.convert_to_tensor(
          expected_gradient, dtype=dx.dtype, name='expected_gradient')
      # Control dependency is to ensure input is available. It's possible the
      # dataset will throw a StopIteration to indicate there is no more data,
      # in which case we don't want to run the assertion.
      with tf.control_dependencies([x]):
        assert_op = tf.compat.v1.assert_equal(dx, expected_tensor)
      with tf.control_dependencies([assert_op]):
        dx = tf.identity(dx)
      return dx

    return x, grad

  # Keras sometimes has trouble serializing Lambda layers with a decorated
  # function. So we define and return a non-decorated function.
  def identity_with_grad_check(x):
    return _identity_with_grad_check(x)

  return identity_with_grad_check
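A minimal usage sketch (not part of the module), assuming eager execution and that the module is importable as `keras.mixed_precision.test_util`. Because `y = 2 * identity(x)`, the gradient reaching the identity is 2.0, which is exactly what the returned function asserts:
import tensorflow.compat.v2 as tf
from keras.mixed_precision import test_util  # assumed import path

identity_fn = test_util.create_identity_with_grad_check_fn(
    expected_gradient=2.0, expected_dtype=tf.float32)

x = tf.constant(3.0)
with tf.GradientTape() as tape:
  tape.watch(x)
  y = 2.0 * identity_fn(x)  # the gradient flowing into the identity is 2.0
print(tape.gradient(y, x))  # tf.Tensor(2.0, shape=(), dtype=float32)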
def create_identity_with_nan_gradients_fn(have_nan_gradients)
- Returns a function that optionally has NaN gradients.
This serves as a hook to introduce NaN gradients to a model. This returns an identity function. The identity's gradient function will check if the boolean tensor `have_nan_gradients` is True. If so, the gradient will be NaN. Otherwise, the gradient will also be the identity.
Args
have_nan_gradients
- A scalar boolean tensor. If True, gradients will be NaN. Otherwise, the gradient function is the identity function.
Returns
An identity function whose gradient function will return NaNs, if `have_nan_gradients` is True.
Expand source code
def create_identity_with_nan_gradients_fn(have_nan_gradients):
  """Returns a function that optionally has NaN gradients.

  This serves as a hook to introduce NaN gradients to a model. This returns an
  identity function. The identity's gradient function will check if the boolean
  tensor `have_nan_gradients` is True. If so, the gradient will be NaN.
  Otherwise, the gradient will also be the identity.

  Args:
    have_nan_gradients: A scalar boolean tensor. If True, gradients will be
      NaN. Otherwise, the gradient function is the identity function.

  Returns:
    An identity function whose gradient function will return NaNs, if
    `have_nan_gradients` is True.
  """
  @tf.custom_gradient
  def _identity_with_nan_gradients(x):
    """Function whose gradient is NaN iff `have_nan_gradients` is True."""
    x = tf.identity(x)

    def grad(dx):
      return tf.cond(
          have_nan_gradients,
          lambda: dx * float('NaN'),
          lambda: dx
      )

    return x, grad

  # Keras sometimes has trouble serializing Lambda layers with a decorated
  # function. So we define and return a non-decorated function.
  def identity_with_nan_gradients(x):
    return _identity_with_nan_gradients(x)

  return identity_with_nan_gradients
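A minimal usage sketch (not part of the module), assuming eager execution and the same assumed import path as above. Toggling the boolean variable switches the backward pass between the identity and NaN:
import tensorflow.compat.v2 as tf
from keras.mixed_precision import test_util  # assumed import path

have_nan_gradients = tf.Variable(False, trainable=False)
identity_fn = test_util.create_identity_with_nan_gradients_fn(have_nan_gradients)

x = tf.constant(1.0)
with tf.GradientTape() as tape:
  tape.watch(x)
  y = identity_fn(x)
print(tape.gradient(y, x))  # 1.0: the gradient is the identity

have_nan_gradients.assign(True)
with tf.GradientTape() as tape:
  tape.watch(x)
  y = identity_fn(x)
print(tape.gradient(y, x))  # nan: the gradient is forced to NaN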
Classes
class AssertTypeLayer (assert_type=None, **kwargs)
- A layer which asserts its inputs are a certain type.
Expand source code
class AssertTypeLayer(base_layer.Layer):
  """A layer which asserts its inputs are a certain type."""

  def __init__(self, assert_type=None, **kwargs):
    self._assert_type = (tf.as_dtype(assert_type).name if assert_type
                         else None)
    super(AssertTypeLayer, self).__init__(**kwargs)

  def assert_input_types(self, inputs):
    """Asserts `inputs` are of the correct type. Should be called in call()."""
    if self._assert_type:
      inputs_flattened = tf.nest.flatten(inputs)
      for inp in inputs_flattened:
        assert inp.dtype.base_dtype == self._assert_type, (
            'Input tensor has type %s which does not match assert type %s' %
            (inp.dtype.name, self._assert_type))
Ancestors
- Layer
- tensorflow.python.module.module.Module
- tensorflow.python.training.tracking.tracking.AutoTrackable
- tensorflow.python.training.tracking.base.Trackable
- LayerVersionSelector
Subclasses
- MultiplyLayer
Methods
def assert_input_types(self, inputs)
- Asserts `inputs` are of the correct type. Should be called in call().
Expand source code
def assert_input_types(self, inputs):
  """Asserts `inputs` are of the correct type. Should be called in call()."""
  if self._assert_type:
    inputs_flattened = tf.nest.flatten(inputs)
    for inp in inputs_flattened:
      assert inp.dtype.base_dtype == self._assert_type, (
          'Input tensor has type %s which does not match assert type %s' %
          (inp.dtype.name, self._assert_type))
Inherited members
Layer: activity_regularizer, add_loss, add_metric, add_update, add_variable, add_weight, apply, build, call, compute_dtype, compute_mask, compute_output_shape, compute_output_signature, count_params, dtype, dtype_policy, dynamic, finalize_state, from_config, get_config, get_input_at, get_input_mask_at, get_input_shape_at, get_losses_for, get_output_at, get_output_mask_at, get_output_shape_at, get_updates_for, get_weights, inbound_nodes, input, input_mask, input_shape, input_spec, losses, metrics, name, non_trainable_variables, non_trainable_weights, outbound_nodes, output, output_mask, output_shape, set_weights, supports_masking, trainable_variables, trainable_weights, variable_dtype, variables, weights
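A short, hypothetical subclassing sketch (the class name `PassthroughAssertLayer` is illustrative, not part of the module). It assumes that, under a `mixed_float16` policy, the base Keras layer auto-casts floating-point inputs to the compute dtype before `call()` runs:
import tensorflow.compat.v2 as tf
from keras.mixed_precision import test_util  # assumed import path

class PassthroughAssertLayer(test_util.AssertTypeLayer):  # hypothetical subclass
  def call(self, inputs):
    self.assert_input_types(inputs)  # raises AssertionError on a dtype mismatch
    return inputs

layer = PassthroughAssertLayer(assert_type='float16', dtype='mixed_float16')
out = layer(tf.ones((2, 2)))  # float32 input is auto-cast to float16, so the check passes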
class IdentityRegularizer
- Regularizer base class.
Regularizers allow you to apply penalties on layer parameters or layer activity during optimization. These penalties are summed into the loss function that the network optimizes.
Regularization penalties are applied on a per-layer basis. The exact API will depend on the layer, but many layers (e.g. `Dense`, `Conv1D`, `Conv2D` and `Conv3D`) have a unified API.
These layers expose 3 keyword arguments:
- `kernel_regularizer`: Regularizer to apply a penalty on the layer's kernel
- `bias_regularizer`: Regularizer to apply a penalty on the layer's bias
- `activity_regularizer`: Regularizer to apply a penalty on the layer's output
All layers (including custom layers) expose `activity_regularizer` as a settable property, whether or not it is in the constructor arguments.
The value returned by the `activity_regularizer` is divided by the input batch size so that the relative weighting between the weight regularizers and the activity regularizers does not change with the batch size.
You can access a layer's regularization penalties by calling `layer.losses` after calling the layer on inputs.
Example
>>> layer = tf.keras.layers.Dense(
...     5, input_dim=5,
...     kernel_initializer='ones',
...     kernel_regularizer=tf.keras.regularizers.L1(0.01),
...     activity_regularizer=tf.keras.regularizers.L2(0.01))
>>> tensor = tf.ones(shape=(5, 5)) * 2.0
>>> out = layer(tensor)
>>> # The kernel regularization term is 0.25
>>> # The activity regularization term (after dividing by the batch size) is 5
>>> tf.math.reduce_sum(layer.losses)
<tf.Tensor: shape=(), dtype=float32, numpy=5.25>
Available penalties
tf.keras.regularizers.L1(0.3)  # L1 Regularization Penalty
tf.keras.regularizers.L2(0.1)  # L2 Regularization Penalty
tf.keras.regularizers.L1L2(l1=0.01, l2=0.01)  # L1 + L2 penalties
Directly calling a regularizer
Compute a regularization loss on a tensor by directly calling a regularizer as if it is a one-argument function.
E.g.
>>> regularizer = tf.keras.regularizers.L2(2.)
>>> tensor = tf.ones(shape=(5, 5))
>>> regularizer(tensor)
<tf.Tensor: shape=(), dtype=float32, numpy=50.0>
Developing new regularizers
Any function that takes in a weight matrix and returns a scalar tensor can be used as a regularizer, e.g.:
>>> @tf.keras.utils.register_keras_serializable(package='Custom', name='l1')
... def l1_reg(weight_matrix):
...     return 0.01 * tf.math.reduce_sum(tf.math.abs(weight_matrix))
...
>>> layer = tf.keras.layers.Dense(5, input_dim=5,
...     kernel_initializer='ones', kernel_regularizer=l1_reg)
>>> tensor = tf.ones(shape=(5, 5))
>>> out = layer(tensor)
>>> layer.losses
[<tf.Tensor: shape=(), dtype=float32, numpy=0.25>]
Alternatively, you can write your custom regularizers in an object-oriented way by extending this regularizer base class, e.g.:
>>> @tf.keras.utils.register_keras_serializable(package='Custom', name='l2')
... class L2Regularizer(tf.keras.regularizers.Regularizer):
...     def __init__(self, l2=0.):  # pylint: disable=redefined-outer-name
...         self.l2 = l2
...
...     def __call__(self, x):
...         return self.l2 * tf.math.reduce_sum(tf.math.square(x))
...
...     def get_config(self):
...         return {'l2': float(self.l2)}
...
>>> layer = tf.keras.layers.Dense(
...     5, input_dim=5, kernel_initializer='ones',
...     kernel_regularizer=L2Regularizer(l2=0.5))
>>> tensor = tf.ones(shape=(5, 5))
>>> out = layer(tensor)
>>> layer.losses
[<tf.Tensor: shape=(), dtype=float32, numpy=12.5>]
A note on serialization and deserialization:
Registering the regularizers as serializable is optional if you are just training and executing models, exporting to and from SavedModels, or saving and loading weight checkpoints.
Registration is required for Keras `model_to_estimator`, saving and loading models to HDF5 formats, Keras model cloning, some visualization utilities, and exporting models to and from JSON. If using this functionality, you must make sure any python process running your model has also defined and registered your custom regularizer.
`tf.keras.utils.register_keras_serializable` is only available in TF 2.1 and beyond. In earlier versions of TensorFlow you must pass your custom regularizer to the `custom_objects` argument of methods that expect custom regularizers to be registered as serializable.
Expand source code
class IdentityRegularizer(regularizers.Regularizer):

  def __call__(self, x):
    assert x.dtype == tf.float32
    return tf.identity(x)

  def get_config(self):
    return {}
Ancestors
- Regularizer
Inherited members
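A hedged usage sketch (not part of the module), assuming the import path `keras.mixed_precision.test_util`: under mixed precision the weight itself is expected to stay float32, so the regularizer's float32 assertion should pass even though the layer computes in float16.
import tensorflow.compat.v2 as tf
from keras.mixed_precision import test_util  # assumed import path

layer = test_util.MultiplyLayer(
    assert_type=tf.float16,
    regularizer=test_util.IdentityRegularizer(),  # asserts it sees a float32 weight
    dtype='mixed_float16')
_ = layer(tf.ones((2, 2), dtype=tf.float16))
print(layer.losses)  # the identity of the scalar variable v, i.e. 1.0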
class MultiplyLayer (regularizer=None, activity_regularizer=None, use_operator=False, var_name='v', **kwargs)
- A layer which multiplies its input by a scalar variable.
Initializes the MultiplyLayer.
Args
regularizer
- The weight regularizer on the scalar variable.
activity_regularizer
- The activity regularizer.
use_operator
- If True, multiply using the * operator. If False, multiply using tf.multiply.
var_name
- The name of the variable. It can be useful to pass a name other than 'v', to test having the attribute name (self.v) being different from the variable name.
**kwargs
- Passed to AssertTypeLayer constructor.
Expand source code
class MultiplyLayer(AssertTypeLayer):
  """A layer which multiplies its input by a scalar variable."""

  def __init__(self,
               regularizer=None,
               activity_regularizer=None,
               use_operator=False,
               var_name='v',
               **kwargs):
    """Initializes the MultiplyLayer.

    Args:
      regularizer: The weight regularizer on the scalar variable.
      activity_regularizer: The activity regularizer.
      use_operator: If True, multiply using the * operator. If False, multiply
        using tf.multiply.
      var_name: The name of the variable. It can be useful to pass a name other
        than 'v', to test having the attribute name (self.v) being different
        from the variable name.
      **kwargs: Passed to AssertTypeLayer constructor.
    """
    self._regularizer = regularizer
    if isinstance(regularizer, dict):
      self._regularizer = regularizers.deserialize(regularizer,
                                                   custom_objects=globals())
    self._activity_regularizer = activity_regularizer
    if isinstance(activity_regularizer, dict):
      self._activity_regularizer = regularizers.deserialize(
          activity_regularizer, custom_objects=globals())
    self._use_operator = use_operator
    self._var_name = var_name
    super(MultiplyLayer, self).__init__(
        activity_regularizer=self._activity_regularizer, **kwargs)

  def build(self, _):
    self.v = self.add_weight(
        self._var_name, (), initializer='ones', regularizer=self._regularizer)
    self.built = True

  def call(self, inputs):
    self.assert_input_types(inputs)
    return self._multiply(inputs, self.v)

  def _multiply(self, x, y):
    if self._use_operator:
      return x * y
    else:
      return tf.multiply(x, y)

  def get_config(self):
    config = super(MultiplyLayer, self).get_config()
    config['regularizer'] = regularizers.serialize(self._regularizer)
    config['activity_regularizer'] = regularizers.serialize(
        self._activity_regularizer)
    config['use_operator'] = self._use_operator
    config['var_name'] = self._var_name
    config['assert_type'] = self._assert_type
    return config
Ancestors
- AssertTypeLayer
- Layer
- tensorflow.python.module.module.Module
- tensorflow.python.training.tracking.tracking.AutoTrackable
- tensorflow.python.training.tracking.base.Trackable
- LayerVersionSelector
Subclasses
- MultiplyLayerWithoutAutoCast
Inherited members
AssertTypeLayer: activity_regularizer, add_loss, add_metric, add_update, add_variable, add_weight, apply, assert_input_types, build, call, compute_dtype, compute_mask, compute_output_shape, compute_output_signature, count_params, dtype, dtype_policy, dynamic, finalize_state, from_config, get_config, get_input_at, get_input_mask_at, get_input_shape_at, get_losses_for, get_output_at, get_output_mask_at, get_output_shape_at, get_updates_for, get_weights, inbound_nodes, input, input_mask, input_shape, input_spec, losses, metrics, name, non_trainable_variables, non_trainable_weights, outbound_nodes, output, output_mask, output_shape, set_weights, supports_masking, trainable_variables, trainable_weights, variable_dtype, variables, weights
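A hedged usage sketch (not part of the module), assuming the import path `keras.mixed_precision.test_util` and a `mixed_float16` global policy: the input is auto-cast to the compute dtype before `call()`, while the variable is kept in float32 as an auto-cast variable.
import tensorflow.compat.v2 as tf
from keras.mixed_precision import test_util  # assumed import path

tf.keras.mixed_precision.set_global_policy('mixed_float16')

layer = test_util.MultiplyLayer(assert_type=tf.float16)
y = layer(tf.ones((2, 2)))  # float32 input is cast to float16 before call()
print(y.dtype)              # float16: the compute dtype under mixed_float16
print(layer.v.dtype)        # float32: the variable is kept in full precision

tf.keras.mixed_precision.set_global_policy('float32')  # restore the default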
class MultiplyLayerWithoutAutoCast (regularizer=None, activity_regularizer=None, use_operator=False, var_name='v', **kwargs)
- Same as MultiplyLayer, but does not use AutoCastVariables.
Initializes the MultiplyLayer.
Args
regularizer
- The weight regularizer on the scalar variable.
activity_regularizer
- The activity regularizer.
use_operator
- If True, multiply using the * operator. If False, multiply using tf.multiply.
var_name
- The name of the variable. It can be useful to pass a name other than 'v', to test having the attribute name (self.v) being different from the variable name.
**kwargs
- Passed to AssertTypeLayer constructor.
Expand source code
class MultiplyLayerWithoutAutoCast(MultiplyLayer):
  """Same as MultiplyLayer, but does not use AutoCastVariables."""

  def build(self, _):
    dtype = self.dtype
    if dtype in ('float16', 'bfloat16'):
      dtype = 'float32'
    self.v = self.add_weight(
        'v', (),
        initializer='ones',
        dtype=dtype,
        experimental_autocast=False,
        regularizer=self._regularizer)
    self.built = True

  def call(self, inputs):
    self.assert_input_types(inputs)
    assert self.v.dtype in (tf.float32, tf.float64)
    return self._multiply(inputs, tf.cast(self.v, inputs.dtype))
Ancestors
- MultiplyLayer
- AssertTypeLayer
- Layer
- tensorflow.python.module.module.Module
- tensorflow.python.training.tracking.tracking.AutoTrackable
- tensorflow.python.training.tracking.base.Trackable
- LayerVersionSelector
Inherited members
MultiplyLayer: activity_regularizer, add_loss, add_metric, add_update, add_variable, add_weight, apply, assert_input_types, build, call, compute_dtype, compute_mask, compute_output_shape, compute_output_signature, count_params, dtype, dtype_policy, dynamic, finalize_state, from_config, get_config, get_input_at, get_input_mask_at, get_input_shape_at, get_losses_for, get_output_at, get_output_mask_at, get_output_shape_at, get_updates_for, get_weights, inbound_nodes, input, input_mask, input_shape, input_spec, losses, metrics, name, non_trainable_variables, non_trainable_weights, outbound_nodes, output, output_mask, output_shape, set_weights, supports_masking, trainable_variables, trainable_weights, variable_dtype, variables, weights
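A hedged usage sketch (not part of the module), assuming the import path `keras.mixed_precision.test_util`: unlike MultiplyLayer, the variable here is a plain float32 variable, and `call()` casts it to the input dtype explicitly.
import tensorflow.compat.v2 as tf
from keras.mixed_precision import test_util  # assumed import path

layer = test_util.MultiplyLayerWithoutAutoCast(
    assert_type=tf.float16, dtype='mixed_float16')
y = layer(tf.ones((2, 2), dtype=tf.float16))
print(layer.v.dtype)  # float32: a plain variable, not wrapped in an AutoCastVariable
print(y.dtype)        # float16: call() casts the variable to the input dtype explicitly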
class ReduceSumRegularizer
- Regularizer base class.
Regularizers allow you to apply penalties on layer parameters or layer activity during optimization. These penalties are summed into the loss function that the network optimizes.
Regularization penalties are applied on a per-layer basis. The exact API will depend on the layer, but many layers (e.g. `Dense`, `Conv1D`, `Conv2D` and `Conv3D`) have a unified API.
These layers expose 3 keyword arguments:
- `kernel_regularizer`: Regularizer to apply a penalty on the layer's kernel
- `bias_regularizer`: Regularizer to apply a penalty on the layer's bias
- `activity_regularizer`: Regularizer to apply a penalty on the layer's output
All layers (including custom layers) expose `activity_regularizer` as a settable property, whether or not it is in the constructor arguments.
The value returned by the `activity_regularizer` is divided by the input batch size so that the relative weighting between the weight regularizers and the activity regularizers does not change with the batch size.
You can access a layer's regularization penalties by calling `layer.losses` after calling the layer on inputs.
Example
>>> layer = tf.keras.layers.Dense(
...     5, input_dim=5,
...     kernel_initializer='ones',
...     kernel_regularizer=tf.keras.regularizers.L1(0.01),
...     activity_regularizer=tf.keras.regularizers.L2(0.01))
>>> tensor = tf.ones(shape=(5, 5)) * 2.0
>>> out = layer(tensor)
>>> # The kernel regularization term is 0.25
>>> # The activity regularization term (after dividing by the batch size) is 5
>>> tf.math.reduce_sum(layer.losses)
<tf.Tensor: shape=(), dtype=float32, numpy=5.25>
Available penalties
tf.keras.regularizers.L1(0.3)  # L1 Regularization Penalty
tf.keras.regularizers.L2(0.1)  # L2 Regularization Penalty
tf.keras.regularizers.L1L2(l1=0.01, l2=0.01)  # L1 + L2 penalties
Directly calling a regularizer
Compute a regularization loss on a tensor by directly calling a regularizer as if it is a one-argument function.
E.g.
>>> regularizer = tf.keras.regularizers.L2(2.)
>>> tensor = tf.ones(shape=(5, 5))
>>> regularizer(tensor)
<tf.Tensor: shape=(), dtype=float32, numpy=50.0>
Developing new regularizers
Any function that takes in a weight matrix and returns a scalar tensor can be used as a regularizer, e.g.:
>>> @tf.keras.utils.register_keras_serializable(package='Custom', name='l1')
... def l1_reg(weight_matrix):
...     return 0.01 * tf.math.reduce_sum(tf.math.abs(weight_matrix))
...
>>> layer = tf.keras.layers.Dense(5, input_dim=5,
...     kernel_initializer='ones', kernel_regularizer=l1_reg)
>>> tensor = tf.ones(shape=(5, 5))
>>> out = layer(tensor)
>>> layer.losses
[<tf.Tensor: shape=(), dtype=float32, numpy=0.25>]
Alternatively, you can write your custom regularizers in an object-oriented way by extending this regularizer base class, e.g.:
>>> @tf.keras.utils.register_keras_serializable(package='Custom', name='l2')
... class L2Regularizer(tf.keras.regularizers.Regularizer):
...     def __init__(self, l2=0.):  # pylint: disable=redefined-outer-name
...         self.l2 = l2
...
...     def __call__(self, x):
...         return self.l2 * tf.math.reduce_sum(tf.math.square(x))
...
...     def get_config(self):
...         return {'l2': float(self.l2)}
...
>>> layer = tf.keras.layers.Dense(
...     5, input_dim=5, kernel_initializer='ones',
...     kernel_regularizer=L2Regularizer(l2=0.5))
>>> tensor = tf.ones(shape=(5, 5))
>>> out = layer(tensor)
>>> layer.losses
[<tf.Tensor: shape=(), dtype=float32, numpy=12.5>]
A note on serialization and deserialization:
Registering the regularizers as serializable is optional if you are just training and executing models, exporting to and from SavedModels, or saving and loading weight checkpoints.
Registration is required for Keras `model_to_estimator`, saving and loading models to HDF5 formats, Keras model cloning, some visualization utilities, and exporting models to and from JSON. If using this functionality, you must make sure any python process running your model has also defined and registered your custom regularizer.
`tf.keras.utils.register_keras_serializable` is only available in TF 2.1 and beyond. In earlier versions of TensorFlow you must pass your custom regularizer to the `custom_objects` argument of methods that expect custom regularizers to be registered as serializable.
Expand source code
class ReduceSumRegularizer(regularizers.Regularizer):

  def __call__(self, x):
    return tf.reduce_sum(x)

  def get_config(self):
    return {}
Ancestors
- Regularizer
Inherited members