Module keras.api.keras.experimental
Public API for tf.keras.experimental namespace.
Expand source code
# This file is MACHINE GENERATED! Do not edit.
# Generated by: tensorflow/python/tools/api/generator/create_python_api.py script.
"""Public API for tf.keras.experimental namespace.
"""
from __future__ import print_function as _print_function
import sys as _sys
from keras.feature_column.sequence_feature_column import SequenceFeatures
from keras.layers.recurrent import PeepholeLSTMCell
from keras.optimizer_v2.learning_rate_schedule import CosineDecay
from keras.optimizer_v2.learning_rate_schedule import CosineDecayRestarts
from keras.premade.linear import LinearModel
from keras.premade.wide_deep import WideDeepModel
from keras.saving.saved_model_experimental import export_saved_model
from keras.saving.saved_model_experimental import load_from_saved_model
del _print_function
from tensorflow.python.util import module_wrapper as _module_wrapper
if not isinstance(_sys.modules[__name__], _module_wrapper.TFModuleWrapper):
_sys.modules[__name__] = _module_wrapper.TFModuleWrapper(
_sys.modules[__name__], "keras.experimental", public_apis=None, deprecation=True,
has_lite=False)
Functions
def export_saved_model(model, saved_model_path, custom_objects=None, as_text=False, input_signature=None, serving_only=False)
-
Exports a
tf.keras.Model
as a Tensorflow SavedModel.Note that at this time, subclassed models can only be saved using
serving_only=True
.The exported
SavedModel
is a standalone serialization of Tensorflow objects, and is supported by TF language APIs and the Tensorflow Serving system. To load the model, use the functiontf.keras.experimental.load_from_saved_model
.The
SavedModel
contains:- a checkpoint containing the model weights.
- a
SavedModel
proto containing the Tensorflow backend graph. Separate graphs are saved for prediction (serving), train, and evaluation. If the model has not been compiled, then only the graph computing predictions will be exported. - the model's json config. If the model is subclassed, this will only be
included if the model's
get_config()
method is overwritten.
Example:
import tensorflow as tf # Create a tf.keras model. model = tf.keras.Sequential() model.add(tf.keras.layers.Dense(1, input_shape=[10])) model.summary() # Save the tf.keras model in the SavedModel format. path = '/tmp/simple_keras_model' tf.keras.experimental.export_saved_model(model, path) # Load the saved keras model back. new_model = tf.keras.experimental.load_from_saved_model(path) new_model.summary()
Args
model
- A
tf.keras.Model
to be saved. If the model is subclassed, the flagserving_only
must be set to True. saved_model_path
- a string specifying the path to the SavedModel directory.
custom_objects
- Optional dictionary mapping string names to custom classes or functions (e.g. custom loss functions).
as_text
- bool,
False
by default. Whether to write theSavedModel
proto in text format. Currently unavailable in serving-only mode. input_signature
- A possibly nested sequence of
tf.TensorSpec
objects, used to specify the expected model inputs. Seetf.function
for more details. serving_only
- bool,
False
by default. When this is true, only the prediction graph is saved.
Raises
NotImplementedError
- If the model is a subclassed model, and serving_only is False.
ValueError
- If the input signature cannot be inferred from the model.
AssertionError
- If the SavedModel directory already exists and isn't empty.
Expand source code
@keras_export(v1=['keras.experimental.export_saved_model']) def export_saved_model(model, saved_model_path, custom_objects=None, as_text=False, input_signature=None, serving_only=False): """Exports a `tf.keras.Model` as a Tensorflow SavedModel. Note that at this time, subclassed models can only be saved using `serving_only=True`. The exported `SavedModel` is a standalone serialization of Tensorflow objects, and is supported by TF language APIs and the Tensorflow Serving system. To load the model, use the function `tf.keras.experimental.load_from_saved_model`. The `SavedModel` contains: 1. a checkpoint containing the model weights. 2. a `SavedModel` proto containing the Tensorflow backend graph. Separate graphs are saved for prediction (serving), train, and evaluation. If the model has not been compiled, then only the graph computing predictions will be exported. 3. the model's json config. If the model is subclassed, this will only be included if the model's `get_config()` method is overwritten. Example: ```python import tensorflow as tf # Create a tf.keras model. model = tf.keras.Sequential() model.add(tf.keras.layers.Dense(1, input_shape=[10])) model.summary() # Save the tf.keras model in the SavedModel format. path = '/tmp/simple_keras_model' tf.keras.experimental.export_saved_model(model, path) # Load the saved keras model back. new_model = tf.keras.experimental.load_from_saved_model(path) new_model.summary() ``` Args: model: A `tf.keras.Model` to be saved. If the model is subclassed, the flag `serving_only` must be set to True. saved_model_path: a string specifying the path to the SavedModel directory. custom_objects: Optional dictionary mapping string names to custom classes or functions (e.g. custom loss functions). as_text: bool, `False` by default. Whether to write the `SavedModel` proto in text format. Currently unavailable in serving-only mode. input_signature: A possibly nested sequence of `tf.TensorSpec` objects, used to specify the expected model inputs. See `tf.function` for more details. serving_only: bool, `False` by default. When this is true, only the prediction graph is saved. Raises: NotImplementedError: If the model is a subclassed model, and serving_only is False. ValueError: If the input signature cannot be inferred from the model. AssertionError: If the SavedModel directory already exists and isn't empty. """ warnings.warn('`tf.keras.experimental.export_saved_model` is deprecated' 'and will be removed in a future version. ' 'Please use `model.save(..., save_format="tf")` or ' '`tf.keras.models.save_model(..., save_format="tf")`.') if serving_only: tf.saved_model.save( model, saved_model_path, signatures=saving_utils.trace_model_call(model, input_signature)) else: _save_v1_format(model, saved_model_path, custom_objects, as_text, input_signature) try: _export_model_json(model, saved_model_path) except NotImplementedError: logging.warning('Skipped saving model JSON, subclassed model does not have ' 'get_config() defined.')
def load_from_saved_model(saved_model_path, custom_objects=None)
-
Loads a keras Model from a SavedModel created by
export_saved_model()
.This function reinstantiates model state by: 1) loading model topology from json (this will eventually come from metagraph). 2) loading model weights from checkpoint.
Example:
import tensorflow as tf # Create a tf.keras model. model = tf.keras.Sequential() model.add(tf.keras.layers.Dense(1, input_shape=[10])) model.summary() # Save the tf.keras model in the SavedModel format. path = '/tmp/simple_keras_model' tf.keras.experimental.export_saved_model(model, path) # Load the saved keras model back. new_model = tf.keras.experimental.load_from_saved_model(path) new_model.summary()
Args
saved_model_path
- a string specifying the path to an existing SavedModel.
custom_objects
- Optional dictionary mapping names (strings) to custom classes or functions to be considered during deserialization.
Returns
a keras.Model instance.
Expand source code
@keras_export(v1=['keras.experimental.load_from_saved_model']) def load_from_saved_model(saved_model_path, custom_objects=None): """Loads a keras Model from a SavedModel created by `export_saved_model()`. This function reinstantiates model state by: 1) loading model topology from json (this will eventually come from metagraph). 2) loading model weights from checkpoint. Example: ```python import tensorflow as tf # Create a tf.keras model. model = tf.keras.Sequential() model.add(tf.keras.layers.Dense(1, input_shape=[10])) model.summary() # Save the tf.keras model in the SavedModel format. path = '/tmp/simple_keras_model' tf.keras.experimental.export_saved_model(model, path) # Load the saved keras model back. new_model = tf.keras.experimental.load_from_saved_model(path) new_model.summary() ``` Args: saved_model_path: a string specifying the path to an existing SavedModel. custom_objects: Optional dictionary mapping names (strings) to custom classes or functions to be considered during deserialization. Returns: a keras.Model instance. """ warnings.warn('`tf.keras.experimental.load_from_saved_model` is deprecated' 'and will be removed in a future version. ' 'Please switch to `tf.keras.models.load_model`.') # restore model topology from json string model_json_filepath = os.path.join( tf.compat.as_bytes(saved_model_path), tf.compat.as_bytes(tf.saved_model.ASSETS_DIRECTORY), tf.compat.as_bytes(SAVED_MODEL_FILENAME_JSON)) with tf.io.gfile.GFile(model_json_filepath, 'r') as f: model_json = f.read() model = model_config.model_from_json( model_json, custom_objects=custom_objects) # restore model weights checkpoint_prefix = os.path.join( tf.compat.as_text(saved_model_path), tf.compat.as_text(tf.saved_model.VARIABLES_DIRECTORY), tf.compat.as_text(tf.saved_model.VARIABLES_FILENAME)) model.load_weights(checkpoint_prefix) return model
Classes
class CosineDecay (initial_learning_rate, decay_steps, alpha=0.0, name=None)
-
A LearningRateSchedule that uses a cosine decay schedule.
See Loshchilov & Hutter, ICLR2016, SGDR: Stochastic Gradient Descent with Warm Restarts.
When training a model, it is often useful to lower the learning rate as the training progresses. This schedule applies a cosine decay function to an optimizer step, given a provided initial learning rate. It requires a
step
value to compute the decayed learning rate. You can just pass a TensorFlow variable that you increment at each training step.The schedule a 1-arg callable that produces a decayed learning rate when passed the current optimizer step. This can be useful for changing the learning rate value across different invocations of optimizer functions. It is computed as:
def decayed_learning_rate(step): step = min(step, decay_steps) cosine_decay = 0.5 * (1 + cos(pi * step / decay_steps)) decayed = (1 - alpha) * cosine_decay + alpha return initial_learning_rate * decayed
Example usage:
decay_steps = 1000 lr_decayed_fn = tf.keras.optimizers.schedules.CosineDecay( initial_learning_rate, decay_steps)
You can pass this schedule directly into a
tf.keras.optimizers.Optimizer
as the learning rate. The learning rate schedule is also serializable and deserializable usingtf.keras.optimizers.schedules.serialize
andtf.keras.optimizers.schedules.deserialize
.Returns
A 1-arg callable learning rate schedule that takes the current optimizer step and outputs the decayed learning rate, a scalar
Tensor
of the same type asinitial_learning_rate
. Applies cosine decay to the learning rate.Args
initial_learning_rate
- A scalar
float32
orfloat64
Tensor or a Python number. The initial learning rate. decay_steps
- A scalar
int32
orint64
Tensor
or a Python number. Number of steps to decay over. alpha
- A scalar
float32
orfloat64
Tensor or a Python number. Minimum learning rate value as a fraction of initial_learning_rate. name
- String. Optional name of the operation. Defaults to 'CosineDecay'.
Expand source code
class CosineDecay(LearningRateSchedule): """A LearningRateSchedule that uses a cosine decay schedule. See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983), SGDR: Stochastic Gradient Descent with Warm Restarts. When training a model, it is often useful to lower the learning rate as the training progresses. This schedule applies a cosine decay function to an optimizer step, given a provided initial learning rate. It requires a `step` value to compute the decayed learning rate. You can just pass a TensorFlow variable that you increment at each training step. The schedule a 1-arg callable that produces a decayed learning rate when passed the current optimizer step. This can be useful for changing the learning rate value across different invocations of optimizer functions. It is computed as: ```python def decayed_learning_rate(step): step = min(step, decay_steps) cosine_decay = 0.5 * (1 + cos(pi * step / decay_steps)) decayed = (1 - alpha) * cosine_decay + alpha return initial_learning_rate * decayed ``` Example usage: ```python decay_steps = 1000 lr_decayed_fn = tf.keras.optimizers.schedules.CosineDecay( initial_learning_rate, decay_steps) ``` You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` as the learning rate. The learning rate schedule is also serializable and deserializable using `tf.keras.optimizers.schedules.serialize` and `tf.keras.optimizers.schedules.deserialize`. Returns: A 1-arg callable learning rate schedule that takes the current optimizer step and outputs the decayed learning rate, a scalar `Tensor` of the same type as `initial_learning_rate`. """ def __init__( self, initial_learning_rate, decay_steps, alpha=0.0, name=None): """Applies cosine decay to the learning rate. Args: initial_learning_rate: A scalar `float32` or `float64` Tensor or a Python number. The initial learning rate. decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. Number of steps to decay over. alpha: A scalar `float32` or `float64` Tensor or a Python number. Minimum learning rate value as a fraction of initial_learning_rate. name: String. Optional name of the operation. Defaults to 'CosineDecay'. """ super(CosineDecay, self).__init__() self.initial_learning_rate = initial_learning_rate self.decay_steps = decay_steps self.alpha = alpha self.name = name def __call__(self, step): with tf.name_scope(self.name or "CosineDecay"): initial_learning_rate = tf.convert_to_tensor( self.initial_learning_rate, name="initial_learning_rate") dtype = initial_learning_rate.dtype decay_steps = tf.cast(self.decay_steps, dtype) global_step_recomp = tf.cast(step, dtype) global_step_recomp = tf.minimum(global_step_recomp, decay_steps) completed_fraction = global_step_recomp / decay_steps cosine_decayed = 0.5 * (1.0 + tf.cos( tf.constant(math.pi) * completed_fraction)) decayed = (1 - self.alpha) * cosine_decayed + self.alpha return tf.multiply(initial_learning_rate, decayed) def get_config(self): return { "initial_learning_rate": self.initial_learning_rate, "decay_steps": self.decay_steps, "alpha": self.alpha, "name": self.name }
Ancestors
Methods
def get_config(self)
-
Expand source code
def get_config(self): return { "initial_learning_rate": self.initial_learning_rate, "decay_steps": self.decay_steps, "alpha": self.alpha, "name": self.name }
Inherited members
class CosineDecayRestarts (initial_learning_rate, first_decay_steps, t_mul=2.0, m_mul=1.0, alpha=0.0, name=None)
-
A LearningRateSchedule that uses a cosine decay schedule with restarts.
See Loshchilov & Hutter, ICLR2016, SGDR: Stochastic Gradient Descent with Warm Restarts.
When training a model, it is often useful to lower the learning rate as the training progresses. This schedule applies a cosine decay function with restarts to an optimizer step, given a provided initial learning rate. It requires a
step
value to compute the decayed learning rate. You can just pass a TensorFlow variable that you increment at each training step.The schedule a 1-arg callable that produces a decayed learning rate when passed the current optimizer step. This can be useful for changing the learning rate value across different invocations of optimizer functions.
The learning rate multiplier first decays from 1 to
alpha
forfirst_decay_steps
steps. Then, a warm restart is performed. Each new warm restart runs fort_mul
times more steps and withm_mul
times smaller initial learning rate.Example usage:
first_decay_steps = 1000 lr_decayed_fn = ( tf.keras.optimizers.schedules.CosineDecayRestarts( initial_learning_rate, first_decay_steps))
You can pass this schedule directly into a
tf.keras.optimizers.Optimizer
as the learning rate. The learning rate schedule is also serializable and deserializable usingtf.keras.optimizers.schedules.serialize
andtf.keras.optimizers.schedules.deserialize
.Returns
A 1-arg callable learning rate schedule that takes the current optimizer step and outputs the decayed learning rate, a scalar
Tensor
of the same type asinitial_learning_rate
. Applies cosine decay with restarts to the learning rate.Args
initial_learning_rate
- A scalar
float32
orfloat64
Tensor or a Python number. The initial learning rate. first_decay_steps
- A scalar
int32
orint64
Tensor
or a Python number. Number of steps to decay over. t_mul
- A scalar
float32
orfloat64
Tensor
or a Python number. Used to derive the number of iterations in the i-th period m_mul
- A scalar
float32
orfloat64
Tensor
or a Python number. Used to derive the initial learning rate of the i-th period: alpha
- A scalar
float32
orfloat64
Tensor or a Python number. Minimum learning rate value as a fraction of the initial_learning_rate. name
- String. Optional name of the operation. Defaults to 'SGDRDecay'.
Expand source code
class CosineDecayRestarts(LearningRateSchedule): """A LearningRateSchedule that uses a cosine decay schedule with restarts. See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983), SGDR: Stochastic Gradient Descent with Warm Restarts. When training a model, it is often useful to lower the learning rate as the training progresses. This schedule applies a cosine decay function with restarts to an optimizer step, given a provided initial learning rate. It requires a `step` value to compute the decayed learning rate. You can just pass a TensorFlow variable that you increment at each training step. The schedule a 1-arg callable that produces a decayed learning rate when passed the current optimizer step. This can be useful for changing the learning rate value across different invocations of optimizer functions. The learning rate multiplier first decays from 1 to `alpha` for `first_decay_steps` steps. Then, a warm restart is performed. Each new warm restart runs for `t_mul` times more steps and with `m_mul` times smaller initial learning rate. Example usage: ```python first_decay_steps = 1000 lr_decayed_fn = ( tf.keras.optimizers.schedules.CosineDecayRestarts( initial_learning_rate, first_decay_steps)) ``` You can pass this schedule directly into a `tf.keras.optimizers.Optimizer` as the learning rate. The learning rate schedule is also serializable and deserializable using `tf.keras.optimizers.schedules.serialize` and `tf.keras.optimizers.schedules.deserialize`. Returns: A 1-arg callable learning rate schedule that takes the current optimizer step and outputs the decayed learning rate, a scalar `Tensor` of the same type as `initial_learning_rate`. """ def __init__( self, initial_learning_rate, first_decay_steps, t_mul=2.0, m_mul=1.0, alpha=0.0, name=None): """Applies cosine decay with restarts to the learning rate. Args: initial_learning_rate: A scalar `float32` or `float64` Tensor or a Python number. The initial learning rate. first_decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. Number of steps to decay over. t_mul: A scalar `float32` or `float64` `Tensor` or a Python number. Used to derive the number of iterations in the i-th period m_mul: A scalar `float32` or `float64` `Tensor` or a Python number. Used to derive the initial learning rate of the i-th period: alpha: A scalar `float32` or `float64` Tensor or a Python number. Minimum learning rate value as a fraction of the initial_learning_rate. name: String. Optional name of the operation. Defaults to 'SGDRDecay'. """ super(CosineDecayRestarts, self).__init__() self.initial_learning_rate = initial_learning_rate self.first_decay_steps = first_decay_steps self._t_mul = t_mul self._m_mul = m_mul self.alpha = alpha self.name = name def __call__(self, step): with tf.name_scope(self.name or "SGDRDecay") as name: initial_learning_rate = tf.convert_to_tensor( self.initial_learning_rate, name="initial_learning_rate") dtype = initial_learning_rate.dtype first_decay_steps = tf.cast(self.first_decay_steps, dtype) alpha = tf.cast(self.alpha, dtype) t_mul = tf.cast(self._t_mul, dtype) m_mul = tf.cast(self._m_mul, dtype) global_step_recomp = tf.cast(step, dtype) completed_fraction = global_step_recomp / first_decay_steps def compute_step(completed_fraction, geometric=False): """Helper for `cond` operation.""" if geometric: i_restart = tf.floor( tf.math.log(1.0 - completed_fraction * (1.0 - t_mul)) / tf.math.log(t_mul)) sum_r = (1.0 - t_mul**i_restart) / (1.0 - t_mul) completed_fraction = (completed_fraction - sum_r) / t_mul**i_restart else: i_restart = tf.floor(completed_fraction) completed_fraction -= i_restart return i_restart, completed_fraction i_restart, completed_fraction = tf.cond( tf.equal(t_mul, 1.0), lambda: compute_step(completed_fraction, geometric=False), lambda: compute_step(completed_fraction, geometric=True)) m_fac = m_mul**i_restart cosine_decayed = 0.5 * m_fac * (1.0 + tf.cos( tf.constant(math.pi) * completed_fraction)) decayed = (1 - alpha) * cosine_decayed + alpha return tf.multiply(initial_learning_rate, decayed, name=name) def get_config(self): return { "initial_learning_rate": self.initial_learning_rate, "first_decay_steps": self.first_decay_steps, "t_mul": self._t_mul, "m_mul": self._m_mul, "alpha": self.alpha, "name": self.name }
Ancestors
Methods
def get_config(self)
-
Expand source code
def get_config(self): return { "initial_learning_rate": self.initial_learning_rate, "first_decay_steps": self.first_decay_steps, "t_mul": self._t_mul, "m_mul": self._m_mul, "alpha": self.alpha, "name": self.name }
Inherited members
class LinearModel (units=1, activation=None, use_bias=True, kernel_initializer='zeros', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, **kwargs)
-
Linear Model for regression and classification problems.
This model approximates the following function: $$y = \beta + \sum_{i=1}^{N} w_{i} * x_{i}$$ where $$\beta$$ is the bias and $$w_{i}$$ is the weight for each feature.
Example:
model = LinearModel() model.compile(optimizer='sgd', loss='mse') model.fit(x, y, epochs=epochs)
This model accepts sparse float inputs as well:
Example:
model = LinearModel() opt = tf.keras.optimizers.Adam() loss_fn = tf.keras.losses.MeanSquaredError() with tf.GradientTape() as tape: output = model(sparse_input) loss = tf.reduce_mean(loss_fn(target, output)) grads = tape.gradient(loss, model.weights) opt.apply_gradients(zip(grads, model.weights))
Create a Linear Model.
Args
units
- Positive integer, output dimension without the batch size.
activation
- Activation function to use. If you don't specify anything, no activation is applied.
use_bias
- whether to calculate the bias/intercept for this model. If set to False, no bias/intercept will be used in calculations, e.g., the data is already centered.
kernel_initializer
- Initializer for the
kernel
weights matrices. bias_initializer
- Initializer for the bias vector.
kernel_regularizer
- regularizer for kernel vectors.
bias_regularizer
- regularizer for bias vector.
**kwargs
- The keyword arguments that are passed on to BaseLayer.init.
Expand source code
class LinearModel(training.Model): r"""Linear Model for regression and classification problems. This model approximates the following function: $$y = \beta + \sum_{i=1}^{N} w_{i} * x_{i}$$ where $$\beta$$ is the bias and $$w_{i}$$ is the weight for each feature. Example: ```python model = LinearModel() model.compile(optimizer='sgd', loss='mse') model.fit(x, y, epochs=epochs) ``` This model accepts sparse float inputs as well: Example: ```python model = LinearModel() opt = tf.keras.optimizers.Adam() loss_fn = tf.keras.losses.MeanSquaredError() with tf.GradientTape() as tape: output = model(sparse_input) loss = tf.reduce_mean(loss_fn(target, output)) grads = tape.gradient(loss, model.weights) opt.apply_gradients(zip(grads, model.weights)) ``` """ def __init__(self, units=1, activation=None, use_bias=True, kernel_initializer='zeros', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, **kwargs): """Create a Linear Model. Args: units: Positive integer, output dimension without the batch size. activation: Activation function to use. If you don't specify anything, no activation is applied. use_bias: whether to calculate the bias/intercept for this model. If set to False, no bias/intercept will be used in calculations, e.g., the data is already centered. kernel_initializer: Initializer for the `kernel` weights matrices. bias_initializer: Initializer for the bias vector. kernel_regularizer: regularizer for kernel vectors. bias_regularizer: regularizer for bias vector. **kwargs: The keyword arguments that are passed on to BaseLayer.__init__. """ self.units = units self.activation = activations.get(activation) self.use_bias = use_bias self.kernel_initializer = initializers.get(kernel_initializer) self.bias_initializer = initializers.get(bias_initializer) self.kernel_regularizer = regularizers.get(kernel_regularizer) self.bias_regularizer = regularizers.get(bias_regularizer) super(LinearModel, self).__init__(**kwargs) base_layer.keras_premade_model_gauge.get_cell('Linear').set(True) def build(self, input_shape): if isinstance(input_shape, dict): names = sorted(list(input_shape.keys())) self.input_specs = [] self.dense_layers = [] for name in names: shape = input_shape[name] layer = core.Dense( units=self.units, use_bias=False, kernel_initializer=self.kernel_initializer, kernel_regularizer=self.kernel_regularizer, name=name) layer.build(shape) self.input_specs.append( input_spec.InputSpec(shape=shape, name=name)) self.dense_layers.append(layer) elif isinstance(input_shape, (tuple, list)) and all( isinstance(shape, tf.TensorShape) for shape in input_shape): self.dense_layers = [] for shape in input_shape: layer = core.Dense( units=self.units, use_bias=False, kernel_initializer=self.kernel_initializer, kernel_regularizer=self.kernel_regularizer) layer.build(shape) self.dense_layers.append(layer) else: # input_shape can be a single TensorShape or a tuple of ints. layer = core.Dense( units=self.units, use_bias=False, kernel_initializer=self.kernel_initializer, kernel_regularizer=self.kernel_regularizer) layer.build(input_shape) self.dense_layers = [layer] if self.use_bias: self.bias = self.add_weight( 'bias', shape=self.units, initializer=self.bias_initializer, regularizer=self.bias_regularizer, dtype=self.dtype, trainable=True) else: self.bias = None self.built = True def call(self, inputs): result = None if isinstance(inputs, dict): names = [layer.name for layer in self.dense_layers] different_keys = set(names) - set(inputs.keys()) if different_keys: raise ValueError( 'The input dictionary does not match ' 'the structure expected by the model.' '\n\tExpected keys: {}' '\n\tReceived keys: {}' '\n\tMissing keys: {}'.format(set(names), set(inputs.keys()), different_keys)) inputs = [inputs[name] for name in names] for inp, layer in zip(inputs, self.dense_layers): output = layer(inp) if result is None: result = output else: result += output elif isinstance(inputs, (tuple, list)): for inp, layer in zip(inputs, self.dense_layers): output = layer(inp) if result is None: result = output else: result += output else: result = self.dense_layers[0](inputs) if self.use_bias: result = tf.nn.bias_add(result, self.bias) if self.activation is not None: return self.activation(result) # pylint: disable=not-callable return result def get_config(self): config = { 'units': self.units, 'activation': activations.serialize(self.activation), 'use_bias': self.use_bias, 'kernel_initializer': initializers.serialize(self.kernel_initializer), 'bias_initializer': initializers.serialize(self.bias_initializer), 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), 'bias_regularizer': regularizers.serialize(self.bias_regularizer), } base_config = base_layer.Layer.get_config(self) return dict(list(base_config.items()) + list(config.items())) @classmethod def from_config(cls, config, custom_objects=None): del custom_objects return cls(**config)
Ancestors
- Model
- Layer
- tensorflow.python.module.module.Module
- tensorflow.python.training.tracking.tracking.AutoTrackable
- tensorflow.python.training.tracking.base.Trackable
- LayerVersionSelector
- ModelVersionSelector
Inherited members
Model
:activity_regularizer
add_loss
add_metric
add_update
add_variable
add_weight
apply
build
call
compile
compute_dtype
compute_mask
compute_output_shape
compute_output_signature
count_params
distribute_strategy
dtype
dtype_policy
dynamic
evaluate
evaluate_generator
finalize_state
fit
fit_generator
from_config
get_config
get_input_at
get_input_mask_at
get_input_shape_at
get_layer
get_losses_for
get_output_at
get_output_mask_at
get_output_shape_at
get_updates_for
get_weights
inbound_nodes
input
input_mask
input_shape
input_spec
load_weights
losses
make_predict_function
make_test_function
make_train_function
metrics
metrics_names
name
non_trainable_variables
non_trainable_weights
outbound_nodes
output
output_mask
output_shape
predict
predict_generator
predict_on_batch
predict_step
reset_metrics
run_eagerly
save
save_spec
save_weights
set_weights
state_updates
summary
supports_masking
test_on_batch
test_step
to_json
to_yaml
train_on_batch
train_step
trainable_variables
trainable_weights
variable_dtype
variables
weights
class PeepholeLSTMCell (units, activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', unit_forget_bias=True, kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0.0, recurrent_dropout=0.0, **kwargs)
-
Equivalent to LSTMCell class but adds peephole connections.
Peephole connections allow the gates to utilize the previous internal state as well as the previous hidden state (which is what LSTMCell is limited to). This allows PeepholeLSTMCell to better learn precise timings over LSTMCell.
From Gers et al., 2002:
"We find that LSTM augmented by 'peephole connections' from its internal cells to its multiplicative gates can learn the fine distinction between sequences of spikes spaced either 50 or 49 time steps apart without the help of any short training exemplars."
The peephole implementation is based on:
Example:
# Create 2 PeepholeLSTMCells peephole_lstm_cells = [PeepholeLSTMCell(size) for size in [128, 256]] # Create a layer composed sequentially of the peephole LSTM cells. layer = RNN(peephole_lstm_cells) input = keras.Input((timesteps, input_dim)) output = layer(input)
Expand source code
class PeepholeLSTMCell(LSTMCell): """Equivalent to LSTMCell class but adds peephole connections. Peephole connections allow the gates to utilize the previous internal state as well as the previous hidden state (which is what LSTMCell is limited to). This allows PeepholeLSTMCell to better learn precise timings over LSTMCell. From [Gers et al., 2002]( http://www.jmlr.org/papers/volume3/gers02a/gers02a.pdf): "We find that LSTM augmented by 'peephole connections' from its internal cells to its multiplicative gates can learn the fine distinction between sequences of spikes spaced either 50 or 49 time steps apart without the help of any short training exemplars." The peephole implementation is based on: [Sak et al., 2014](https://research.google.com/pubs/archive/43905.pdf) Example: ```python # Create 2 PeepholeLSTMCells peephole_lstm_cells = [PeepholeLSTMCell(size) for size in [128, 256]] # Create a layer composed sequentially of the peephole LSTM cells. layer = RNN(peephole_lstm_cells) input = keras.Input((timesteps, input_dim)) output = layer(input) ``` """ def __init__(self, units, activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', unit_forget_bias=True, kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0., recurrent_dropout=0., **kwargs): warnings.warn('`tf.keras.experimental.PeepholeLSTMCell` is deprecated ' 'and will be removed in a future version. ' 'Please use tensorflow_addons.rnn.PeepholeLSTMCell ' 'instead.') super(PeepholeLSTMCell, self).__init__( units=units, activation=activation, recurrent_activation=recurrent_activation, use_bias=use_bias, kernel_initializer=kernel_initializer, recurrent_initializer=recurrent_initializer, bias_initializer=bias_initializer, unit_forget_bias=unit_forget_bias, kernel_regularizer=kernel_regularizer, recurrent_regularizer=recurrent_regularizer, bias_regularizer=bias_regularizer, kernel_constraint=kernel_constraint, recurrent_constraint=recurrent_constraint, bias_constraint=bias_constraint, dropout=dropout, recurrent_dropout=recurrent_dropout, implementation=kwargs.pop('implementation', 1), **kwargs) def build(self, input_shape): super(PeepholeLSTMCell, self).build(input_shape) # The following are the weight matrices for the peephole connections. These # are multiplied with the previous internal state during the computation of # carry and output. self.input_gate_peephole_weights = self.add_weight( shape=(self.units,), name='input_gate_peephole_weights', initializer=self.kernel_initializer) self.forget_gate_peephole_weights = self.add_weight( shape=(self.units,), name='forget_gate_peephole_weights', initializer=self.kernel_initializer) self.output_gate_peephole_weights = self.add_weight( shape=(self.units,), name='output_gate_peephole_weights', initializer=self.kernel_initializer) def _compute_carry_and_output(self, x, h_tm1, c_tm1): x_i, x_f, x_c, x_o = x h_tm1_i, h_tm1_f, h_tm1_c, h_tm1_o = h_tm1 i = self.recurrent_activation( x_i + backend.dot(h_tm1_i, self.recurrent_kernel[:, :self.units]) + self.input_gate_peephole_weights * c_tm1) f = self.recurrent_activation(x_f + backend.dot( h_tm1_f, self.recurrent_kernel[:, self.units:self.units * 2]) + self.forget_gate_peephole_weights * c_tm1) c = f * c_tm1 + i * self.activation(x_c + backend.dot( h_tm1_c, self.recurrent_kernel[:, self.units * 2:self.units * 3])) o = self.recurrent_activation( x_o + backend.dot(h_tm1_o, self.recurrent_kernel[:, self.units * 3:]) + self.output_gate_peephole_weights * c) return c, o def _compute_carry_and_output_fused(self, z, c_tm1): z0, z1, z2, z3 = z i = self.recurrent_activation(z0 + self.input_gate_peephole_weights * c_tm1) f = self.recurrent_activation(z1 + self.forget_gate_peephole_weights * c_tm1) c = f * c_tm1 + i * self.activation(z2) o = self.recurrent_activation(z3 + self.output_gate_peephole_weights * c) return c, o
Ancestors
- LSTMCell
- DropoutRNNCellMixin
- Layer
- tensorflow.python.module.module.Module
- tensorflow.python.training.tracking.tracking.AutoTrackable
- tensorflow.python.training.tracking.base.Trackable
- LayerVersionSelector
Inherited members
LSTMCell
:activity_regularizer
add_loss
add_metric
add_update
add_variable
add_weight
apply
call
compute_dtype
compute_mask
compute_output_shape
compute_output_signature
count_params
dtype
dtype_policy
dynamic
finalize_state
from_config
get_config
get_dropout_mask_for_cell
get_input_at
get_input_mask_at
get_input_shape_at
get_losses_for
get_output_at
get_output_mask_at
get_output_shape_at
get_recurrent_dropout_mask_for_cell
get_updates_for
get_weights
inbound_nodes
input
input_mask
input_shape
input_spec
losses
metrics
name
non_trainable_variables
non_trainable_weights
outbound_nodes
output
output_mask
output_shape
reset_dropout_mask
reset_recurrent_dropout_mask
set_weights
supports_masking
trainable_variables
trainable_weights
variable_dtype
variables
weights
Layer
:
class SequenceFeatures (feature_columns, trainable=True, name=None, **kwargs)
-
A layer for sequence input.
All
feature_columns
must be sequence dense columns with the samesequence_length
. The output of this method can be fed into sequence networks, such as RNN.The output of this method is a 3D
Tensor
of shape[batch_size, T, D]
.T
is the maximum sequence length for this batch, which could differ from batch to batch.If multiple
feature_columns
are given withDi
num_elements
each, their outputs are concatenated. So, the finalTensor
has shape[batch_size, T, D0 + D1 + ... + Dn]
.Example:
import tensorflow as tf # Behavior of some cells or feature columns may depend on whether we are in # training or inference mode, e.g. applying dropout. training = True rating = tf.feature_column.sequence_numeric_column('rating') watches = tf.feature_column.sequence_categorical_column_with_identity( 'watches', num_buckets=1000) watches_embedding = tf.feature_column.embedding_column(watches, dimension=10) columns = [rating, watches_embedding] features = { 'rating': tf.sparse.from_dense([[1.0,1.1, 0, 0, 0], [2.0,2.1,2.2, 2.3, 2.5]]), 'watches': tf.sparse.from_dense([[2, 85, 0, 0, 0],[33,78, 2, 73, 1]]) } sequence_input_layer = tf.keras.experimental.SequenceFeatures(columns) sequence_input, sequence_length = sequence_input_layer( features, training=training) sequence_length_mask = tf.sequence_mask(sequence_length) hidden_size = 32 rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size) rnn_layer = tf.keras.layers.RNN(rnn_cell) outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
"Constructs a SequenceFeatures layer.
Args
feature_columns
- An iterable of dense sequence columns. Valid columns are
-
embedding_column
that wraps asequence_categorical_column_with_*
-sequence_numeric_column
. trainable
- Boolean, whether the layer's variables will be updated via gradient descent during training.
name
- Name to give to the SequenceFeatures.
**kwargs
- Keyword arguments to construct a layer.
Raises
ValueError
- If any of the
feature_columns
is not aSequenceDenseColumn
.
Expand source code
class SequenceFeatures(kfc._BaseFeaturesLayer): """A layer for sequence input. All `feature_columns` must be sequence dense columns with the same `sequence_length`. The output of this method can be fed into sequence networks, such as RNN. The output of this method is a 3D `Tensor` of shape `[batch_size, T, D]`. `T` is the maximum sequence length for this batch, which could differ from batch to batch. If multiple `feature_columns` are given with `Di` `num_elements` each, their outputs are concatenated. So, the final `Tensor` has shape `[batch_size, T, D0 + D1 + ... + Dn]`. Example: ```python import tensorflow as tf # Behavior of some cells or feature columns may depend on whether we are in # training or inference mode, e.g. applying dropout. training = True rating = tf.feature_column.sequence_numeric_column('rating') watches = tf.feature_column.sequence_categorical_column_with_identity( 'watches', num_buckets=1000) watches_embedding = tf.feature_column.embedding_column(watches, dimension=10) columns = [rating, watches_embedding] features = { 'rating': tf.sparse.from_dense([[1.0,1.1, 0, 0, 0], [2.0,2.1,2.2, 2.3, 2.5]]), 'watches': tf.sparse.from_dense([[2, 85, 0, 0, 0],[33,78, 2, 73, 1]]) } sequence_input_layer = tf.keras.experimental.SequenceFeatures(columns) sequence_input, sequence_length = sequence_input_layer( features, training=training) sequence_length_mask = tf.sequence_mask(sequence_length) hidden_size = 32 rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size) rnn_layer = tf.keras.layers.RNN(rnn_cell) outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask) ``` """ def __init__( self, feature_columns, trainable=True, name=None, **kwargs): """"Constructs a SequenceFeatures layer. Args: feature_columns: An iterable of dense sequence columns. Valid columns are - `embedding_column` that wraps a `sequence_categorical_column_with_*` - `sequence_numeric_column`. trainable: Boolean, whether the layer's variables will be updated via gradient descent during training. name: Name to give to the SequenceFeatures. **kwargs: Keyword arguments to construct a layer. Raises: ValueError: If any of the `feature_columns` is not a `SequenceDenseColumn`. """ super(SequenceFeatures, self).__init__( feature_columns=feature_columns, trainable=trainable, name=name, expected_column_type=tf.__internal__.feature_column.SequenceDenseColumn, **kwargs) @property def _is_feature_layer(self): return True def _target_shape(self, input_shape, total_elements): return (input_shape[0], input_shape[1], total_elements) def call(self, features, training=None): """Returns sequence input corresponding to the `feature_columns`. Args: features: A dict mapping keys to tensors. training: Python boolean or None, indicating whether to the layer is being run in training mode. This argument is passed to the call method of any `FeatureColumn` that takes a `training` argument. For example, if a `FeatureColumn` performed dropout, the column could expose a `training` argument to control whether the dropout should be applied. If `None`, defaults to `tf.keras.backend.learning_phase()`. Returns: An `(input_layer, sequence_length)` tuple where: - input_layer: A float `Tensor` of shape `[batch_size, T, D]`. `T` is the maximum sequence length for this batch, which could differ from batch to batch. `D` is the sum of `num_elements` for all `feature_columns`. - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence length for each example. Raises: ValueError: If features are not a dictionary. """ if not isinstance(features, dict): raise ValueError('We expected a dictionary here. Instead we got: ', features) if training is None: training = backend.learning_phase() transformation_cache = tf.__internal__.feature_column.FeatureTransformationCache(features) output_tensors = [] sequence_lengths = [] for column in self._feature_columns: with backend.name_scope(column.name): try: dense_tensor, sequence_length = column.get_sequence_dense_tensor( transformation_cache, self._state_manager, training=training) except TypeError: dense_tensor, sequence_length = column.get_sequence_dense_tensor( transformation_cache, self._state_manager) # Flattens the final dimension to produce a 3D Tensor. output_tensors.append(self._process_dense_tensor(column, dense_tensor)) sequence_lengths.append(sequence_length) # Check and process sequence lengths. kfc._verify_static_batch_size_equality( # pylint: disable=protected-access sequence_lengths, self._feature_columns) sequence_length = _assert_all_equal_and_return(sequence_lengths) return self._verify_and_concat_tensors(output_tensors), sequence_length
Ancestors
- keras.feature_column.base_feature_layer._BaseFeaturesLayer
- Layer
- tensorflow.python.module.module.Module
- tensorflow.python.training.tracking.tracking.AutoTrackable
- tensorflow.python.training.tracking.base.Trackable
- LayerVersionSelector
Methods
def call(self, features, training=None)
-
Returns sequence input corresponding to the
feature_columns
.Args
features
- A dict mapping keys to tensors.
training
- Python boolean or None, indicating whether to the layer is being
run in training mode. This argument is passed to the call method of any
FeatureColumn
that takes atraining
argument. For example, if aFeatureColumn
performed dropout, the column could expose atraining
argument to control whether the dropout should be applied. IfNone
, defaults totf.keras.backend.learning_phase()
.
Returns
An
(input_layer, sequence_length)
tuple where: - input_layer: A floatTensor
of shape[batch_size, T, D]
.T
is the maximum sequence length for this batch, which could differ from batch to batch.D
is the sum ofnum_elements
for allfeature_columns
. - sequence_length: An intTensor
of shape[batch_size]
. The sequence length for each example.Raises
ValueError
- If features are not a dictionary.
Expand source code
def call(self, features, training=None): """Returns sequence input corresponding to the `feature_columns`. Args: features: A dict mapping keys to tensors. training: Python boolean or None, indicating whether to the layer is being run in training mode. This argument is passed to the call method of any `FeatureColumn` that takes a `training` argument. For example, if a `FeatureColumn` performed dropout, the column could expose a `training` argument to control whether the dropout should be applied. If `None`, defaults to `tf.keras.backend.learning_phase()`. Returns: An `(input_layer, sequence_length)` tuple where: - input_layer: A float `Tensor` of shape `[batch_size, T, D]`. `T` is the maximum sequence length for this batch, which could differ from batch to batch. `D` is the sum of `num_elements` for all `feature_columns`. - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence length for each example. Raises: ValueError: If features are not a dictionary. """ if not isinstance(features, dict): raise ValueError('We expected a dictionary here. Instead we got: ', features) if training is None: training = backend.learning_phase() transformation_cache = tf.__internal__.feature_column.FeatureTransformationCache(features) output_tensors = [] sequence_lengths = [] for column in self._feature_columns: with backend.name_scope(column.name): try: dense_tensor, sequence_length = column.get_sequence_dense_tensor( transformation_cache, self._state_manager, training=training) except TypeError: dense_tensor, sequence_length = column.get_sequence_dense_tensor( transformation_cache, self._state_manager) # Flattens the final dimension to produce a 3D Tensor. output_tensors.append(self._process_dense_tensor(column, dense_tensor)) sequence_lengths.append(sequence_length) # Check and process sequence lengths. kfc._verify_static_batch_size_equality( # pylint: disable=protected-access sequence_lengths, self._feature_columns) sequence_length = _assert_all_equal_and_return(sequence_lengths) return self._verify_and_concat_tensors(output_tensors), sequence_length
Inherited members
Layer
:activity_regularizer
add_loss
add_metric
add_update
add_variable
add_weight
apply
build
compute_dtype
compute_mask
compute_output_shape
compute_output_signature
count_params
dtype
dtype_policy
dynamic
finalize_state
from_config
get_config
get_input_at
get_input_mask_at
get_input_shape_at
get_losses_for
get_output_at
get_output_mask_at
get_output_shape_at
get_updates_for
get_weights
inbound_nodes
input
input_mask
input_shape
input_spec
losses
metrics
name
non_trainable_variables
non_trainable_weights
outbound_nodes
output
output_mask
output_shape
set_weights
supports_masking
trainable_variables
trainable_weights
variable_dtype
variables
weights
class WideDeepModel (linear_model, dnn_model, activation=None, **kwargs)
-
Wide & Deep Model for regression and classification problems.
This model jointly train a linear and a dnn model.
Example:
linear_model = LinearModel() dnn_model = keras.Sequential([keras.layers.Dense(units=64), keras.layers.Dense(units=1)]) combined_model = WideDeepModel(linear_model, dnn_model) combined_model.compile(optimizer=['sgd', 'adam'], 'mse', ['mse']) # define dnn_inputs and linear_inputs as separate numpy arrays or # a single numpy array if dnn_inputs is same as linear_inputs. combined_model.fit([linear_inputs, dnn_inputs], y, epochs) # or define a single `tf.data.Dataset` that contains a single tensor or # separate tensors for dnn_inputs and linear_inputs. dataset = tf.data.Dataset.from_tensors(([linear_inputs, dnn_inputs], y)) combined_model.fit(dataset, epochs)
Both linear and dnn model can be pre-compiled and trained separately before jointly training:
Example:
linear_model = LinearModel() linear_model.compile('adagrad', 'mse') linear_model.fit(linear_inputs, y, epochs) dnn_model = keras.Sequential([keras.layers.Dense(units=1)]) dnn_model.compile('rmsprop', 'mse') dnn_model.fit(dnn_inputs, y, epochs) combined_model = WideDeepModel(linear_model, dnn_model) combined_model.compile(optimizer=['sgd', 'adam'], 'mse', ['mse']) combined_model.fit([linear_inputs, dnn_inputs], y, epochs)
Create a Wide & Deep Model.
Args
linear_model
- a premade LinearModel, its output must match the output of the dnn model.
dnn_model
- a
tf.keras.Model
, its output must match the output of the linear model. activation
- Activation function. Set it to None to maintain a linear activation.
**kwargs
- The keyword arguments that are passed on to BaseLayer.init.
Allowed keyword arguments include
name
.
Expand source code
class WideDeepModel(keras_training.Model): r"""Wide & Deep Model for regression and classification problems. This model jointly train a linear and a dnn model. Example: ```python linear_model = LinearModel() dnn_model = keras.Sequential([keras.layers.Dense(units=64), keras.layers.Dense(units=1)]) combined_model = WideDeepModel(linear_model, dnn_model) combined_model.compile(optimizer=['sgd', 'adam'], 'mse', ['mse']) # define dnn_inputs and linear_inputs as separate numpy arrays or # a single numpy array if dnn_inputs is same as linear_inputs. combined_model.fit([linear_inputs, dnn_inputs], y, epochs) # or define a single `tf.data.Dataset` that contains a single tensor or # separate tensors for dnn_inputs and linear_inputs. dataset = tf.data.Dataset.from_tensors(([linear_inputs, dnn_inputs], y)) combined_model.fit(dataset, epochs) ``` Both linear and dnn model can be pre-compiled and trained separately before jointly training: Example: ```python linear_model = LinearModel() linear_model.compile('adagrad', 'mse') linear_model.fit(linear_inputs, y, epochs) dnn_model = keras.Sequential([keras.layers.Dense(units=1)]) dnn_model.compile('rmsprop', 'mse') dnn_model.fit(dnn_inputs, y, epochs) combined_model = WideDeepModel(linear_model, dnn_model) combined_model.compile(optimizer=['sgd', 'adam'], 'mse', ['mse']) combined_model.fit([linear_inputs, dnn_inputs], y, epochs) ``` """ def __init__(self, linear_model, dnn_model, activation=None, **kwargs): """Create a Wide & Deep Model. Args: linear_model: a premade LinearModel, its output must match the output of the dnn model. dnn_model: a `tf.keras.Model`, its output must match the output of the linear model. activation: Activation function. Set it to None to maintain a linear activation. **kwargs: The keyword arguments that are passed on to BaseLayer.__init__. Allowed keyword arguments include `name`. """ super(WideDeepModel, self).__init__(**kwargs) base_layer.keras_premade_model_gauge.get_cell('WideDeep').set(True) self.linear_model = linear_model self.dnn_model = dnn_model self.activation = activations.get(activation) def call(self, inputs, training=None): if not isinstance(inputs, (tuple, list)) or len(inputs) != 2: linear_inputs = dnn_inputs = inputs else: linear_inputs, dnn_inputs = inputs linear_output = self.linear_model(linear_inputs) # pylint: disable=protected-access if self.dnn_model._expects_training_arg: if training is None: training = backend.learning_phase() dnn_output = self.dnn_model(dnn_inputs, training=training) else: dnn_output = self.dnn_model(dnn_inputs) output = tf.nest.map_structure(lambda x, y: (x + y), linear_output, dnn_output) if self.activation: return tf.nest.map_structure(self.activation, output) return output # This does not support gradient scaling and LossScaleOptimizer. def train_step(self, data): x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data) x, y, sample_weight = data_adapter.expand_1d((x, y, sample_weight)) with tf.GradientTape() as tape: y_pred = self(x, training=True) loss = self.compiled_loss( y, y_pred, sample_weight, regularization_losses=self.losses) self.compiled_metrics.update_state(y, y_pred, sample_weight) if isinstance(self.optimizer, (list, tuple)): linear_vars = self.linear_model.trainable_variables dnn_vars = self.dnn_model.trainable_variables linear_grads, dnn_grads = tape.gradient(loss, (linear_vars, dnn_vars)) linear_optimizer = self.optimizer[0] dnn_optimizer = self.optimizer[1] linear_optimizer.apply_gradients(zip(linear_grads, linear_vars)) dnn_optimizer.apply_gradients(zip(dnn_grads, dnn_vars)) else: trainable_variables = self.trainable_variables grads = tape.gradient(loss, trainable_variables) self.optimizer.apply_gradients(zip(grads, trainable_variables)) return {m.name: m.result() for m in self.metrics} def _make_train_function(self): # Only needed for graph mode and model_to_estimator. has_recompiled = self._recompile_weights_loss_and_weighted_metrics() self._check_trainable_weights_consistency() # If we have re-compiled the loss/weighted metric sub-graphs then create # train function even if one exists already. This is because # `_feed_sample_weights` list has been updated on re-compile. if getattr(self, 'train_function', None) is None or has_recompiled: # Restore the compiled trainable state. current_trainable_state = self._get_trainable_state() self._set_trainable_state(self._compiled_trainable_state) inputs = ( self._feed_inputs + self._feed_targets + self._feed_sample_weights) if not isinstance(backend.symbolic_learning_phase(), int): inputs += [backend.symbolic_learning_phase()] if isinstance(self.optimizer, (list, tuple)): linear_optimizer = self.optimizer[0] dnn_optimizer = self.optimizer[1] else: linear_optimizer = self.optimizer dnn_optimizer = self.optimizer with backend.get_graph().as_default(): with backend.name_scope('training'): # Training updates updates = [] linear_updates = linear_optimizer.get_updates( params=self.linear_model.trainable_weights, # pylint: disable=protected-access loss=self.total_loss) updates += linear_updates dnn_updates = dnn_optimizer.get_updates( params=self.dnn_model.trainable_weights, # pylint: disable=protected-access loss=self.total_loss) updates += dnn_updates # Unconditional updates updates += self.get_updates_for(None) # Conditional updates relevant to this model updates += self.get_updates_for(self.inputs) metrics = self._get_training_eval_metrics() metrics_tensors = [ m._call_result for m in metrics if hasattr(m, '_call_result') # pylint: disable=protected-access ] with backend.name_scope('training'): # Gets loss and metrics. Updates weights at each call. fn = backend.function( inputs, [self.total_loss] + metrics_tensors, updates=updates, name='train_function', **self._function_kwargs) setattr(self, 'train_function', fn) # Restore the current trainable state self._set_trainable_state(current_trainable_state) def get_config(self): linear_config = generic_utils.serialize_keras_object(self.linear_model) dnn_config = generic_utils.serialize_keras_object(self.dnn_model) config = { 'linear_model': linear_config, 'dnn_model': dnn_config, 'activation': activations.serialize(self.activation), } base_config = base_layer.Layer.get_config(self) return dict(list(base_config.items()) + list(config.items())) @classmethod def from_config(cls, config, custom_objects=None): linear_config = config.pop('linear_model') linear_model = layer_module.deserialize(linear_config, custom_objects) dnn_config = config.pop('dnn_model') dnn_model = layer_module.deserialize(dnn_config, custom_objects) activation = activations.deserialize( config.pop('activation', None), custom_objects=custom_objects) return cls( linear_model=linear_model, dnn_model=dnn_model, activation=activation, **config)
Ancestors
- Model
- Layer
- tensorflow.python.module.module.Module
- tensorflow.python.training.tracking.tracking.AutoTrackable
- tensorflow.python.training.tracking.base.Trackable
- LayerVersionSelector
- ModelVersionSelector
Inherited members
Model
:activity_regularizer
add_loss
add_metric
add_update
add_variable
add_weight
apply
build
call
compile
compute_dtype
compute_mask
compute_output_shape
compute_output_signature
count_params
distribute_strategy
dtype
dtype_policy
dynamic
evaluate
evaluate_generator
finalize_state
fit
fit_generator
from_config
get_config
get_input_at
get_input_mask_at
get_input_shape_at
get_layer
get_losses_for
get_output_at
get_output_mask_at
get_output_shape_at
get_updates_for
get_weights
inbound_nodes
input
input_mask
input_shape
input_spec
load_weights
losses
make_predict_function
make_test_function
make_train_function
metrics
metrics_names
name
non_trainable_variables
non_trainable_weights
outbound_nodes
output
output_mask
output_shape
predict
predict_generator
predict_on_batch
predict_step
reset_metrics
run_eagerly
save
save_spec
save_weights
set_weights
state_updates
summary
supports_masking
test_on_batch
test_step
to_json
to_yaml
train_on_batch
train_step
trainable_variables
trainable_weights
variable_dtype
variables
weights