Module keras.api.keras.experimental

Public API for tf.keras.experimental namespace.

Expand source code
# This file is MACHINE GENERATED! Do not edit.
# Generated by: tensorflow/python/tools/api/generator/create_python_api.py script.
"""Public API for tf.keras.experimental namespace.
"""

from __future__ import print_function as _print_function

import sys as _sys

from keras.feature_column.sequence_feature_column import SequenceFeatures
from keras.layers.recurrent import PeepholeLSTMCell
from keras.optimizer_v2.learning_rate_schedule import CosineDecay
from keras.optimizer_v2.learning_rate_schedule import CosineDecayRestarts
from keras.premade.linear import LinearModel
from keras.premade.wide_deep import WideDeepModel
from keras.saving.saved_model_experimental import export_saved_model
from keras.saving.saved_model_experimental import load_from_saved_model

del _print_function

from tensorflow.python.util import module_wrapper as _module_wrapper

if not isinstance(_sys.modules[__name__], _module_wrapper.TFModuleWrapper):
  _sys.modules[__name__] = _module_wrapper.TFModuleWrapper(
      _sys.modules[__name__], "keras.experimental", public_apis=None, deprecation=True,
      has_lite=False)

Functions

def export_saved_model(model, saved_model_path, custom_objects=None, as_text=False, input_signature=None, serving_only=False)

Exports a tf.keras.Model as a TensorFlow SavedModel.

Note that at this time, subclassed models can only be saved using serving_only=True.

The exported SavedModel is a standalone serialization of TensorFlow objects, and is supported by TF language APIs and the TensorFlow Serving system. To load the model, use the function tf.keras.experimental.load_from_saved_model.

The SavedModel contains:

  1. a checkpoint containing the model weights.
  2. a SavedModel proto containing the TensorFlow backend graph. Separate graphs are saved for prediction (serving), train, and evaluation. If the model has not been compiled, then only the graph computing predictions will be exported.
  3. the model's json config. If the model is subclassed, this will only be included if the model's get_config() method is overridden.

Example:

import tensorflow as tf

# Create a tf.keras model.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(1, input_shape=[10]))
model.summary()

# Save the tf.keras model in the SavedModel format.
path = '/tmp/simple_keras_model'
tf.keras.experimental.export_saved_model(model, path)

# Load the saved keras model back.
new_model = tf.keras.experimental.load_from_saved_model(path)
new_model.summary()

Args

model
A tf.keras.Model to be saved. If the model is subclassed, the flag serving_only must be set to True.
saved_model_path
a string specifying the path to the SavedModel directory.
custom_objects
Optional dictionary mapping string names to custom classes or functions (e.g. custom loss functions).
as_text
bool, False by default. Whether to write the SavedModel proto in text format. Currently unavailable in serving-only mode.
input_signature
A possibly nested sequence of tf.TensorSpec objects, used to specify the expected model inputs. See tf.function for more details.
serving_only
bool, False by default. When this is true, only the prediction graph is saved.

Raises

NotImplementedError
If the model is a subclassed model, and serving_only is False.
ValueError
If the input signature cannot be inferred from the model.
AssertionError
If the SavedModel directory already exists and isn't empty.
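
The source below emits a deprecation warning recommending model.save(..., save_format="tf") or tf.keras.models.save_model(..., save_format="tf") instead. A minimal sketch of that replacement path, reusing the model and path from the example above:

# Recommended replacement for export_saved_model / load_from_saved_model.
model.save(path, save_format='tf')
new_model = tf.keras.models.load_model(path)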
Expand source code
@keras_export(v1=['keras.experimental.export_saved_model'])
def export_saved_model(model,
                       saved_model_path,
                       custom_objects=None,
                       as_text=False,
                       input_signature=None,
                       serving_only=False):
  """Exports a `tf.keras.Model` as a Tensorflow SavedModel.

  Note that at this time, subclassed models can only be saved using
  `serving_only=True`.

  The exported `SavedModel` is a standalone serialization of Tensorflow objects,
  and is supported by TF language APIs and the Tensorflow Serving system.
  To load the model, use the function
  `tf.keras.experimental.load_from_saved_model`.

  The `SavedModel` contains:

  1. a checkpoint containing the model weights.
  2. a `SavedModel` proto containing the Tensorflow backend graph. Separate
     graphs are saved for prediction (serving), train, and evaluation. If
     the model has not been compiled, then only the graph computing predictions
     will be exported.
  3. the model's json config. If the model is subclassed, this will only be
     included if the model's `get_config()` method is overwritten.

  Example:

  ```python
  import tensorflow as tf

  # Create a tf.keras model.
  model = tf.keras.Sequential()
  model.add(tf.keras.layers.Dense(1, input_shape=[10]))
  model.summary()

  # Save the tf.keras model in the SavedModel format.
  path = '/tmp/simple_keras_model'
  tf.keras.experimental.export_saved_model(model, path)

  # Load the saved keras model back.
  new_model = tf.keras.experimental.load_from_saved_model(path)
  new_model.summary()
  ```

  Args:
    model: A `tf.keras.Model` to be saved. If the model is subclassed, the flag
      `serving_only` must be set to True.
    saved_model_path: a string specifying the path to the SavedModel directory.
    custom_objects: Optional dictionary mapping string names to custom classes
      or functions (e.g. custom loss functions).
    as_text: bool, `False` by default. Whether to write the `SavedModel` proto
      in text format. Currently unavailable in serving-only mode.
    input_signature: A possibly nested sequence of `tf.TensorSpec` objects, used
      to specify the expected model inputs. See `tf.function` for more details.
    serving_only: bool, `False` by default. When this is true, only the
      prediction graph is saved.

  Raises:
    NotImplementedError: If the model is a subclassed model, and serving_only is
      False.
    ValueError: If the input signature cannot be inferred from the model.
    AssertionError: If the SavedModel directory already exists and isn't empty.
  """
  warnings.warn('`tf.keras.experimental.export_saved_model` is deprecated '
                'and will be removed in a future version. '
                'Please use `model.save(..., save_format="tf")` or '
                '`tf.keras.models.save_model(..., save_format="tf")`.')
  if serving_only:
    tf.saved_model.save(
        model,
        saved_model_path,
        signatures=saving_utils.trace_model_call(model, input_signature))
  else:
    _save_v1_format(model, saved_model_path, custom_objects, as_text,
                    input_signature)

  try:
    _export_model_json(model, saved_model_path)
  except NotImplementedError:
    logging.warning('Skipped saving model JSON, subclassed model does not have '
                    'get_config() defined.')
def load_from_saved_model(saved_model_path, custom_objects=None)

Loads a keras Model from a SavedModel created by export_saved_model().

This function reinstantiates model state by:

  1. loading model topology from json (this will eventually come from metagraph).
  2. loading model weights from checkpoint.

Example:

import tensorflow as tf

# Create a tf.keras model.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(1, input_shape=[10]))
model.summary()

# Save the tf.keras model in the SavedModel format.
path = '/tmp/simple_keras_model'
tf.keras.experimental.export_saved_model(model, path)

# Load the saved keras model back.
new_model = tf.keras.experimental.load_from_saved_model(path)
new_model.summary()

Args

saved_model_path
a string specifying the path to an existing SavedModel.
custom_objects
Optional dictionary mapping names (strings) to custom classes or functions to be considered during deserialization.

Returns

a keras.Model instance.

Expand source code
@keras_export(v1=['keras.experimental.load_from_saved_model'])
def load_from_saved_model(saved_model_path, custom_objects=None):
  """Loads a keras Model from a SavedModel created by `export_saved_model()`.

  This function reinstantiates model state by:
  1) loading model topology from json (this will eventually come
     from metagraph).
  2) loading model weights from checkpoint.

  Example:

  ```python
  import tensorflow as tf

  # Create a tf.keras model.
  model = tf.keras.Sequential()
  model.add(tf.keras.layers.Dense(1, input_shape=[10]))
  model.summary()

  # Save the tf.keras model in the SavedModel format.
  path = '/tmp/simple_keras_model'
  tf.keras.experimental.export_saved_model(model, path)

  # Load the saved keras model back.
  new_model = tf.keras.experimental.load_from_saved_model(path)
  new_model.summary()
  ```

  Args:
    saved_model_path: a string specifying the path to an existing SavedModel.
    custom_objects: Optional dictionary mapping names
        (strings) to custom classes or functions to be
        considered during deserialization.

  Returns:
    a keras.Model instance.
  """
  warnings.warn('`tf.keras.experimental.load_from_saved_model` is deprecated '
                'and will be removed in a future version. '
                'Please switch to `tf.keras.models.load_model`.')
  # restore model topology from json string
  model_json_filepath = os.path.join(
      tf.compat.as_bytes(saved_model_path),
      tf.compat.as_bytes(tf.saved_model.ASSETS_DIRECTORY),
      tf.compat.as_bytes(SAVED_MODEL_FILENAME_JSON))
  with tf.io.gfile.GFile(model_json_filepath, 'r') as f:
    model_json = f.read()
  model = model_config.model_from_json(
      model_json, custom_objects=custom_objects)

  # restore model weights
  checkpoint_prefix = os.path.join(
      tf.compat.as_text(saved_model_path),
      tf.compat.as_text(tf.saved_model.VARIABLES_DIRECTORY),
      tf.compat.as_text(tf.saved_model.VARIABLES_FILENAME))
  model.load_weights(checkpoint_prefix)
  return model

Classes

class CosineDecay (initial_learning_rate, decay_steps, alpha=0.0, name=None)

A LearningRateSchedule that uses a cosine decay schedule.

See Loshchilov & Hutter, ICLR2016 (https://arxiv.org/abs/1608.03983), SGDR: Stochastic Gradient Descent with Warm Restarts.

When training a model, it is often useful to lower the learning rate as the training progresses. This schedule applies a cosine decay function to an optimizer step, given a provided initial learning rate. It requires a step value to compute the decayed learning rate. You can just pass a TensorFlow variable that you increment at each training step.

The schedule is a 1-arg callable that produces a decayed learning rate when passed the current optimizer step. This can be useful for changing the learning rate value across different invocations of optimizer functions. It is computed as:

def decayed_learning_rate(step):
  step = min(step, decay_steps)
  cosine_decay = 0.5 * (1 + cos(pi * step / decay_steps))
  decayed = (1 - alpha) * cosine_decay + alpha
  return initial_learning_rate * decayed

Example usage:

decay_steps = 1000
lr_decayed_fn = tf.keras.optimizers.schedules.CosineDecay(
    initial_learning_rate, decay_steps)

You can pass this schedule directly into a tf.keras.optimizers.Optimizer as the learning rate. The learning rate schedule is also serializable and deserializable using tf.keras.optimizers.schedules.serialize and tf.keras.optimizers.schedules.deserialize.
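
For instance, a minimal sketch of wiring the schedule into an optimizer and round-tripping it through serialize/deserialize (the initial learning rate below is an arbitrary value chosen for illustration):

initial_learning_rate = 0.1  # hypothetical value
decay_steps = 1000
lr_decayed_fn = tf.keras.optimizers.schedules.CosineDecay(
    initial_learning_rate, decay_steps)
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_decayed_fn)

config = tf.keras.optimizers.schedules.serialize(lr_decayed_fn)
restored_fn = tf.keras.optimizers.schedules.deserialize(config)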

Returns

A 1-arg callable learning rate schedule that takes the current optimizer step and outputs the decayed learning rate, a scalar Tensor of the same type as initial_learning_rate.

Applies cosine decay to the learning rate.

Args

initial_learning_rate
A scalar float32 or float64 Tensor or a Python number. The initial learning rate.
decay_steps
A scalar int32 or int64 Tensor or a Python number. Number of steps to decay over.
alpha
A scalar float32 or float64 Tensor or a Python number. Minimum learning rate value as a fraction of initial_learning_rate.
name
String. Optional name of the operation. Defaults to 'CosineDecay'.
Expand source code
class CosineDecay(LearningRateSchedule):
  """A LearningRateSchedule that uses a cosine decay schedule.

  See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983),
  SGDR: Stochastic Gradient Descent with Warm Restarts.

  When training a model, it is often useful to lower the learning rate as
  the training progresses. This schedule applies a cosine decay function
  to an optimizer step, given a provided initial learning rate.
  It requires a `step` value to compute the decayed learning rate. You can
  just pass a TensorFlow variable that you increment at each training step.

  The schedule is a 1-arg callable that produces a decayed learning
  rate when passed the current optimizer step. This can be useful for changing
  the learning rate value across different invocations of optimizer functions.
  It is computed as:

  ```python
  def decayed_learning_rate(step):
    step = min(step, decay_steps)
    cosine_decay = 0.5 * (1 + cos(pi * step / decay_steps))
    decayed = (1 - alpha) * cosine_decay + alpha
    return initial_learning_rate * decayed
  ```

  Example usage:
  ```python
  decay_steps = 1000
  lr_decayed_fn = tf.keras.optimizers.schedules.CosineDecay(
      initial_learning_rate, decay_steps)
  ```

  You can pass this schedule directly into a `tf.keras.optimizers.Optimizer`
  as the learning rate. The learning rate schedule is also serializable and
  deserializable using `tf.keras.optimizers.schedules.serialize` and
  `tf.keras.optimizers.schedules.deserialize`.

  Returns:
    A 1-arg callable learning rate schedule that takes the current optimizer
    step and outputs the decayed learning rate, a scalar `Tensor` of the same
    type as `initial_learning_rate`.
  """

  def __init__(
      self,
      initial_learning_rate,
      decay_steps,
      alpha=0.0,
      name=None):
    """Applies cosine decay to the learning rate.

    Args:
      initial_learning_rate: A scalar `float32` or `float64` Tensor or a
        Python number. The initial learning rate.
      decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number.
        Number of steps to decay over.
      alpha: A scalar `float32` or `float64` Tensor or a Python number.
        Minimum learning rate value as a fraction of initial_learning_rate.
      name: String. Optional name of the operation.  Defaults to 'CosineDecay'.
    """
    super(CosineDecay, self).__init__()

    self.initial_learning_rate = initial_learning_rate
    self.decay_steps = decay_steps
    self.alpha = alpha
    self.name = name

  def __call__(self, step):
    with tf.name_scope(self.name or "CosineDecay"):
      initial_learning_rate = tf.convert_to_tensor(
          self.initial_learning_rate, name="initial_learning_rate")
      dtype = initial_learning_rate.dtype
      decay_steps = tf.cast(self.decay_steps, dtype)

      global_step_recomp = tf.cast(step, dtype)
      global_step_recomp = tf.minimum(global_step_recomp, decay_steps)
      completed_fraction = global_step_recomp / decay_steps
      cosine_decayed = 0.5 * (1.0 + tf.cos(
          tf.constant(math.pi) * completed_fraction))

      decayed = (1 - self.alpha) * cosine_decayed + self.alpha
      return tf.multiply(initial_learning_rate, decayed)

  def get_config(self):
    return {
        "initial_learning_rate": self.initial_learning_rate,
        "decay_steps": self.decay_steps,
        "alpha": self.alpha,
        "name": self.name
    }

Ancestors

Methods

def get_config(self)
Expand source code
def get_config(self):
  return {
      "initial_learning_rate": self.initial_learning_rate,
      "decay_steps": self.decay_steps,
      "alpha": self.alpha,
      "name": self.name
  }

Inherited members

class CosineDecayRestarts (initial_learning_rate, first_decay_steps, t_mul=2.0, m_mul=1.0, alpha=0.0, name=None)

A LearningRateSchedule that uses a cosine decay schedule with restarts.

See Loshchilov & Hutter, ICLR2016 (https://arxiv.org/abs/1608.03983), SGDR: Stochastic Gradient Descent with Warm Restarts.

When training a model, it is often useful to lower the learning rate as the training progresses. This schedule applies a cosine decay function with restarts to an optimizer step, given a provided initial learning rate. It requires a step value to compute the decayed learning rate. You can just pass a TensorFlow variable that you increment at each training step.

The schedule is a 1-arg callable that produces a decayed learning rate when passed the current optimizer step. This can be useful for changing the learning rate value across different invocations of optimizer functions.

The learning rate multiplier first decays from 1 to alpha for first_decay_steps steps. Then, a warm restart is performed. Each new warm restart runs for t_mul times more steps and with m_mul times smaller initial learning rate.

Example usage:

first_decay_steps = 1000
lr_decayed_fn = (
  tf.keras.optimizers.schedules.CosineDecayRestarts(
      initial_learning_rate,
      first_decay_steps))

You can pass this schedule directly into a tf.keras.optimizers.Optimizer as the learning rate. The learning rate schedule is also serializable and deserializable using tf.keras.optimizers.schedules.serialize and tf.keras.optimizers.schedules.deserialize.
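
As a rough illustration of the restart behaviour (the numbers below are assumptions, not part of the API), the schedule can be called directly on step values; with the defaults t_mul=2.0 and m_mul=1.0, the rate decays towards alpha over the first 1000 steps and then jumps back to the initial value:

initial_learning_rate = 0.1  # hypothetical value
first_decay_steps = 1000
lr_decayed_fn = tf.keras.optimizers.schedules.CosineDecayRestarts(
    initial_learning_rate, first_decay_steps)

for step in [0, 500, 999, 1000, 1500]:
    print(step, float(lr_decayed_fn(step)))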

Returns

A 1-arg callable learning rate schedule that takes the current optimizer step and outputs the decayed learning rate, a scalar Tensor of the same type as initial_learning_rate.

Applies cosine decay with restarts to the learning rate.

Args

initial_learning_rate
A scalar float32 or float64 Tensor or a Python number. The initial learning rate.
first_decay_steps
A scalar int32 or int64 Tensor or a Python number. Number of steps to decay over.
t_mul
A scalar float32 or float64 Tensor or a Python number. Used to derive the number of iterations in the i-th period.
m_mul
A scalar float32 or float64 Tensor or a Python number. Used to derive the initial learning rate of the i-th period.
alpha
A scalar float32 or float64 Tensor or a Python number. Minimum learning rate value as a fraction of the initial_learning_rate.
name
String. Optional name of the operation. Defaults to 'SGDRDecay'.
Expand source code
class CosineDecayRestarts(LearningRateSchedule):
  """A LearningRateSchedule that uses a cosine decay schedule with restarts.

  See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983),
  SGDR: Stochastic Gradient Descent with Warm Restarts.

  When training a model, it is often useful to lower the learning rate as
  the training progresses. This schedule applies a cosine decay function with
  restarts to an optimizer step, given a provided initial learning rate.
  It requires a `step` value to compute the decayed learning rate. You can
  just pass a TensorFlow variable that you increment at each training step.

  The schedule is a 1-arg callable that produces a decayed learning
  rate when passed the current optimizer step. This can be useful for changing
  the learning rate value across different invocations of optimizer functions.

  The learning rate multiplier first decays
  from 1 to `alpha` for `first_decay_steps` steps. Then, a warm
  restart is performed. Each new warm restart runs for `t_mul` times more
  steps and with `m_mul` times smaller initial learning rate.

  Example usage:
  ```python
  first_decay_steps = 1000
  lr_decayed_fn = (
    tf.keras.optimizers.schedules.CosineDecayRestarts(
        initial_learning_rate,
        first_decay_steps))
  ```

  You can pass this schedule directly into a `tf.keras.optimizers.Optimizer`
  as the learning rate. The learning rate schedule is also serializable and
  deserializable using `tf.keras.optimizers.schedules.serialize` and
  `tf.keras.optimizers.schedules.deserialize`.

  Returns:
    A 1-arg callable learning rate schedule that takes the current optimizer
    step and outputs the decayed learning rate, a scalar `Tensor` of the same
    type as `initial_learning_rate`.
  """

  def __init__(
      self,
      initial_learning_rate,
      first_decay_steps,
      t_mul=2.0,
      m_mul=1.0,
      alpha=0.0,
      name=None):
    """Applies cosine decay with restarts to the learning rate.

    Args:
      initial_learning_rate: A scalar `float32` or `float64` Tensor or a Python
        number. The initial learning rate.
      first_decay_steps: A scalar `int32` or `int64` `Tensor` or a Python
        number. Number of steps to decay over.
      t_mul: A scalar `float32` or `float64` `Tensor` or a Python number.
        Used to derive the number of iterations in the i-th period.
      m_mul: A scalar `float32` or `float64` `Tensor` or a Python number.
        Used to derive the initial learning rate of the i-th period.
      alpha: A scalar `float32` or `float64` Tensor or a Python number.
        Minimum learning rate value as a fraction of the initial_learning_rate.
      name: String. Optional name of the operation.  Defaults to 'SGDRDecay'.
    """
    super(CosineDecayRestarts, self).__init__()

    self.initial_learning_rate = initial_learning_rate
    self.first_decay_steps = first_decay_steps
    self._t_mul = t_mul
    self._m_mul = m_mul
    self.alpha = alpha
    self.name = name

  def __call__(self, step):
    with tf.name_scope(self.name or "SGDRDecay") as name:
      initial_learning_rate = tf.convert_to_tensor(
          self.initial_learning_rate, name="initial_learning_rate")
      dtype = initial_learning_rate.dtype
      first_decay_steps = tf.cast(self.first_decay_steps, dtype)
      alpha = tf.cast(self.alpha, dtype)
      t_mul = tf.cast(self._t_mul, dtype)
      m_mul = tf.cast(self._m_mul, dtype)

      global_step_recomp = tf.cast(step, dtype)
      completed_fraction = global_step_recomp / first_decay_steps

      def compute_step(completed_fraction, geometric=False):
        """Helper for `cond` operation."""
        if geometric:
          i_restart = tf.floor(
              tf.math.log(1.0 - completed_fraction * (1.0 - t_mul)) /
              tf.math.log(t_mul))

          sum_r = (1.0 - t_mul**i_restart) / (1.0 - t_mul)
          completed_fraction = (completed_fraction - sum_r) / t_mul**i_restart

        else:
          i_restart = tf.floor(completed_fraction)
          completed_fraction -= i_restart

        return i_restart, completed_fraction

      i_restart, completed_fraction = tf.cond(
          tf.equal(t_mul, 1.0),
          lambda: compute_step(completed_fraction, geometric=False),
          lambda: compute_step(completed_fraction, geometric=True))

      m_fac = m_mul**i_restart
      cosine_decayed = 0.5 * m_fac * (1.0 + tf.cos(
          tf.constant(math.pi) * completed_fraction))
      decayed = (1 - alpha) * cosine_decayed + alpha

      return tf.multiply(initial_learning_rate, decayed, name=name)

  def get_config(self):
    return {
        "initial_learning_rate": self.initial_learning_rate,
        "first_decay_steps": self.first_decay_steps,
        "t_mul": self._t_mul,
        "m_mul": self._m_mul,
        "alpha": self.alpha,
        "name": self.name
    }

Ancestors

Methods

def get_config(self)
Expand source code
def get_config(self):
  return {
      "initial_learning_rate": self.initial_learning_rate,
      "first_decay_steps": self.first_decay_steps,
      "t_mul": self._t_mul,
      "m_mul": self._m_mul,
      "alpha": self.alpha,
      "name": self.name
  }

Inherited members

class LinearModel (units=1, activation=None, use_bias=True, kernel_initializer='zeros', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, **kwargs)

Linear Model for regression and classification problems.

This model approximates the following function: $$y = \beta + \sum_{i=1}^{N} w_{i} * x_{i}$$ where $$\beta$$ is the bias and $$w_{i}$$ is the weight for each feature.

Example:

model = LinearModel()
model.compile(optimizer='sgd', loss='mse')
model.fit(x, y, epochs=epochs)

This model accepts sparse float inputs as well:

Example:

model = LinearModel()
opt = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.MeanSquaredError()
with tf.GradientTape() as tape:
  output = model(sparse_input)
  loss = tf.reduce_mean(loss_fn(target, output))
grads = tape.gradient(loss, model.weights)
opt.apply_gradients(zip(grads, model.weights))
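
A minimal end-to-end sketch with synthetic dense inputs (the array shapes, epoch count, and random data below are assumptions made purely for illustration):

import numpy as np
import tensorflow as tf

x = np.random.uniform(size=(64, 10)).astype('float32')  # 64 examples, 10 features
y = np.random.uniform(size=(64, 1)).astype('float32')

model = tf.keras.experimental.LinearModel()
model.compile(optimizer='sgd', loss='mse')
model.fit(x, y, epochs=5, verbose=0)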

Create a Linear Model.

Args

units
Positive integer, output dimension without the batch size.
activation
Activation function to use. If you don't specify anything, no activation is applied.
use_bias
whether to calculate the bias/intercept for this model. If set to False, no bias/intercept will be used in calculations, e.g., the data is already centered.
kernel_initializer
Initializer for the kernel weights matrices.
bias_initializer
Initializer for the bias vector.
kernel_regularizer
regularizer for kernel vectors.
bias_regularizer
regularizer for bias vector.
**kwargs
The keyword arguments that are passed on to BaseLayer.__init__.
Expand source code
class LinearModel(training.Model):
  r"""Linear Model for regression and classification problems.

  This model approximates the following function:
  $$y = \beta + \sum_{i=1}^{N} w_{i} * x_{i}$$
  where $$\beta$$ is the bias and $$w_{i}$$ is the weight for each feature.

  Example:

  ```python
  model = LinearModel()
  model.compile(optimizer='sgd', loss='mse')
  model.fit(x, y, epochs=epochs)
  ```

  This model accepts sparse float inputs as well:

  Example:
  ```python
  model = LinearModel()
  opt = tf.keras.optimizers.Adam()
  loss_fn = tf.keras.losses.MeanSquaredError()
  with tf.GradientTape() as tape:
    output = model(sparse_input)
    loss = tf.reduce_mean(loss_fn(target, output))
  grads = tape.gradient(loss, model.weights)
  opt.apply_gradients(zip(grads, model.weights))
  ```

  """

  def __init__(self,
               units=1,
               activation=None,
               use_bias=True,
               kernel_initializer='zeros',
               bias_initializer='zeros',
               kernel_regularizer=None,
               bias_regularizer=None,
               **kwargs):
    """Create a Linear Model.

    Args:
      units: Positive integer, output dimension without the batch size.
      activation: Activation function to use.
        If you don't specify anything, no activation is applied.
      use_bias: whether to calculate the bias/intercept for this model. If set
        to False, no bias/intercept will be used in calculations, e.g., the data
        is already centered.
      kernel_initializer: Initializer for the `kernel` weights matrices.
      bias_initializer: Initializer for the bias vector.
      kernel_regularizer: regularizer for kernel vectors.
      bias_regularizer: regularizer for bias vector.
      **kwargs: The keyword arguments that are passed on to BaseLayer.__init__.
    """

    self.units = units
    self.activation = activations.get(activation)
    self.use_bias = use_bias
    self.kernel_initializer = initializers.get(kernel_initializer)
    self.bias_initializer = initializers.get(bias_initializer)
    self.kernel_regularizer = regularizers.get(kernel_regularizer)
    self.bias_regularizer = regularizers.get(bias_regularizer)
    super(LinearModel, self).__init__(**kwargs)
    base_layer.keras_premade_model_gauge.get_cell('Linear').set(True)

  def build(self, input_shape):
    if isinstance(input_shape, dict):
      names = sorted(list(input_shape.keys()))
      self.input_specs = []
      self.dense_layers = []
      for name in names:
        shape = input_shape[name]
        layer = core.Dense(
            units=self.units,
            use_bias=False,
            kernel_initializer=self.kernel_initializer,
            kernel_regularizer=self.kernel_regularizer,
            name=name)
        layer.build(shape)
        self.input_specs.append(
            input_spec.InputSpec(shape=shape, name=name))
        self.dense_layers.append(layer)
    elif isinstance(input_shape, (tuple, list)) and all(
        isinstance(shape, tf.TensorShape) for shape in input_shape):
      self.dense_layers = []
      for shape in input_shape:
        layer = core.Dense(
            units=self.units,
            use_bias=False,
            kernel_initializer=self.kernel_initializer,
            kernel_regularizer=self.kernel_regularizer)
        layer.build(shape)
        self.dense_layers.append(layer)
    else:
      # input_shape can be a single TensorShape or a tuple of ints.
      layer = core.Dense(
          units=self.units,
          use_bias=False,
          kernel_initializer=self.kernel_initializer,
          kernel_regularizer=self.kernel_regularizer)
      layer.build(input_shape)
      self.dense_layers = [layer]

    if self.use_bias:
      self.bias = self.add_weight(
          'bias',
          shape=self.units,
          initializer=self.bias_initializer,
          regularizer=self.bias_regularizer,
          dtype=self.dtype,
          trainable=True)
    else:
      self.bias = None
    self.built = True

  def call(self, inputs):
    result = None
    if isinstance(inputs, dict):
      names = [layer.name for layer in self.dense_layers]
      different_keys = set(names) - set(inputs.keys())
      if different_keys:
        raise ValueError(
            'The input dictionary does not match '
            'the structure expected by the model.'
            '\n\tExpected keys: {}'
            '\n\tReceived keys: {}'
            '\n\tMissing keys: {}'.format(set(names), set(inputs.keys()),
                                          different_keys))
      inputs = [inputs[name] for name in names]
      for inp, layer in zip(inputs, self.dense_layers):
        output = layer(inp)
        if result is None:
          result = output
        else:
          result += output
    elif isinstance(inputs, (tuple, list)):
      for inp, layer in zip(inputs, self.dense_layers):
        output = layer(inp)
        if result is None:
          result = output
        else:
          result += output
    else:
      result = self.dense_layers[0](inputs)

    if self.use_bias:
      result = tf.nn.bias_add(result, self.bias)
    if self.activation is not None:
      return self.activation(result)  # pylint: disable=not-callable
    return result

  def get_config(self):
    config = {
        'units': self.units,
        'activation': activations.serialize(self.activation),
        'use_bias': self.use_bias,
        'kernel_initializer': initializers.serialize(self.kernel_initializer),
        'bias_initializer': initializers.serialize(self.bias_initializer),
        'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
        'bias_regularizer': regularizers.serialize(self.bias_regularizer),
    }
    base_config = base_layer.Layer.get_config(self)
    return dict(list(base_config.items()) + list(config.items()))

  @classmethod
  def from_config(cls, config, custom_objects=None):
    del custom_objects
    return cls(**config)

Ancestors

Inherited members

class PeepholeLSTMCell (units, activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', unit_forget_bias=True, kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0.0, recurrent_dropout=0.0, **kwargs)

Equivalent to LSTMCell class but adds peephole connections.

Peephole connections allow the gates to utilize the previous internal state as well as the previous hidden state (which is what LSTMCell is limited to). This allows PeepholeLSTMCell to better learn precise timings over LSTMCell.

From Gers et al., 2002:

"We find that LSTM augmented by 'peephole connections' from its internal cells to its multiplicative gates can learn the fine distinction between sequences of spikes spaced either 50 or 49 time steps apart without the help of any short training exemplars."

The peephole implementation is based on:

Sak et al., 2014

Example:

# Create 2 PeepholeLSTMCells
peephole_lstm_cells = [PeepholeLSTMCell(size) for size in [128, 256]]
# Create a layer composed sequentially of the peephole LSTM cells.
layer = RNN(peephole_lstm_cells)
input = keras.Input((timesteps, input_dim))
output = layer(input)
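
A slightly more self-contained sketch of the same stacked-cell idea (timesteps and input_dim are placeholder values assumed for illustration):

import tensorflow as tf

timesteps, input_dim = 10, 8  # hypothetical sequence shape
peephole_lstm_cells = [
    tf.keras.experimental.PeepholeLSTMCell(size) for size in [128, 256]]
layer = tf.keras.layers.RNN(peephole_lstm_cells)
inputs = tf.keras.Input((timesteps, input_dim))
outputs = layer(inputs)
model = tf.keras.Model(inputs, outputs)
model.summary()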
Expand source code
class PeepholeLSTMCell(LSTMCell):
  """Equivalent to LSTMCell class but adds peephole connections.

  Peephole connections allow the gates to utilize the previous internal state as
  well as the previous hidden state (which is what LSTMCell is limited to).
  This allows PeepholeLSTMCell to better learn precise timings over LSTMCell.

  From [Gers et al., 2002](
    http://www.jmlr.org/papers/volume3/gers02a/gers02a.pdf):

  "We find that LSTM augmented by 'peephole connections' from its internal
  cells to its multiplicative gates can learn the fine distinction between
  sequences of spikes spaced either 50 or 49 time steps apart without the help
  of any short training exemplars."

  The peephole implementation is based on:

  [Sak et al., 2014](https://research.google.com/pubs/archive/43905.pdf)

  Example:

  ```python
  # Create 2 PeepholeLSTMCells
  peephole_lstm_cells = [PeepholeLSTMCell(size) for size in [128, 256]]
  # Create a layer composed sequentially of the peephole LSTM cells.
  layer = RNN(peephole_lstm_cells)
  input = keras.Input((timesteps, input_dim))
  output = layer(input)
  ```
  """

  def __init__(self,
               units,
               activation='tanh',
               recurrent_activation='hard_sigmoid',
               use_bias=True,
               kernel_initializer='glorot_uniform',
               recurrent_initializer='orthogonal',
               bias_initializer='zeros',
               unit_forget_bias=True,
               kernel_regularizer=None,
               recurrent_regularizer=None,
               bias_regularizer=None,
               kernel_constraint=None,
               recurrent_constraint=None,
               bias_constraint=None,
               dropout=0.,
               recurrent_dropout=0.,
               **kwargs):
    warnings.warn('`tf.keras.experimental.PeepholeLSTMCell` is deprecated '
                  'and will be removed in a future version. '
                  'Please use tensorflow_addons.rnn.PeepholeLSTMCell '
                  'instead.')
    super(PeepholeLSTMCell, self).__init__(
        units=units,
        activation=activation,
        recurrent_activation=recurrent_activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        recurrent_initializer=recurrent_initializer,
        bias_initializer=bias_initializer,
        unit_forget_bias=unit_forget_bias,
        kernel_regularizer=kernel_regularizer,
        recurrent_regularizer=recurrent_regularizer,
        bias_regularizer=bias_regularizer,
        kernel_constraint=kernel_constraint,
        recurrent_constraint=recurrent_constraint,
        bias_constraint=bias_constraint,
        dropout=dropout,
        recurrent_dropout=recurrent_dropout,
        implementation=kwargs.pop('implementation', 1),
        **kwargs)

  def build(self, input_shape):
    super(PeepholeLSTMCell, self).build(input_shape)
    # The following are the weight matrices for the peephole connections. These
    # are multiplied with the previous internal state during the computation of
    # carry and output.
    self.input_gate_peephole_weights = self.add_weight(
        shape=(self.units,),
        name='input_gate_peephole_weights',
        initializer=self.kernel_initializer)
    self.forget_gate_peephole_weights = self.add_weight(
        shape=(self.units,),
        name='forget_gate_peephole_weights',
        initializer=self.kernel_initializer)
    self.output_gate_peephole_weights = self.add_weight(
        shape=(self.units,),
        name='output_gate_peephole_weights',
        initializer=self.kernel_initializer)

  def _compute_carry_and_output(self, x, h_tm1, c_tm1):
    x_i, x_f, x_c, x_o = x
    h_tm1_i, h_tm1_f, h_tm1_c, h_tm1_o = h_tm1
    i = self.recurrent_activation(
        x_i + backend.dot(h_tm1_i, self.recurrent_kernel[:, :self.units]) +
        self.input_gate_peephole_weights * c_tm1)
    f = self.recurrent_activation(x_f + backend.dot(
        h_tm1_f, self.recurrent_kernel[:, self.units:self.units * 2]) +
                                  self.forget_gate_peephole_weights * c_tm1)
    c = f * c_tm1 + i * self.activation(x_c + backend.dot(
        h_tm1_c, self.recurrent_kernel[:, self.units * 2:self.units * 3]))
    o = self.recurrent_activation(
        x_o + backend.dot(h_tm1_o, self.recurrent_kernel[:, self.units * 3:]) +
        self.output_gate_peephole_weights * c)
    return c, o

  def _compute_carry_and_output_fused(self, z, c_tm1):
    z0, z1, z2, z3 = z
    i = self.recurrent_activation(z0 +
                                  self.input_gate_peephole_weights * c_tm1)
    f = self.recurrent_activation(z1 +
                                  self.forget_gate_peephole_weights * c_tm1)
    c = f * c_tm1 + i * self.activation(z2)
    o = self.recurrent_activation(z3 + self.output_gate_peephole_weights * c)
    return c, o

Ancestors

Inherited members

class SequenceFeatures (feature_columns, trainable=True, name=None, **kwargs)

A layer for sequence input.

All feature_columns must be sequence dense columns with the same sequence_length. The output of this method can be fed into sequence networks, such as RNN.

The output of this method is a 3D Tensor of shape [batch_size, T, D]. T is the maximum sequence length for this batch, which could differ from batch to batch.

If multiple feature_columns are given with Di num_elements each, their outputs are concatenated. So, the final Tensor has shape [batch_size, T, D0 + D1 + ... + Dn].

Example:


import tensorflow as tf

# Behavior of some cells or feature columns may depend on whether we are in
# training or inference mode, e.g. applying dropout.
training = True
rating = tf.feature_column.sequence_numeric_column('rating')
watches = tf.feature_column.sequence_categorical_column_with_identity(
    'watches', num_buckets=1000)
watches_embedding = tf.feature_column.embedding_column(watches,
                                            dimension=10)
columns = [rating, watches_embedding]

features = {
 'rating': tf.sparse.from_dense([[1.0,1.1, 0, 0, 0],
                                             [2.0,2.1,2.2, 2.3, 2.5]]),
 'watches': tf.sparse.from_dense([[2, 85, 0, 0, 0],[33,78, 2, 73, 1]])
}

sequence_input_layer = tf.keras.experimental.SequenceFeatures(columns)
sequence_input, sequence_length = sequence_input_layer(
   features, training=training)
sequence_length_mask = tf.sequence_mask(sequence_length)
hidden_size = 32
rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
rnn_layer = tf.keras.layers.RNN(rnn_cell)
outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)

"Constructs a SequenceFeatures layer.

Args

feature_columns
An iterable of dense sequence columns. Valid columns are an embedding_column that wraps a sequence_categorical_column_with_*, or a sequence_numeric_column.
trainable
Boolean, whether the layer's variables will be updated via gradient descent during training.
name
Name to give to the SequenceFeatures.
**kwargs
Keyword arguments to construct a layer.

Raises

ValueError
If any of the feature_columns is not a SequenceDenseColumn.
Expand source code
class SequenceFeatures(kfc._BaseFeaturesLayer):
  """A layer for sequence input.

    All `feature_columns` must be sequence dense columns with the same
    `sequence_length`. The output of this method can be fed into sequence
    networks, such as RNN.

    The output of this method is a 3D `Tensor` of shape `[batch_size, T, D]`.
    `T` is the maximum sequence length for this batch, which could differ from
    batch to batch.

    If multiple `feature_columns` are given with `Di` `num_elements` each, their
    outputs are concatenated. So, the final `Tensor` has shape
    `[batch_size, T, D0 + D1 + ... + Dn]`.

    Example:

    ```python

    import tensorflow as tf

    # Behavior of some cells or feature columns may depend on whether we are in
    # training or inference mode, e.g. applying dropout.
    training = True
    rating = tf.feature_column.sequence_numeric_column('rating')
    watches = tf.feature_column.sequence_categorical_column_with_identity(
        'watches', num_buckets=1000)
    watches_embedding = tf.feature_column.embedding_column(watches,
                                                dimension=10)
    columns = [rating, watches_embedding]

    features = {
     'rating': tf.sparse.from_dense([[1.0,1.1, 0, 0, 0],
                                                 [2.0,2.1,2.2, 2.3, 2.5]]),
     'watches': tf.sparse.from_dense([[2, 85, 0, 0, 0],[33,78, 2, 73, 1]])
    }

    sequence_input_layer = tf.keras.experimental.SequenceFeatures(columns)
    sequence_input, sequence_length = sequence_input_layer(
       features, training=training)
    sequence_length_mask = tf.sequence_mask(sequence_length)
    hidden_size = 32
    rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
    rnn_layer = tf.keras.layers.RNN(rnn_cell)
    outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
    ```
  """

  def __init__(
      self,
      feature_columns,
      trainable=True,
      name=None,
      **kwargs):
    """"Constructs a SequenceFeatures layer.

    Args:
      feature_columns: An iterable of dense sequence columns. Valid columns are
        - `embedding_column` that wraps a `sequence_categorical_column_with_*`
        - `sequence_numeric_column`.
      trainable: Boolean, whether the layer's variables will be updated via
        gradient descent during training.
      name: Name to give to the SequenceFeatures.
      **kwargs: Keyword arguments to construct a layer.

    Raises:
      ValueError: If any of the `feature_columns` is not a
        `SequenceDenseColumn`.
    """
    super(SequenceFeatures, self).__init__(
        feature_columns=feature_columns,
        trainable=trainable,
        name=name,
        expected_column_type=tf.__internal__.feature_column.SequenceDenseColumn,
        **kwargs)

  @property
  def _is_feature_layer(self):
    return True

  def _target_shape(self, input_shape, total_elements):
    return (input_shape[0], input_shape[1], total_elements)

  def call(self, features, training=None):
    """Returns sequence input corresponding to the `feature_columns`.

    Args:
      features: A dict mapping keys to tensors.
      training: Python boolean or None, indicating whether the layer is being
        run in training mode. This argument is passed to the call method of any
        `FeatureColumn` that takes a `training` argument. For example, if a
        `FeatureColumn` performed dropout, the column could expose a `training`
        argument to control whether the dropout should be applied. If `None`,
        defaults to `tf.keras.backend.learning_phase()`.


    Returns:
      An `(input_layer, sequence_length)` tuple where:
      - input_layer: A float `Tensor` of shape `[batch_size, T, D]`.
          `T` is the maximum sequence length for this batch, which could differ
          from batch to batch. `D` is the sum of `num_elements` for all
          `feature_columns`.
      - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence
          length for each example.

    Raises:
      ValueError: If features are not a dictionary.
    """
    if not isinstance(features, dict):
      raise ValueError('We expected a dictionary here. Instead we got: ',
                       features)
    if training is None:
      training = backend.learning_phase()
    transformation_cache = tf.__internal__.feature_column.FeatureTransformationCache(features)
    output_tensors = []
    sequence_lengths = []

    for column in self._feature_columns:
      with backend.name_scope(column.name):
        try:
          dense_tensor, sequence_length = column.get_sequence_dense_tensor(
              transformation_cache, self._state_manager, training=training)
        except TypeError:
          dense_tensor, sequence_length = column.get_sequence_dense_tensor(
              transformation_cache, self._state_manager)
        # Flattens the final dimension to produce a 3D Tensor.
        output_tensors.append(self._process_dense_tensor(column, dense_tensor))
        sequence_lengths.append(sequence_length)

    # Check and process sequence lengths.
    kfc._verify_static_batch_size_equality(    # pylint: disable=protected-access
        sequence_lengths, self._feature_columns)
    sequence_length = _assert_all_equal_and_return(sequence_lengths)

    return self._verify_and_concat_tensors(output_tensors), sequence_length

Ancestors

  • keras.feature_column.base_feature_layer._BaseFeaturesLayer
  • Layer
  • tensorflow.python.module.module.Module
  • tensorflow.python.training.tracking.tracking.AutoTrackable
  • tensorflow.python.training.tracking.base.Trackable
  • LayerVersionSelector

Methods

def call(self, features, training=None)

Returns sequence input corresponding to the feature_columns.

Args

features
A dict mapping keys to tensors.
training
Python boolean or None, indicating whether the layer is being run in training mode. This argument is passed to the call method of any FeatureColumn that takes a training argument. For example, if a FeatureColumn performed dropout, the column could expose a training argument to control whether the dropout should be applied. If None, defaults to tf.keras.backend.learning_phase().

Returns

An (input_layer, sequence_length) tuple where:

  • input_layer: A float Tensor of shape [batch_size, T, D]. T is the maximum sequence length for this batch, which could differ from batch to batch. D is the sum of num_elements for all feature_columns.
  • sequence_length: An int Tensor of shape [batch_size]. The sequence length for each example.

Raises

ValueError
If features are not a dictionary.
Expand source code
def call(self, features, training=None):
  """Returns sequence input corresponding to the `feature_columns`.

  Args:
    features: A dict mapping keys to tensors.
    training: Python boolean or None, indicating whether the layer is being
      run in training mode. This argument is passed to the call method of any
      `FeatureColumn` that takes a `training` argument. For example, if a
      `FeatureColumn` performed dropout, the column could expose a `training`
      argument to control whether the dropout should be applied. If `None`,
      defaults to `tf.keras.backend.learning_phase()`.


  Returns:
    An `(input_layer, sequence_length)` tuple where:
    - input_layer: A float `Tensor` of shape `[batch_size, T, D]`.
        `T` is the maximum sequence length for this batch, which could differ
        from batch to batch. `D` is the sum of `num_elements` for all
        `feature_columns`.
    - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence
        length for each example.

  Raises:
    ValueError: If features are not a dictionary.
  """
  if not isinstance(features, dict):
    raise ValueError('We expected a dictionary here. Instead we got: ',
                     features)
  if training is None:
    training = backend.learning_phase()
  transformation_cache = tf.__internal__.feature_column.FeatureTransformationCache(features)
  output_tensors = []
  sequence_lengths = []

  for column in self._feature_columns:
    with backend.name_scope(column.name):
      try:
        dense_tensor, sequence_length = column.get_sequence_dense_tensor(
            transformation_cache, self._state_manager, training=training)
      except TypeError:
        dense_tensor, sequence_length = column.get_sequence_dense_tensor(
            transformation_cache, self._state_manager)
      # Flattens the final dimension to produce a 3D Tensor.
      output_tensors.append(self._process_dense_tensor(column, dense_tensor))
      sequence_lengths.append(sequence_length)

  # Check and process sequence lengths.
  kfc._verify_static_batch_size_equality(    # pylint: disable=protected-access
      sequence_lengths, self._feature_columns)
  sequence_length = _assert_all_equal_and_return(sequence_lengths)

  return self._verify_and_concat_tensors(output_tensors), sequence_length

Inherited members

class WideDeepModel (linear_model, dnn_model, activation=None, **kwargs)

Wide & Deep Model for regression and classification problems.

This model jointly trains a linear model and a DNN model.

Example:

linear_model = LinearModel()
dnn_model = keras.Sequential([keras.layers.Dense(units=64),
                              keras.layers.Dense(units=1)])
combined_model = WideDeepModel(linear_model, dnn_model)
combined_model.compile(optimizer=['sgd', 'adam'], loss='mse', metrics=['mse'])
# define dnn_inputs and linear_inputs as separate numpy arrays or
# a single numpy array if dnn_inputs is same as linear_inputs.
combined_model.fit([linear_inputs, dnn_inputs], y, epochs=epochs)
# or define a single `tf.data.Dataset` that contains a single tensor or
# separate tensors for dnn_inputs and linear_inputs.
dataset = tf.data.Dataset.from_tensors(([linear_inputs, dnn_inputs], y))
combined_model.fit(dataset, epochs=epochs)
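
For concreteness, a minimal sketch of the tf.data.Dataset path mentioned above, using synthetic numpy inputs (the shapes and epoch count are assumptions made for illustration):

import numpy as np
import tensorflow as tf

linear_inputs = np.random.uniform(size=(64, 5)).astype('float32')
dnn_inputs = np.random.uniform(size=(64, 8)).astype('float32')
y = np.random.uniform(size=(64, 1)).astype('float32')

linear_model = tf.keras.experimental.LinearModel()
dnn_model = tf.keras.Sequential([tf.keras.layers.Dense(units=64),
                                 tf.keras.layers.Dense(units=1)])
combined_model = tf.keras.experimental.WideDeepModel(linear_model, dnn_model)
combined_model.compile(optimizer=['sgd', 'adam'], loss='mse', metrics=['mse'])

dataset = tf.data.Dataset.from_tensors(((linear_inputs, dnn_inputs), y))
combined_model.fit(dataset, epochs=3, verbose=0)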

Both the linear and the DNN model can be pre-compiled and trained separately before joint training:

Example:

linear_model = LinearModel()
linear_model.compile('adagrad', 'mse')
linear_model.fit(linear_inputs, y, epochs=epochs)
dnn_model = keras.Sequential([keras.layers.Dense(units=1)])
dnn_model.compile('rmsprop', 'mse')
dnn_model.fit(dnn_inputs, y, epochs=epochs)
combined_model = WideDeepModel(linear_model, dnn_model)
combined_model.compile(optimizer=['sgd', 'adam'], loss='mse', metrics=['mse'])
combined_model.fit([linear_inputs, dnn_inputs], y, epochs=epochs)

Create a Wide & Deep Model.

Args

linear_model
a premade LinearModel whose output must match the output of the DNN model.
dnn_model
a tf.keras.Model whose output must match the output of the linear model.
activation
Activation function. Set it to None to maintain a linear activation.
**kwargs
The keyword arguments that are passed on to BaseLayer.__init__. Allowed keyword arguments include name.
Expand source code
class WideDeepModel(keras_training.Model):
  r"""Wide & Deep Model for regression and classification problems.

  This model jointly trains a linear and a dnn model.

  Example:

  ```python
  linear_model = LinearModel()
  dnn_model = keras.Sequential([keras.layers.Dense(units=64),
                               keras.layers.Dense(units=1)])
  combined_model = WideDeepModel(linear_model, dnn_model)
  combined_model.compile(optimizer=['sgd', 'adam'], loss='mse', metrics=['mse'])
  # define dnn_inputs and linear_inputs as separate numpy arrays or
  # a single numpy array if dnn_inputs is same as linear_inputs.
  combined_model.fit([linear_inputs, dnn_inputs], y, epochs=epochs)
  # or define a single `tf.data.Dataset` that contains a single tensor or
  # separate tensors for dnn_inputs and linear_inputs.
  dataset = tf.data.Dataset.from_tensors(([linear_inputs, dnn_inputs], y))
  combined_model.fit(dataset, epochs=epochs)
  ```

  Both linear and dnn model can be pre-compiled and trained separately
  before jointly training:

  Example:
  ```python
  linear_model = LinearModel()
  linear_model.compile('adagrad', 'mse')
  linear_model.fit(linear_inputs, y, epochs=epochs)
  dnn_model = keras.Sequential([keras.layers.Dense(units=1)])
  dnn_model.compile('rmsprop', 'mse')
  dnn_model.fit(dnn_inputs, y, epochs=epochs)
  combined_model = WideDeepModel(linear_model, dnn_model)
  combined_model.compile(optimizer=['sgd', 'adam'], loss='mse', metrics=['mse'])
  combined_model.fit([linear_inputs, dnn_inputs], y, epochs=epochs)
  ```

  """

  def __init__(self, linear_model, dnn_model, activation=None, **kwargs):
    """Create a Wide & Deep Model.

    Args:
      linear_model: a premade LinearModel, its output must match the output of
        the dnn model.
      dnn_model: a `tf.keras.Model`, its output must match the output of the
        linear model.
      activation: Activation function. Set it to None to maintain a linear
        activation.
      **kwargs: The keyword arguments that are passed on to BaseLayer.__init__.
        Allowed keyword arguments include `name`.
    """
    super(WideDeepModel, self).__init__(**kwargs)
    base_layer.keras_premade_model_gauge.get_cell('WideDeep').set(True)
    self.linear_model = linear_model
    self.dnn_model = dnn_model
    self.activation = activations.get(activation)

  def call(self, inputs, training=None):
    if not isinstance(inputs, (tuple, list)) or len(inputs) != 2:
      linear_inputs = dnn_inputs = inputs
    else:
      linear_inputs, dnn_inputs = inputs
    linear_output = self.linear_model(linear_inputs)
    # pylint: disable=protected-access
    if self.dnn_model._expects_training_arg:
      if training is None:
        training = backend.learning_phase()
      dnn_output = self.dnn_model(dnn_inputs, training=training)
    else:
      dnn_output = self.dnn_model(dnn_inputs)
    output = tf.nest.map_structure(lambda x, y: (x + y), linear_output, dnn_output)
    if self.activation:
      return tf.nest.map_structure(self.activation, output)
    return output

  # This does not support gradient scaling and LossScaleOptimizer.
  def train_step(self, data):
    x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data)
    x, y, sample_weight = data_adapter.expand_1d((x, y, sample_weight))

    with tf.GradientTape() as tape:
      y_pred = self(x, training=True)
      loss = self.compiled_loss(
          y, y_pred, sample_weight, regularization_losses=self.losses)
    self.compiled_metrics.update_state(y, y_pred, sample_weight)

    if isinstance(self.optimizer, (list, tuple)):
      linear_vars = self.linear_model.trainable_variables
      dnn_vars = self.dnn_model.trainable_variables
      linear_grads, dnn_grads = tape.gradient(loss, (linear_vars, dnn_vars))

      linear_optimizer = self.optimizer[0]
      dnn_optimizer = self.optimizer[1]
      linear_optimizer.apply_gradients(zip(linear_grads, linear_vars))
      dnn_optimizer.apply_gradients(zip(dnn_grads, dnn_vars))
    else:
      trainable_variables = self.trainable_variables
      grads = tape.gradient(loss, trainable_variables)
      self.optimizer.apply_gradients(zip(grads, trainable_variables))

    return {m.name: m.result() for m in self.metrics}

  def _make_train_function(self):
    # Only needed for graph mode and model_to_estimator.
    has_recompiled = self._recompile_weights_loss_and_weighted_metrics()
    self._check_trainable_weights_consistency()
    # If we have re-compiled the loss/weighted metric sub-graphs then create
    # train function even if one exists already. This is because
    # `_feed_sample_weights` list has been updated on re-compile.
    if getattr(self, 'train_function', None) is None or has_recompiled:
      # Restore the compiled trainable state.
      current_trainable_state = self._get_trainable_state()
      self._set_trainable_state(self._compiled_trainable_state)

      inputs = (
          self._feed_inputs + self._feed_targets + self._feed_sample_weights)
      if not isinstance(backend.symbolic_learning_phase(), int):
        inputs += [backend.symbolic_learning_phase()]

      if isinstance(self.optimizer, (list, tuple)):
        linear_optimizer = self.optimizer[0]
        dnn_optimizer = self.optimizer[1]
      else:
        linear_optimizer = self.optimizer
        dnn_optimizer = self.optimizer

      with backend.get_graph().as_default():
        with backend.name_scope('training'):
          # Training updates
          updates = []
          linear_updates = linear_optimizer.get_updates(
              params=self.linear_model.trainable_weights,  # pylint: disable=protected-access
              loss=self.total_loss)
          updates += linear_updates
          dnn_updates = dnn_optimizer.get_updates(
              params=self.dnn_model.trainable_weights,  # pylint: disable=protected-access
              loss=self.total_loss)
          updates += dnn_updates
          # Unconditional updates
          updates += self.get_updates_for(None)
          # Conditional updates relevant to this model
          updates += self.get_updates_for(self.inputs)

        metrics = self._get_training_eval_metrics()
        metrics_tensors = [
            m._call_result for m in metrics if hasattr(m, '_call_result')  # pylint: disable=protected-access
        ]

      with backend.name_scope('training'):
        # Gets loss and metrics. Updates weights at each call.
        fn = backend.function(
            inputs, [self.total_loss] + metrics_tensors,
            updates=updates,
            name='train_function',
            **self._function_kwargs)
        setattr(self, 'train_function', fn)

      # Restore the current trainable state
      self._set_trainable_state(current_trainable_state)

  def get_config(self):
    linear_config = generic_utils.serialize_keras_object(self.linear_model)
    dnn_config = generic_utils.serialize_keras_object(self.dnn_model)
    config = {
        'linear_model': linear_config,
        'dnn_model': dnn_config,
        'activation': activations.serialize(self.activation),
    }
    base_config = base_layer.Layer.get_config(self)
    return dict(list(base_config.items()) + list(config.items()))

  @classmethod
  def from_config(cls, config, custom_objects=None):
    linear_config = config.pop('linear_model')
    linear_model = layer_module.deserialize(linear_config, custom_objects)
    dnn_config = config.pop('dnn_model')
    dnn_model = layer_module.deserialize(dnn_config, custom_objects)
    activation = activations.deserialize(
        config.pop('activation', None), custom_objects=custom_objects)
    return cls(
        linear_model=linear_model,
        dnn_model=dnn_model,
        activation=activation,
        **config)

Ancestors

Inherited members