Module keras.distribute.saved_model_test_base

Base class for testing saving/loading with distribution strategies (DS).

Expand source code
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base class for testing saving/loading with DS."""

import tensorflow.compat.v2 as tf

import os

from absl.testing import parameterized
import numpy as np
from keras import backend
from keras.distribute import model_combinations

# Seed applied to both the NumPy and TensorFlow RNGs in
# TestSavedModelBase.setUp.
_RANDOM_SEED = 1337
# Key used to look up the callable in a loaded SavedModel's `signatures`.
_DEFAULT_FUNCTION_KEY = 'serving_default'

# Absolute tolerance returned by get_tolerance when neither strategy is a TPU
# strategy; used as `atol` when comparing results before and after saving.
_TOLERANCE = 1e-30
# TPUs compute in bfloat16 in the underlying hardware, so they have less
# precision than CPU/GPU and need a looser tolerance.
_TPU_TOLERANCE = 1e-7

# Number of steps passed to `model.predict` in `_predict_with_model`.
PREDICT_STEPS = 1

# Model/input combinations used by the `simple_models_with_*` helpers.
simple_models = [
    model_combinations.simple_functional_model,
    model_combinations.simple_sequential_model,
    model_combinations.simple_subclass_model,
]


# Distribution strategy combinations used by every `*_with_strategies` and
# `*_with_strategy_pairs` helper below.
strategies = [
    tf.__internal__.distribute.combinations.default_strategy,
    tf.__internal__.distribute.combinations.one_device_strategy,
    tf.__internal__.distribute.combinations.one_device_strategy_gpu,
    tf.__internal__.distribute.combinations.mirrored_strategy_with_one_cpu,
    tf.__internal__.distribute.combinations.mirrored_strategy_with_one_gpu,
    tf.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu,
    tf.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus,
    tf.__internal__.distribute.combinations.tpu_strategy,
    tf.__internal__.distribute.combinations.tpu_strategy_packed_var,
    tf.__internal__.distribute.combinations.central_storage_strategy_with_two_gpus,
]


def get_tolerance(save_distribution, restore_distribution):
  """Returns the tolerance to use when comparing saved/restored results.

  Args:
    save_distribution: the strategy used when saving, or None.
    restore_distribution: the strategy used when restoring, or None.

  Returns:
    `_TPU_TOLERANCE` if either strategy is a TPU strategy according to
    `backend.is_tpu_strategy`, otherwise `_TOLERANCE`.
  """
  involves_tpu = any(
      backend.is_tpu_strategy(strategy)
      for strategy in (save_distribution, restore_distribution))
  return _TPU_TOLERANCE if involves_tpu else _TOLERANCE


def simple_models_with_strategies():
  """Combines every simple model with every strategy, in eager mode."""
  combine = tf.__internal__.test.combinations.combine
  return combine(
      model_and_input=simple_models, distribution=strategies, mode=['eager'])


def simple_models_with_strategy_pairs():
  """Combines every simple model with every (save, restore) strategy pair."""
  combine = tf.__internal__.test.combinations.combine
  return combine(
      model_and_input=simple_models,
      distribution_for_saving=strategies,
      distribution_for_restoring=strategies,
      mode=['eager'])


def tfmodule_models_with_strategies():
  """Combines the simple tf.Module model with every strategy, eager mode."""
  combine = tf.__internal__.test.combinations.combine
  tfmodule_models = [model_combinations.simple_tfmodule_model]
  return combine(
      model_and_input=tfmodule_models, distribution=strategies, mode=['eager'])


def tfmodule_models_with_strategy_pairs():
  """Combines the simple tf.Module model with every strategy pair."""
  combine = tf.__internal__.test.combinations.combine
  tfmodule_models = [model_combinations.simple_tfmodule_model]
  return combine(
      model_and_input=tfmodule_models,
      distribution_for_saving=strategies,
      distribution_for_restoring=strategies,
      mode=['eager'])


def load_and_run_with_saved_model_api(distribution, saved_dir, predict_dataset,
                                      output_name):
  """Loads a saved_model using tf.saved_model API, and runs it.

  Args:
    distribution: the strategy to run under; a falsy value (e.g. None) runs
      the serving signature directly.
    saved_dir: path passed to `tf.saved_model.load`.
    predict_dataset: dataset whose first element is fed to the signature.
    output_name: key of the signature output to return.

  Returns:
    The `output_name` entry of the signature output. Under a strategy, the
    local per-replica results are concatenated along axis 0.
  """
  loaded = tf.saved_model.load(saved_dir)

  if not distribution:
    serving_fn = loaded.signatures[_DEFAULT_FUNCTION_KEY]
    outputs = serving_fn(next(iter(predict_dataset)))
    return outputs[output_name]

  distributed_dataset = distribution.experimental_distribute_dataset(
      predict_dataset)
  per_replica_batch = next(iter(distributed_dataset))
  per_replica_outputs = distribution.run(
      loaded.signatures[_DEFAULT_FUNCTION_KEY], args=(per_replica_batch,))

  # Unpack the per-replica value into a list of local results, then join them.
  local_results = distribution.experimental_local_results(
      per_replica_outputs[output_name])
  return tf.concat(local_results, 0)


class TestSavedModelBase(tf.test.TestCase, parameterized.TestCase):
  """Shared scaffolding for save/load tests under distribution strategies.

  Subclasses supply `_save_model` and `_load_and_run_model`. The `run_test_*`
  helpers train a model, save it, and (except `run_test_save_strategy`)
  reload it and compare predictions made before saving and after loading.
  """

  def setUp(self):
    """Seeds the NumPy and TensorFlow RNGs, then runs the parent setUp."""
    np.random.seed(_RANDOM_SEED)
    tf.compat.v1.set_random_seed(_RANDOM_SEED)
    self._root_dir = 'base'
    super(TestSavedModelBase, self).setUp()

  def _save_model(self, model, saved_dir):
    """Saves `model` under `saved_dir`; subclasses must override this.

    Args:
      model: a keras model object to save.
      saved_dir: a string path where the keras model should be written.

    Raises:
      NotImplementedError: always, in this base class.
    """
    raise NotImplementedError('must be implemented in descendants')

  def _load_and_run_model(self,
                          distribution,
                          saved_dir,
                          predict_dataset,
                          output_name='output_1'):
    """Loads the model and runs 1 predict step; subclasses must override.

    Args:
      distribution: the distribution strategy used to load the model, or None
        when no distribution strategy is used.
      saved_dir: string path where the model was saved.
      predict_dataset: the data to run predict on, for cross_replica context.
      output_name: string name of the model's output layer.

    Raises:
      NotImplementedError: always, in this base class.
    """
    raise NotImplementedError('must be implemented in descendants')

  def _train_model(self, model, x_train, y_train, batch_size):
    """Fits `model` for one epoch of 100 steps on repeated, batched data."""
    batches = (
        tf.data.Dataset.from_tensor_slices((x_train, y_train))
        .repeat()
        .batch(batch_size))

    # A single epoch, bounded by steps_per_epoch since the dataset repeats.
    model.fit(x=batches, epochs=1, steps_per_epoch=100)

  def _predict_with_model(self, distribution, model, predict_dataset):
    """Runs `model.predict` for PREDICT_STEPS steps; ignores `distribution`."""
    predictions = model.predict(predict_dataset, steps=PREDICT_STEPS)
    return predictions

  def _get_predict_dataset(self, x_predict, batch_size):
    """Builds a repeated, batched dataset from `x_predict`."""
    return (
        tf.data.Dataset.from_tensor_slices(x_predict)
        .repeat()
        .batch(batch_size))

  def run_test_save_no_strategy_restore_strategy(self, model_and_input,
                                                 distribution):
    """Save a model without DS, and restore it with DS.

    Args:
      model_and_input: object providing `get_model()`, `get_data()` and
        `get_batch_size()`.
      distribution: the strategy whose scope is used for loading and running.
    """
    export_dir = os.path.join(self.get_temp_dir(), '0')

    model = model_and_input.get_model()
    features, labels, predict_inputs = model_and_input.get_data()
    batch_size = model_and_input.get_batch_size()
    predict_ds = self._get_predict_dataset(predict_inputs, batch_size)

    self._train_model(model, features, labels, batch_size)
    expected = self._predict_with_model(None, model, predict_ds)

    self._save_model(model, export_dir)

    with distribution.scope():
      actual = self._load_and_run_model(
          distribution=distribution,
          saved_dir=export_dir,
          predict_dataset=predict_ds)

    self.assertAllClose(
        expected, actual, atol=get_tolerance(None, distribution))

  def run_test_save_strategy_restore_no_strategy(self, model_and_input,
                                                 distribution, save_in_scope):
    """Save a model with DS, and restore it without DS.

    Args:
      model_and_input: object providing `get_model()`, `get_data()` and
        `get_batch_size()`.
      distribution: the strategy whose scope is used for building, training
        and predicting before the save.
      save_in_scope: if truthy, `_save_model` is also called inside
        `distribution.scope()`.
    """
    export_dir = os.path.join(self.get_temp_dir(), '1')

    with distribution.scope():
      model = model_and_input.get_model()
      features, labels, predict_inputs = model_and_input.get_data()
      batch_size = model_and_input.get_batch_size()

      self._train_model(model, features, labels, batch_size)
      predict_ds = self._get_predict_dataset(predict_inputs, batch_size)
      expected = self._predict_with_model(distribution, model, predict_ds)

    if not save_in_scope:
      self._save_model(model, export_dir)
    else:
      with distribution.scope():
        self._save_model(model, export_dir)

    actual = self._load_and_run_model(
        distribution=None,
        saved_dir=export_dir,
        predict_dataset=predict_ds)

    self.assertAllClose(
        expected, actual, atol=get_tolerance(distribution, None))

  def run_test_save_strategy_restore_strategy(self, model_and_input,
                                              distribution_for_saving,
                                              distribution_for_restoring,
                                              save_in_scope):
    """Save a model with DS, and restore it with potentially different DS.

    Args:
      model_and_input: object providing `get_model()`, `get_data()` and
        `get_batch_size()`.
      distribution_for_saving: the strategy whose scope is used for building,
        training and predicting before the save.
      distribution_for_restoring: the strategy whose scope is used for
        loading and running the saved model.
      save_in_scope: if truthy, `_save_model` is also called inside
        `distribution_for_saving.scope()`.
    """
    export_dir = os.path.join(self.get_temp_dir(), '2')

    with distribution_for_saving.scope():
      model = model_and_input.get_model()
      features, labels, predict_inputs = model_and_input.get_data()
      batch_size = model_and_input.get_batch_size()

      self._train_model(model, features, labels, batch_size)
      predict_ds = self._get_predict_dataset(predict_inputs, batch_size)
      expected = self._predict_with_model(
          distribution_for_saving, model, predict_ds)

    if not save_in_scope:
      self._save_model(model, export_dir)
    else:
      with distribution_for_saving.scope():
        self._save_model(model, export_dir)

    with distribution_for_restoring.scope():
      actual = self._load_and_run_model(
          distribution=distribution_for_restoring,
          saved_dir=export_dir,
          predict_dataset=predict_ds)

    self.assertAllClose(
        expected,
        actual,
        atol=get_tolerance(distribution_for_saving,
                           distribution_for_restoring))

  def run_test_save_strategy(self, model_and_input,
                             distribution, save_in_scope):
    """Save a model with DS.

    Args:
      model_and_input: object providing `get_model()`, `get_data()` and
        `get_batch_size()`.
      distribution: the strategy whose scope is used for building and
        training the model.
      save_in_scope: if truthy, `_save_model` is also called inside
        `distribution.scope()`.

    Returns:
      The directory path the model was saved to.
    """
    export_dir = os.path.join(self.get_temp_dir(), '3')

    with distribution.scope():
      model = model_and_input.get_model()
      features, labels, _ = model_and_input.get_data()
      batch_size = model_and_input.get_batch_size()
      self._train_model(model, features, labels, batch_size)

    if not save_in_scope:
      self._save_model(model, export_dir)
    else:
      with distribution.scope():
        self._save_model(model, export_dir)
    return export_dir

Functions

def get_tolerance(save_distribution, restore_distribution)
Expand source code
def get_tolerance(save_distribution, restore_distribution):
  """Returns the tolerance to use when comparing saved/restored results.

  Args:
    save_distribution: the strategy used when saving, or None.
    restore_distribution: the strategy used when restoring, or None.

  Returns:
    `_TPU_TOLERANCE` if either strategy is a TPU strategy according to
    `backend.is_tpu_strategy`, otherwise `_TOLERANCE`.
  """
  involves_tpu = any(
      backend.is_tpu_strategy(strategy)
      for strategy in (save_distribution, restore_distribution))
  return _TPU_TOLERANCE if involves_tpu else _TOLERANCE
def load_and_run_with_saved_model_api(distribution, saved_dir, predict_dataset, output_name)

Loads a saved_model using tf.saved_model API, and runs it.

Expand source code
def load_and_run_with_saved_model_api(distribution, saved_dir, predict_dataset,
                                      output_name):
  """Loads a saved_model using tf.saved_model API, and runs it.

  Args:
    distribution: the strategy to run under; a falsy value (e.g. None) runs
      the serving signature directly.
    saved_dir: path passed to `tf.saved_model.load`.
    predict_dataset: dataset whose first element is fed to the signature.
    output_name: key of the signature output to return.

  Returns:
    The `output_name` entry of the signature output. Under a strategy, the
    local per-replica results are concatenated along axis 0.
  """
  loaded = tf.saved_model.load(saved_dir)

  if not distribution:
    serving_fn = loaded.signatures[_DEFAULT_FUNCTION_KEY]
    outputs = serving_fn(next(iter(predict_dataset)))
    return outputs[output_name]

  distributed_dataset = distribution.experimental_distribute_dataset(
      predict_dataset)
  per_replica_batch = next(iter(distributed_dataset))
  per_replica_outputs = distribution.run(
      loaded.signatures[_DEFAULT_FUNCTION_KEY], args=(per_replica_batch,))

  # Unpack the per-replica value into a list of local results, then join them.
  local_results = distribution.experimental_local_results(
      per_replica_outputs[output_name])
  return tf.concat(local_results, 0)
def simple_models_with_strategies()
Expand source code
def simple_models_with_strategies():
  """Combines every simple model with every strategy, in eager mode."""
  combine = tf.__internal__.test.combinations.combine
  return combine(
      model_and_input=simple_models, distribution=strategies, mode=['eager'])
def simple_models_with_strategy_pairs()
Expand source code
def simple_models_with_strategy_pairs():
  """Combines every simple model with every (save, restore) strategy pair."""
  combine = tf.__internal__.test.combinations.combine
  return combine(
      model_and_input=simple_models,
      distribution_for_saving=strategies,
      distribution_for_restoring=strategies,
      mode=['eager'])
def tfmodule_models_with_strategies()
Expand source code
def tfmodule_models_with_strategies():
  """Combines the simple tf.Module model with every strategy, eager mode."""
  combine = tf.__internal__.test.combinations.combine
  tfmodule_models = [model_combinations.simple_tfmodule_model]
  return combine(
      model_and_input=tfmodule_models, distribution=strategies, mode=['eager'])
def tfmodule_models_with_strategy_pairs()
Expand source code
def tfmodule_models_with_strategy_pairs():
  """Combines the simple tf.Module model with every strategy pair."""
  combine = tf.__internal__.test.combinations.combine
  tfmodule_models = [model_combinations.simple_tfmodule_model]
  return combine(
      model_and_input=tfmodule_models,
      distribution_for_saving=strategies,
      distribution_for_restoring=strategies,
      mode=['eager'])

Classes

class TestSavedModelBase (methodName='runTest')

Base class for testing saving/loading with distribution strategies (DS).

Create an instance of the class that will use the named test method when executed. Raises a ValueError if the instance does not have a method with the specified name.

Expand source code
class TestSavedModelBase(tf.test.TestCase, parameterized.TestCase):
  """Shared scaffolding for save/load tests under distribution strategies.

  Subclasses supply `_save_model` and `_load_and_run_model`. The `run_test_*`
  helpers train a model, save it, and (except `run_test_save_strategy`)
  reload it and compare predictions made before saving and after loading.
  """

  def setUp(self):
    """Seeds the NumPy and TensorFlow RNGs, then runs the parent setUp."""
    np.random.seed(_RANDOM_SEED)
    tf.compat.v1.set_random_seed(_RANDOM_SEED)
    self._root_dir = 'base'
    super(TestSavedModelBase, self).setUp()

  def _save_model(self, model, saved_dir):
    """Saves `model` under `saved_dir`; subclasses must override this.

    Args:
      model: a keras model object to save.
      saved_dir: a string path where the keras model should be written.

    Raises:
      NotImplementedError: always, in this base class.
    """
    raise NotImplementedError('must be implemented in descendants')

  def _load_and_run_model(self,
                          distribution,
                          saved_dir,
                          predict_dataset,
                          output_name='output_1'):
    """Loads the model and runs 1 predict step; subclasses must override.

    Args:
      distribution: the distribution strategy used to load the model, or None
        when no distribution strategy is used.
      saved_dir: string path where the model was saved.
      predict_dataset: the data to run predict on, for cross_replica context.
      output_name: string name of the model's output layer.

    Raises:
      NotImplementedError: always, in this base class.
    """
    raise NotImplementedError('must be implemented in descendants')

  def _train_model(self, model, x_train, y_train, batch_size):
    """Fits `model` for one epoch of 100 steps on repeated, batched data."""
    batches = (
        tf.data.Dataset.from_tensor_slices((x_train, y_train))
        .repeat()
        .batch(batch_size))

    # A single epoch, bounded by steps_per_epoch since the dataset repeats.
    model.fit(x=batches, epochs=1, steps_per_epoch=100)

  def _predict_with_model(self, distribution, model, predict_dataset):
    """Runs `model.predict` for PREDICT_STEPS steps; ignores `distribution`."""
    predictions = model.predict(predict_dataset, steps=PREDICT_STEPS)
    return predictions

  def _get_predict_dataset(self, x_predict, batch_size):
    """Builds a repeated, batched dataset from `x_predict`."""
    return (
        tf.data.Dataset.from_tensor_slices(x_predict)
        .repeat()
        .batch(batch_size))

  def run_test_save_no_strategy_restore_strategy(self, model_and_input,
                                                 distribution):
    """Save a model without DS, and restore it with DS.

    Args:
      model_and_input: object providing `get_model()`, `get_data()` and
        `get_batch_size()`.
      distribution: the strategy whose scope is used for loading and running.
    """
    export_dir = os.path.join(self.get_temp_dir(), '0')

    model = model_and_input.get_model()
    features, labels, predict_inputs = model_and_input.get_data()
    batch_size = model_and_input.get_batch_size()
    predict_ds = self._get_predict_dataset(predict_inputs, batch_size)

    self._train_model(model, features, labels, batch_size)
    expected = self._predict_with_model(None, model, predict_ds)

    self._save_model(model, export_dir)

    with distribution.scope():
      actual = self._load_and_run_model(
          distribution=distribution,
          saved_dir=export_dir,
          predict_dataset=predict_ds)

    self.assertAllClose(
        expected, actual, atol=get_tolerance(None, distribution))

  def run_test_save_strategy_restore_no_strategy(self, model_and_input,
                                                 distribution, save_in_scope):
    """Save a model with DS, and restore it without DS.

    Args:
      model_and_input: object providing `get_model()`, `get_data()` and
        `get_batch_size()`.
      distribution: the strategy whose scope is used for building, training
        and predicting before the save.
      save_in_scope: if truthy, `_save_model` is also called inside
        `distribution.scope()`.
    """
    export_dir = os.path.join(self.get_temp_dir(), '1')

    with distribution.scope():
      model = model_and_input.get_model()
      features, labels, predict_inputs = model_and_input.get_data()
      batch_size = model_and_input.get_batch_size()

      self._train_model(model, features, labels, batch_size)
      predict_ds = self._get_predict_dataset(predict_inputs, batch_size)
      expected = self._predict_with_model(distribution, model, predict_ds)

    if not save_in_scope:
      self._save_model(model, export_dir)
    else:
      with distribution.scope():
        self._save_model(model, export_dir)

    actual = self._load_and_run_model(
        distribution=None,
        saved_dir=export_dir,
        predict_dataset=predict_ds)

    self.assertAllClose(
        expected, actual, atol=get_tolerance(distribution, None))

  def run_test_save_strategy_restore_strategy(self, model_and_input,
                                              distribution_for_saving,
                                              distribution_for_restoring,
                                              save_in_scope):
    """Save a model with DS, and restore it with potentially different DS.

    Args:
      model_and_input: object providing `get_model()`, `get_data()` and
        `get_batch_size()`.
      distribution_for_saving: the strategy whose scope is used for building,
        training and predicting before the save.
      distribution_for_restoring: the strategy whose scope is used for
        loading and running the saved model.
      save_in_scope: if truthy, `_save_model` is also called inside
        `distribution_for_saving.scope()`.
    """
    export_dir = os.path.join(self.get_temp_dir(), '2')

    with distribution_for_saving.scope():
      model = model_and_input.get_model()
      features, labels, predict_inputs = model_and_input.get_data()
      batch_size = model_and_input.get_batch_size()

      self._train_model(model, features, labels, batch_size)
      predict_ds = self._get_predict_dataset(predict_inputs, batch_size)
      expected = self._predict_with_model(
          distribution_for_saving, model, predict_ds)

    if not save_in_scope:
      self._save_model(model, export_dir)
    else:
      with distribution_for_saving.scope():
        self._save_model(model, export_dir)

    with distribution_for_restoring.scope():
      actual = self._load_and_run_model(
          distribution=distribution_for_restoring,
          saved_dir=export_dir,
          predict_dataset=predict_ds)

    self.assertAllClose(
        expected,
        actual,
        atol=get_tolerance(distribution_for_saving,
                           distribution_for_restoring))

  def run_test_save_strategy(self, model_and_input,
                             distribution, save_in_scope):
    """Save a model with DS.

    Args:
      model_and_input: object providing `get_model()`, `get_data()` and
        `get_batch_size()`.
      distribution: the strategy whose scope is used for building and
        training the model.
      save_in_scope: if truthy, `_save_model` is also called inside
        `distribution.scope()`.

    Returns:
      The directory path the model was saved to.
    """
    export_dir = os.path.join(self.get_temp_dir(), '3')

    with distribution.scope():
      model = model_and_input.get_model()
      features, labels, _ = model_and_input.get_data()
      batch_size = model_and_input.get_batch_size()
      self._train_model(model, features, labels, batch_size)

    if not save_in_scope:
      self._save_model(model, export_dir)
    else:
      with distribution.scope():
        self._save_model(model, export_dir)
    return export_dir

Ancestors

  • tensorflow.python.framework.test_util.TensorFlowTestCase
  • absl.testing.parameterized.TestCase
  • absl.testing.absltest.TestCase
  • absl.third_party.unittest3_backport.case.TestCase
  • unittest.case.TestCase

Methods

def run_test_save_no_strategy_restore_strategy(self, model_and_input, distribution)

Save a model without DS, and restore it with DS.

Expand source code
def run_test_save_no_strategy_restore_strategy(self, model_and_input,
                                               distribution):
  """Save a model without DS, and restore it with DS.

  Args:
    model_and_input: object providing `get_model()`, `get_data()` and
      `get_batch_size()`.
    distribution: the strategy whose scope is used for loading and running.
  """
  export_dir = os.path.join(self.get_temp_dir(), '0')

  model = model_and_input.get_model()
  features, labels, predict_inputs = model_and_input.get_data()
  batch_size = model_and_input.get_batch_size()
  predict_ds = self._get_predict_dataset(predict_inputs, batch_size)

  self._train_model(model, features, labels, batch_size)
  expected = self._predict_with_model(None, model, predict_ds)

  self._save_model(model, export_dir)

  with distribution.scope():
    actual = self._load_and_run_model(
        distribution=distribution,
        saved_dir=export_dir,
        predict_dataset=predict_ds)

  self.assertAllClose(
      expected, actual, atol=get_tolerance(None, distribution))
def run_test_save_strategy(self, model_and_input, distribution, save_in_scope)

Save a model with DS.

Expand source code
def run_test_save_strategy(self, model_and_input,
                           distribution, save_in_scope):
  """Save a model with DS.

  Args:
    model_and_input: object providing `get_model()`, `get_data()` and
      `get_batch_size()`.
    distribution: the strategy whose scope is used for building and training
      the model.
    save_in_scope: if truthy, `_save_model` is also called inside
      `distribution.scope()`.

  Returns:
    The directory path the model was saved to.
  """
  export_dir = os.path.join(self.get_temp_dir(), '3')

  with distribution.scope():
    model = model_and_input.get_model()
    features, labels, _ = model_and_input.get_data()
    batch_size = model_and_input.get_batch_size()
    self._train_model(model, features, labels, batch_size)

  if not save_in_scope:
    self._save_model(model, export_dir)
  else:
    with distribution.scope():
      self._save_model(model, export_dir)
  return export_dir
def run_test_save_strategy_restore_no_strategy(self, model_and_input, distribution, save_in_scope)

Save a model with DS, and restore it without DS.

Expand source code
def run_test_save_strategy_restore_no_strategy(self, model_and_input,
                                               distribution, save_in_scope):
  """Save a model with DS, and restore it without DS.

  Args:
    model_and_input: object providing `get_model()`, `get_data()` and
      `get_batch_size()`.
    distribution: the strategy whose scope is used for building, training
      and predicting before the save.
    save_in_scope: if truthy, `_save_model` is also called inside
      `distribution.scope()`.
  """
  export_dir = os.path.join(self.get_temp_dir(), '1')

  with distribution.scope():
    model = model_and_input.get_model()
    features, labels, predict_inputs = model_and_input.get_data()
    batch_size = model_and_input.get_batch_size()

    self._train_model(model, features, labels, batch_size)
    predict_ds = self._get_predict_dataset(predict_inputs, batch_size)
    expected = self._predict_with_model(distribution, model, predict_ds)

  if not save_in_scope:
    self._save_model(model, export_dir)
  else:
    with distribution.scope():
      self._save_model(model, export_dir)

  actual = self._load_and_run_model(
      distribution=None,
      saved_dir=export_dir,
      predict_dataset=predict_ds)

  self.assertAllClose(
      expected, actual, atol=get_tolerance(distribution, None))
def run_test_save_strategy_restore_strategy(self, model_and_input, distribution_for_saving, distribution_for_restoring, save_in_scope)

Save a model with DS, and restore it with potentially different DS.

Expand source code
def run_test_save_strategy_restore_strategy(self, model_and_input,
                                            distribution_for_saving,
                                            distribution_for_restoring,
                                            save_in_scope):
  """Save a model with DS, and restore it with potentially different DS.

  Args:
    model_and_input: object providing `get_model()`, `get_data()` and
      `get_batch_size()`.
    distribution_for_saving: the strategy whose scope is used for building,
      training and predicting before the save.
    distribution_for_restoring: the strategy whose scope is used for loading
      and running the saved model.
    save_in_scope: if truthy, `_save_model` is also called inside
      `distribution_for_saving.scope()`.
  """
  export_dir = os.path.join(self.get_temp_dir(), '2')

  with distribution_for_saving.scope():
    model = model_and_input.get_model()
    features, labels, predict_inputs = model_and_input.get_data()
    batch_size = model_and_input.get_batch_size()

    self._train_model(model, features, labels, batch_size)
    predict_ds = self._get_predict_dataset(predict_inputs, batch_size)
    expected = self._predict_with_model(
        distribution_for_saving, model, predict_ds)

  if not save_in_scope:
    self._save_model(model, export_dir)
  else:
    with distribution_for_saving.scope():
      self._save_model(model, export_dir)

  with distribution_for_restoring.scope():
    actual = self._load_and_run_model(
        distribution=distribution_for_restoring,
        saved_dir=export_dir,
        predict_dataset=predict_ds)

  self.assertAllClose(
      expected,
      actual,
      atol=get_tolerance(distribution_for_saving,
                         distribution_for_restoring))
def setUp(self)

Hook method for setting up the test fixture before exercising it.

Expand source code
def setUp(self):
  """Seeds the NumPy and TensorFlow RNGs, then runs the parent setUp."""
  seed = _RANDOM_SEED
  np.random.seed(seed)
  tf.compat.v1.set_random_seed(seed)
  self._root_dir = 'base'
  super(TestSavedModelBase, self).setUp()