Module keras.applications.mobilenet_v3

MobileNet v3 models for Keras.

Expand source code
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# pylint: disable=invalid-name
# pylint: disable=missing-function-docstring
"""MobileNet v3 models for Keras."""

import tensorflow.compat.v2 as tf

from keras import backend
from keras import models
from keras.applications import imagenet_utils
from keras.layers import VersionAwareLayers
from keras.utils import data_utils
from keras.utils import layer_utils
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util.tf_export import keras_export


# TODO(scottzhu): Change this to the GCS path.
BASE_WEIGHT_PATH = ('https://storage.googleapis.com/tensorflow/'
                    'keras-applications/mobilenet_v3/')
WEIGHTS_HASHES = {
    'large_224_0.75_float': ('765b44a33ad4005b3ac83185abf1d0eb',
                             'e7b4d1071996dd51a2c2ca2424570e20'),
    'large_224_1.0_float': ('59e551e166be033d707958cf9e29a6a7',
                            '037116398e07f018c0005ffcb0406831'),
    'large_minimalistic_224_1.0_float': ('675e7b876c45c57e9e63e6d90a36599c',
                                         'a2c33aed672524d1d0b4431808177695'),
    'small_224_0.75_float': ('cb65d4e5be93758266aa0a7f2c6708b7',
                             '4d2fe46f1c1f38057392514b0df1d673'),
    'small_224_1.0_float': ('8768d4c2e7dee89b9d02b2d03d65d862',
                            'be7100780f875c06bcab93d76641aa26'),
    'small_minimalistic_224_1.0_float': ('99cd97fb2fcdad2bf028eb838de69e37',
                                         '20d4e357df3f7a6361f3a288857b1051'),
}

layers = VersionAwareLayers()


BASE_DOCSTRING = """Instantiates the {name} architecture.

  Reference:
  - [Searching for MobileNetV3](
      https://arxiv.org/pdf/1905.02244.pdf) (ICCV 2019)

  The following table describes the performance of MobileNets v3:
  ------------------------------------------------------------------------
  MACs stands for Multiply Adds

  |Classification Checkpoint|MACs(M)|Parameters(M)|Top1 Accuracy|Pixel1 CPU(ms)|
  |---|---|---|---|---|
  | mobilenet_v3_large_1.0_224              | 217 | 5.4 |   75.6   |   51.2  |
  | mobilenet_v3_large_0.75_224             | 155 | 4.0 |   73.3   |   39.8  |
  | mobilenet_v3_large_minimalistic_1.0_224 | 209 | 3.9 |   72.3   |   44.1  |
  | mobilenet_v3_small_1.0_224              | 66  | 2.9 |   68.1   |   15.8  |
  | mobilenet_v3_small_0.75_224             | 44  | 2.4 |   65.4   |   12.8  |
  | mobilenet_v3_small_minimalistic_1.0_224 | 65  | 2.0 |   61.9   |   12.2  |

  For image classification use cases, see
  [this page for detailed examples](
    https://keras.io/api/applications/#usage-examples-for-image-classification-models).

  For transfer learning use cases, make sure to read the
  [guide to transfer learning & fine-tuning](
    https://keras.io/guides/transfer_learning/).

  Note: each Keras Application expects a specific kind of input preprocessing.
  For ModelNetV3, by default input preprocessing is included as a part of the
  model (as a `Rescaling` layer), and thus
  `tf.keras.applications.mobilenet_v3.preprocess_input` is actually a
  pass-through function. In this use case, ModelNetV3 models expect their inputs
  to be float tensors of pixels with values in the [0-255] range.
  At the same time, preprocessing as a part of the model (i.e. `Rescaling`
  layer) can be disabled by setting `include_preprocessing` argument to False.
  With preprocessing disabled ModelNetV3 models expect their inputs to be float
  tensors of pixels with values in the [-1, 1] range.

  Args:
    input_shape: Optional shape tuple, to be specified if you would
      like to use a model with an input image resolution that is not
      (224, 224, 3).
      It should have exactly 3 inputs channels (224, 224, 3).
      You can also omit this option if you would like
      to infer input_shape from an input_tensor.
      If you choose to include both input_tensor and input_shape then
      input_shape will be used if they match, if the shapes
      do not match then we will throw an error.
      E.g. `(160, 160, 3)` would be one valid value.
    alpha: controls the width of the network. This is known as the
      depth multiplier in the MobileNetV3 paper, but the name is kept for
      consistency with MobileNetV1 in Keras.
      - If `alpha` < 1.0, proportionally decreases the number
          of filters in each layer.
      - If `alpha` > 1.0, proportionally increases the number
          of filters in each layer.
      - If `alpha` = 1, default number of filters from the paper
          are used at each layer.
    minimalistic: In addition to large and small models this module also
      contains so-called minimalistic models, these models have the same
      per-layer dimensions characteristic as MobilenetV3 however, they don't
      utilize any of the advanced blocks (squeeze-and-excite units, hard-swish,
      and 5x5 convolutions). While these models are less efficient on CPU, they
      are much more performant on GPU/DSP.
    include_top: Boolean, whether to include the fully-connected
      layer at the top of the network. Defaults to `True`.
    weights: String, one of `None` (random initialization),
      'imagenet' (pre-training on ImageNet),
      or the path to the weights file to be loaded.
    input_tensor: Optional Keras tensor (i.e. output of
      `layers.Input()`)
      to use as image input for the model.
    pooling: String, optional pooling mode for feature extraction
      when `include_top` is `False`.
      - `None` means that the output of the model
          will be the 4D tensor output of the
          last convolutional block.
      - `avg` means that global average pooling
          will be applied to the output of the
          last convolutional block, and thus
          the output of the model will be a
          2D tensor.
      - `max` means that global max pooling will
          be applied.
    classes: Integer, optional number of classes to classify images
      into, only to be specified if `include_top` is True, and
      if no `weights` argument is specified.
    dropout_rate: fraction of the input units to drop on the last layer.
    classifier_activation: A `str` or callable. The activation function to use
      on the "top" layer. Ignored unless `include_top=True`. Set
      `classifier_activation=None` to return the logits of the "top" layer.
      When loading pretrained weights, `classifier_activation` can only
      be `None` or `"softmax"`.
    include_preprocessing: Boolean, whether to include the preprocessing
      layer (`Rescaling`) at the bottom of the network. Defaults to `True`.

  Call arguments:
    inputs: A floating point `numpy.array` or a `tf.Tensor`, 4D with 3 color
      channels, with values in the range [0, 255] if `include_preprocessing`
      is True and in the range [-1, 1] otherwise.

  Returns:
    A `keras.Model` instance.
"""


def MobileNetV3(stack_fn,
                last_point_ch,
                input_shape=None,
                alpha=1.0,
                model_type='large',
                minimalistic=False,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                classes=1000,
                pooling=None,
                dropout_rate=0.2,
                classifier_activation='softmax',
                include_preprocessing=True):
  if not (weights in {'imagenet', None} or tf.io.gfile.exists(weights)):
    raise ValueError('The `weights` argument should be either '
                     '`None` (random initialization), `imagenet` '
                     '(pre-training on ImageNet), '
                     'or the path to the weights file to be loaded.')

  if weights == 'imagenet' and include_top and classes != 1000:
    raise ValueError('If using `weights` as `"imagenet"` with `include_top` '
                     'as true, `classes` should be 1000')

  # Determine proper input shape and default size.
  # If both input_shape and input_tensor are used, they should match
  if input_shape is not None and input_tensor is not None:
    try:
      is_input_t_tensor = backend.is_keras_tensor(input_tensor)
    except ValueError:
      try:
        is_input_t_tensor = backend.is_keras_tensor(
            layer_utils.get_source_inputs(input_tensor))
      except ValueError:
        raise ValueError('input_tensor: ', input_tensor,
                         'is not type input_tensor')
    if is_input_t_tensor:
      if backend.image_data_format() == 'channels_first':
        if backend.int_shape(input_tensor)[1] != input_shape[1]:
          raise ValueError('input_shape: ', input_shape, 'and input_tensor: ',
                           input_tensor,
                           'do not meet the same shape requirements')
      else:
        if backend.int_shape(input_tensor)[2] != input_shape[1]:
          raise ValueError('input_shape: ', input_shape, 'and input_tensor: ',
                           input_tensor,
                           'do not meet the same shape requirements')
    else:
      raise ValueError('input_tensor specified: ', input_tensor,
                       'is not a keras tensor')

  # If input_shape is None, infer shape from input_tensor
  if input_shape is None and input_tensor is not None:

    try:
      backend.is_keras_tensor(input_tensor)
    except ValueError:
      raise ValueError('input_tensor: ', input_tensor, 'is type: ',
                       type(input_tensor), 'which is not a valid type')

    if backend.is_keras_tensor(input_tensor):
      if backend.image_data_format() == 'channels_first':
        rows = backend.int_shape(input_tensor)[2]
        cols = backend.int_shape(input_tensor)[3]
        input_shape = (3, cols, rows)
      else:
        rows = backend.int_shape(input_tensor)[1]
        cols = backend.int_shape(input_tensor)[2]
        input_shape = (cols, rows, 3)
  # If input_shape is None and input_tensor is None using standart shape
  if input_shape is None and input_tensor is None:
    input_shape = (None, None, 3)

  if backend.image_data_format() == 'channels_last':
    row_axis, col_axis = (0, 1)
  else:
    row_axis, col_axis = (1, 2)
  rows = input_shape[row_axis]
  cols = input_shape[col_axis]
  if rows and cols and (rows < 32 or cols < 32):
    raise ValueError('Input size must be at least 32x32; got `input_shape=' +
                     str(input_shape) + '`')
  if weights == 'imagenet':
    if (not minimalistic and alpha not in [0.75, 1.0]
        or minimalistic and alpha != 1.0):
      raise ValueError('If imagenet weights are being loaded, '
                       'alpha can be one of `0.75`, `1.0` for non minimalistic'
                       ' or `1.0` for minimalistic only.')

    if rows != cols or rows != 224:
      logging.warning('`input_shape` is undefined or non-square, '
                      'or `rows` is not 224.'
                      ' Weights for input shape (224, 224) will be'
                      ' loaded as the default.')

  if input_tensor is None:
    img_input = layers.Input(shape=input_shape)
  else:
    if not backend.is_keras_tensor(input_tensor):
      img_input = layers.Input(tensor=input_tensor, shape=input_shape)
    else:
      img_input = input_tensor

  channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1

  if minimalistic:
    kernel = 3
    activation = relu
    se_ratio = None
  else:
    kernel = 5
    activation = hard_swish
    se_ratio = 0.25

  x = img_input
  if include_preprocessing:
    x = layers.Rescaling(scale=1. / 127.5, offset=-1.)(x)
  x = layers.Conv2D(
      16,
      kernel_size=3,
      strides=(2, 2),
      padding='same',
      use_bias=False,
      name='Conv')(x)
  x = layers.BatchNormalization(
      axis=channel_axis, epsilon=1e-3,
      momentum=0.999, name='Conv/BatchNorm')(x)
  x = activation(x)

  x = stack_fn(x, kernel, activation, se_ratio)

  last_conv_ch = _depth(backend.int_shape(x)[channel_axis] * 6)

  # if the width multiplier is greater than 1 we
  # increase the number of output channels
  if alpha > 1.0:
    last_point_ch = _depth(last_point_ch * alpha)
  x = layers.Conv2D(
      last_conv_ch,
      kernel_size=1,
      padding='same',
      use_bias=False,
      name='Conv_1')(x)
  x = layers.BatchNormalization(
      axis=channel_axis, epsilon=1e-3,
      momentum=0.999, name='Conv_1/BatchNorm')(x)
  x = activation(x)
  x = layers.GlobalAveragePooling2D(keepdims=True)(x)
  x = layers.Conv2D(
      last_point_ch,
      kernel_size=1,
      padding='same',
      use_bias=True,
      name='Conv_2')(x)
  x = activation(x)

  if include_top:
    if dropout_rate > 0:
      x = layers.Dropout(dropout_rate)(x)
    x = layers.Conv2D(classes, kernel_size=1, padding='same', name='Logits')(x)
    x = layers.Flatten()(x)
    imagenet_utils.validate_activation(classifier_activation, weights)
    x = layers.Activation(activation=classifier_activation,
                          name='Predictions')(x)
  else:
    if pooling == 'avg':
      x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
    elif pooling == 'max':
      x = layers.GlobalMaxPooling2D(name='max_pool')(x)
  # Ensure that the model takes into account
  # any potential predecessors of `input_tensor`.
  if input_tensor is not None:
    inputs = layer_utils.get_source_inputs(input_tensor)
  else:
    inputs = img_input

  # Create model.
  model = models.Model(inputs, x, name='MobilenetV3' + model_type)

  # Load weights.
  if weights == 'imagenet':
    model_name = '{}{}_224_{}_float'.format(
        model_type, '_minimalistic' if minimalistic else '', str(alpha))
    if include_top:
      file_name = 'weights_mobilenet_v3_' + model_name + '.h5'
      file_hash = WEIGHTS_HASHES[model_name][0]
    else:
      file_name = 'weights_mobilenet_v3_' + model_name + '_no_top.h5'
      file_hash = WEIGHTS_HASHES[model_name][1]
    weights_path = data_utils.get_file(
        file_name,
        BASE_WEIGHT_PATH + file_name,
        cache_subdir='models',
        file_hash=file_hash)
    model.load_weights(weights_path)
  elif weights is not None:
    model.load_weights(weights)

  return model


@keras_export('keras.applications.MobileNetV3Small')
def MobileNetV3Small(input_shape=None,
                     alpha=1.0,
                     minimalistic=False,
                     include_top=True,
                     weights='imagenet',
                     input_tensor=None,
                     classes=1000,
                     pooling=None,
                     dropout_rate=0.2,
                     classifier_activation='softmax',
                     include_preprocessing=True):

  def stack_fn(x, kernel, activation, se_ratio):

    def depth(d):
      return _depth(d * alpha)

    x = _inverted_res_block(x, 1, depth(16), 3, 2, se_ratio, relu, 0)
    x = _inverted_res_block(x, 72. / 16, depth(24), 3, 2, None, relu, 1)
    x = _inverted_res_block(x, 88. / 24, depth(24), 3, 1, None, relu, 2)
    x = _inverted_res_block(x, 4, depth(40), kernel, 2, se_ratio, activation, 3)
    x = _inverted_res_block(x, 6, depth(40), kernel, 1, se_ratio, activation, 4)
    x = _inverted_res_block(x, 6, depth(40), kernel, 1, se_ratio, activation, 5)
    x = _inverted_res_block(x, 3, depth(48), kernel, 1, se_ratio, activation, 6)
    x = _inverted_res_block(x, 3, depth(48), kernel, 1, se_ratio, activation, 7)
    x = _inverted_res_block(x, 6, depth(96), kernel, 2, se_ratio, activation, 8)
    x = _inverted_res_block(x, 6, depth(96), kernel, 1, se_ratio, activation, 9)
    x = _inverted_res_block(x, 6, depth(96), kernel, 1, se_ratio, activation,
                            10)
    return x

  return MobileNetV3(stack_fn, 1024, input_shape, alpha, 'small', minimalistic,
                     include_top, weights, input_tensor, classes, pooling,
                     dropout_rate, classifier_activation, include_preprocessing)


@keras_export('keras.applications.MobileNetV3Large')
def MobileNetV3Large(input_shape=None,
                     alpha=1.0,
                     minimalistic=False,
                     include_top=True,
                     weights='imagenet',
                     input_tensor=None,
                     classes=1000,
                     pooling=None,
                     dropout_rate=0.2,
                     classifier_activation='softmax',
                     include_preprocessing=True):

  def stack_fn(x, kernel, activation, se_ratio):

    def depth(d):
      return _depth(d * alpha)

    x = _inverted_res_block(x, 1, depth(16), 3, 1, None, relu, 0)
    x = _inverted_res_block(x, 4, depth(24), 3, 2, None, relu, 1)
    x = _inverted_res_block(x, 3, depth(24), 3, 1, None, relu, 2)
    x = _inverted_res_block(x, 3, depth(40), kernel, 2, se_ratio, relu, 3)
    x = _inverted_res_block(x, 3, depth(40), kernel, 1, se_ratio, relu, 4)
    x = _inverted_res_block(x, 3, depth(40), kernel, 1, se_ratio, relu, 5)
    x = _inverted_res_block(x, 6, depth(80), 3, 2, None, activation, 6)
    x = _inverted_res_block(x, 2.5, depth(80), 3, 1, None, activation, 7)
    x = _inverted_res_block(x, 2.3, depth(80), 3, 1, None, activation, 8)
    x = _inverted_res_block(x, 2.3, depth(80), 3, 1, None, activation, 9)
    x = _inverted_res_block(x, 6, depth(112), 3, 1, se_ratio, activation, 10)
    x = _inverted_res_block(x, 6, depth(112), 3, 1, se_ratio, activation, 11)
    x = _inverted_res_block(x, 6, depth(160), kernel, 2, se_ratio, activation,
                            12)
    x = _inverted_res_block(x, 6, depth(160), kernel, 1, se_ratio, activation,
                            13)
    x = _inverted_res_block(x, 6, depth(160), kernel, 1, se_ratio, activation,
                            14)
    return x

  return MobileNetV3(stack_fn, 1280, input_shape, alpha, 'large', minimalistic,
                     include_top, weights, input_tensor, classes, pooling,
                     dropout_rate, classifier_activation, include_preprocessing)


MobileNetV3Small.__doc__ = BASE_DOCSTRING.format(name='MobileNetV3Small')
MobileNetV3Large.__doc__ = BASE_DOCSTRING.format(name='MobileNetV3Large')


def relu(x):
  return layers.ReLU()(x)


def hard_sigmoid(x):
  return layers.ReLU(6.)(x + 3.) * (1. / 6.)


def hard_swish(x):
  return layers.Multiply()([x, hard_sigmoid(x)])


# This function is taken from the original tf repo.
# It ensures that all layers have a channel number that is divisible by 8
# It can be seen here:
# https://github.com/tensorflow/models/blob/master/research/
# slim/nets/mobilenet/mobilenet.py


def _depth(v, divisor=8, min_value=None):
  if min_value is None:
    min_value = divisor
  new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
  # Make sure that round down does not go down by more than 10%.
  if new_v < 0.9 * v:
    new_v += divisor
  return new_v


def _se_block(inputs, filters, se_ratio, prefix):
  x = layers.GlobalAveragePooling2D(
      keepdims=True, name=prefix + 'squeeze_excite/AvgPool')(
          inputs)
  x = layers.Conv2D(
      _depth(filters * se_ratio),
      kernel_size=1,
      padding='same',
      name=prefix + 'squeeze_excite/Conv')(
          x)
  x = layers.ReLU(name=prefix + 'squeeze_excite/Relu')(x)
  x = layers.Conv2D(
      filters,
      kernel_size=1,
      padding='same',
      name=prefix + 'squeeze_excite/Conv_1')(
          x)
  x = hard_sigmoid(x)
  x = layers.Multiply(name=prefix + 'squeeze_excite/Mul')([inputs, x])
  return x


def _inverted_res_block(x, expansion, filters, kernel_size, stride, se_ratio,
                        activation, block_id):
  channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1
  shortcut = x
  prefix = 'expanded_conv/'
  infilters = backend.int_shape(x)[channel_axis]
  if block_id:
    # Expand
    prefix = 'expanded_conv_{}/'.format(block_id)
    x = layers.Conv2D(
        _depth(infilters * expansion),
        kernel_size=1,
        padding='same',
        use_bias=False,
        name=prefix + 'expand')(
            x)
    x = layers.BatchNormalization(
        axis=channel_axis,
        epsilon=1e-3,
        momentum=0.999,
        name=prefix + 'expand/BatchNorm')(
            x)
    x = activation(x)

  if stride == 2:
    x = layers.ZeroPadding2D(
        padding=imagenet_utils.correct_pad(x, kernel_size),
        name=prefix + 'depthwise/pad')(
            x)
  x = layers.DepthwiseConv2D(
      kernel_size,
      strides=stride,
      padding='same' if stride == 1 else 'valid',
      use_bias=False,
      name=prefix + 'depthwise')(
          x)
  x = layers.BatchNormalization(
      axis=channel_axis,
      epsilon=1e-3,
      momentum=0.999,
      name=prefix + 'depthwise/BatchNorm')(
          x)
  x = activation(x)

  if se_ratio:
    x = _se_block(x, _depth(infilters * expansion), se_ratio, prefix)

  x = layers.Conv2D(
      filters,
      kernel_size=1,
      padding='same',
      use_bias=False,
      name=prefix + 'project')(
          x)
  x = layers.BatchNormalization(
      axis=channel_axis,
      epsilon=1e-3,
      momentum=0.999,
      name=prefix + 'project/BatchNorm')(
          x)

  if stride == 1 and infilters == filters:
    x = layers.Add(name=prefix + 'Add')([shortcut, x])
  return x


@keras_export('keras.applications.mobilenet_v3.preprocess_input')
def preprocess_input(x, data_format=None):  # pylint: disable=unused-argument
  """A placeholder method for backward compatibility.

  The preprocessing logic has been included in the mobilenet_v3 model
  implementation. Users are no longer required to call this method to normalize
  the input data. This method does nothing and only kept as a placeholder to
  align the API surface between old and new version of model.

  Args:
    x: A floating point `numpy.array` or a `tf.Tensor`.
    data_format: Optional data format of the image tensor/array. Defaults to
      None, in which case the global setting
      `tf.keras.backend.image_data_format()` is used (unless you changed it,
      it defaults to "channels_last").{mode}

  Returns:
    Unchanged `numpy.array` or `tf.Tensor`.
  """
  return x


@keras_export('keras.applications.mobilenet_v3.decode_predictions')
def decode_predictions(preds, top=5):
  return imagenet_utils.decode_predictions(preds, top=top)


decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__

Functions

def MobileNetV3(stack_fn, last_point_ch, input_shape=None, alpha=1.0, model_type='large', minimalistic=False, include_top=True, weights='imagenet', input_tensor=None, classes=1000, pooling=None, dropout_rate=0.2, classifier_activation='softmax', include_preprocessing=True)
Expand source code
def MobileNetV3(stack_fn,
                last_point_ch,
                input_shape=None,
                alpha=1.0,
                model_type='large',
                minimalistic=False,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                classes=1000,
                pooling=None,
                dropout_rate=0.2,
                classifier_activation='softmax',
                include_preprocessing=True):
  if not (weights in {'imagenet', None} or tf.io.gfile.exists(weights)):
    raise ValueError('The `weights` argument should be either '
                     '`None` (random initialization), `imagenet` '
                     '(pre-training on ImageNet), '
                     'or the path to the weights file to be loaded.')

  if weights == 'imagenet' and include_top and classes != 1000:
    raise ValueError('If using `weights` as `"imagenet"` with `include_top` '
                     'as true, `classes` should be 1000')

  # Determine proper input shape and default size.
  # If both input_shape and input_tensor are used, they should match
  if input_shape is not None and input_tensor is not None:
    try:
      is_input_t_tensor = backend.is_keras_tensor(input_tensor)
    except ValueError:
      try:
        is_input_t_tensor = backend.is_keras_tensor(
            layer_utils.get_source_inputs(input_tensor))
      except ValueError:
        raise ValueError('input_tensor: ', input_tensor,
                         'is not type input_tensor')
    if is_input_t_tensor:
      if backend.image_data_format() == 'channels_first':
        if backend.int_shape(input_tensor)[1] != input_shape[1]:
          raise ValueError('input_shape: ', input_shape, 'and input_tensor: ',
                           input_tensor,
                           'do not meet the same shape requirements')
      else:
        if backend.int_shape(input_tensor)[2] != input_shape[1]:
          raise ValueError('input_shape: ', input_shape, 'and input_tensor: ',
                           input_tensor,
                           'do not meet the same shape requirements')
    else:
      raise ValueError('input_tensor specified: ', input_tensor,
                       'is not a keras tensor')

  # If input_shape is None, infer shape from input_tensor
  if input_shape is None and input_tensor is not None:

    try:
      backend.is_keras_tensor(input_tensor)
    except ValueError:
      raise ValueError('input_tensor: ', input_tensor, 'is type: ',
                       type(input_tensor), 'which is not a valid type')

    if backend.is_keras_tensor(input_tensor):
      if backend.image_data_format() == 'channels_first':
        rows = backend.int_shape(input_tensor)[2]
        cols = backend.int_shape(input_tensor)[3]
        input_shape = (3, cols, rows)
      else:
        rows = backend.int_shape(input_tensor)[1]
        cols = backend.int_shape(input_tensor)[2]
        input_shape = (cols, rows, 3)
  # If input_shape is None and input_tensor is None using standart shape
  if input_shape is None and input_tensor is None:
    input_shape = (None, None, 3)

  if backend.image_data_format() == 'channels_last':
    row_axis, col_axis = (0, 1)
  else:
    row_axis, col_axis = (1, 2)
  rows = input_shape[row_axis]
  cols = input_shape[col_axis]
  if rows and cols and (rows < 32 or cols < 32):
    raise ValueError('Input size must be at least 32x32; got `input_shape=' +
                     str(input_shape) + '`')
  if weights == 'imagenet':
    if (not minimalistic and alpha not in [0.75, 1.0]
        or minimalistic and alpha != 1.0):
      raise ValueError('If imagenet weights are being loaded, '
                       'alpha can be one of `0.75`, `1.0` for non minimalistic'
                       ' or `1.0` for minimalistic only.')

    if rows != cols or rows != 224:
      logging.warning('`input_shape` is undefined or non-square, '
                      'or `rows` is not 224.'
                      ' Weights for input shape (224, 224) will be'
                      ' loaded as the default.')

  if input_tensor is None:
    img_input = layers.Input(shape=input_shape)
  else:
    if not backend.is_keras_tensor(input_tensor):
      img_input = layers.Input(tensor=input_tensor, shape=input_shape)
    else:
      img_input = input_tensor

  channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1

  if minimalistic:
    kernel = 3
    activation = relu
    se_ratio = None
  else:
    kernel = 5
    activation = hard_swish
    se_ratio = 0.25

  x = img_input
  if include_preprocessing:
    x = layers.Rescaling(scale=1. / 127.5, offset=-1.)(x)
  x = layers.Conv2D(
      16,
      kernel_size=3,
      strides=(2, 2),
      padding='same',
      use_bias=False,
      name='Conv')(x)
  x = layers.BatchNormalization(
      axis=channel_axis, epsilon=1e-3,
      momentum=0.999, name='Conv/BatchNorm')(x)
  x = activation(x)

  x = stack_fn(x, kernel, activation, se_ratio)

  last_conv_ch = _depth(backend.int_shape(x)[channel_axis] * 6)

  # if the width multiplier is greater than 1 we
  # increase the number of output channels
  if alpha > 1.0:
    last_point_ch = _depth(last_point_ch * alpha)
  x = layers.Conv2D(
      last_conv_ch,
      kernel_size=1,
      padding='same',
      use_bias=False,
      name='Conv_1')(x)
  x = layers.BatchNormalization(
      axis=channel_axis, epsilon=1e-3,
      momentum=0.999, name='Conv_1/BatchNorm')(x)
  x = activation(x)
  x = layers.GlobalAveragePooling2D(keepdims=True)(x)
  x = layers.Conv2D(
      last_point_ch,
      kernel_size=1,
      padding='same',
      use_bias=True,
      name='Conv_2')(x)
  x = activation(x)

  if include_top:
    if dropout_rate > 0:
      x = layers.Dropout(dropout_rate)(x)
    x = layers.Conv2D(classes, kernel_size=1, padding='same', name='Logits')(x)
    x = layers.Flatten()(x)
    imagenet_utils.validate_activation(classifier_activation, weights)
    x = layers.Activation(activation=classifier_activation,
                          name='Predictions')(x)
  else:
    if pooling == 'avg':
      x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
    elif pooling == 'max':
      x = layers.GlobalMaxPooling2D(name='max_pool')(x)
  # Ensure that the model takes into account
  # any potential predecessors of `input_tensor`.
  if input_tensor is not None:
    inputs = layer_utils.get_source_inputs(input_tensor)
  else:
    inputs = img_input

  # Create model.
  model = models.Model(inputs, x, name='MobilenetV3' + model_type)

  # Load weights.
  if weights == 'imagenet':
    model_name = '{}{}_224_{}_float'.format(
        model_type, '_minimalistic' if minimalistic else '', str(alpha))
    if include_top:
      file_name = 'weights_mobilenet_v3_' + model_name + '.h5'
      file_hash = WEIGHTS_HASHES[model_name][0]
    else:
      file_name = 'weights_mobilenet_v3_' + model_name + '_no_top.h5'
      file_hash = WEIGHTS_HASHES[model_name][1]
    weights_path = data_utils.get_file(
        file_name,
        BASE_WEIGHT_PATH + file_name,
        cache_subdir='models',
        file_hash=file_hash)
    model.load_weights(weights_path)
  elif weights is not None:
    model.load_weights(weights)

  return model
def MobileNetV3Large(input_shape=None, alpha=1.0, minimalistic=False, include_top=True, weights='imagenet', input_tensor=None, classes=1000, pooling=None, dropout_rate=0.2, classifier_activation='softmax', include_preprocessing=True)

Instantiates the MobileNetV3Large architecture.

Reference: - Searching for MobileNetV3 (ICCV 2019)

The following table describes the performance of MobileNets v3:

MACs stands for Multiply Adds

Classification Checkpoint MACs(M) Parameters(M) Top1 Accuracy Pixel1 CPU(ms)
mobilenet_v3_large_1.0_224 217 5.4 75.6 51.2
mobilenet_v3_large_0.75_224 155 4.0 73.3 39.8
mobilenet_v3_large_minimalistic_1.0_224 209 3.9 72.3 44.1
mobilenet_v3_small_1.0_224 66 2.9 68.1 15.8
mobilenet_v3_small_0.75_224 44 2.4 65.4 12.8
mobilenet_v3_small_minimalistic_1.0_224 65 2.0 61.9 12.2

For image classification use cases, see this page for detailed examples.

For transfer learning use cases, make sure to read the guide to transfer learning & fine-tuning.

Note: each Keras Application expects a specific kind of input preprocessing. For ModelNetV3, by default input preprocessing is included as a part of the model (as a Rescaling layer), and thus tf.keras.applications.mobilenet_v3.preprocess_input is actually a pass-through function. In this use case, ModelNetV3 models expect their inputs to be float tensors of pixels with values in the [0-255] range. At the same time, preprocessing as a part of the model (i.e. Rescaling layer) can be disabled by setting include_preprocessing argument to False. With preprocessing disabled ModelNetV3 models expect their inputs to be float tensors of pixels with values in the [-1, 1] range.

Args

input_shape
Optional shape tuple, to be specified if you would like to use a model with an input image resolution that is not (224, 224, 3). It should have exactly 3 inputs channels (224, 224, 3). You can also omit this option if you would like to infer input_shape from an input_tensor. If you choose to include both input_tensor and input_shape then input_shape will be used if they match, if the shapes do not match then we will throw an error. E.g. (160, 160, 3) would be one valid value.
alpha
controls the width of the network. This is known as the depth multiplier in the MobileNetV3 paper, but the name is kept for consistency with MobileNetV1 in Keras. - If alpha < 1.0, proportionally decreases the number of filters in each layer. - If alpha > 1.0, proportionally increases the number of filters in each layer. - If alpha = 1, default number of filters from the paper are used at each layer.
minimalistic
In addition to large and small models this module also contains so-called minimalistic models, these models have the same per-layer dimensions characteristic as MobilenetV3 however, they don't utilize any of the advanced blocks (squeeze-and-excite units, hard-swish, and 5x5 convolutions). While these models are less efficient on CPU, they are much more performant on GPU/DSP.
include_top
Boolean, whether to include the fully-connected layer at the top of the network. Defaults to True.
weights
String, one of None (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded.
input_tensor
Optional Keras tensor (i.e. output of layers.Input()) to use as image input for the model.
pooling
String, optional pooling mode for feature extraction when include_top is False. - None means that the output of the model will be the 4D tensor output of the last convolutional block. - avg means that global average pooling will be applied to the output of the last convolutional block, and thus the output of the model will be a 2D tensor. - max means that global max pooling will be applied.
classes
Integer, optional number of classes to classify images into, only to be specified if include_top is True, and if no weights argument is specified.
dropout_rate
fraction of the input units to drop on the last layer.
classifier_activation
A str or callable. The activation function to use on the "top" layer. Ignored unless include_top=True. Set classifier_activation=None to return the logits of the "top" layer. When loading pretrained weights, classifier_activation can only be None or "softmax".
include_preprocessing
Boolean, whether to include the preprocessing layer (Rescaling) at the bottom of the network. Defaults to True.

Call arguments: inputs: A floating point numpy.array or a tf.Tensor, 4D with 3 color channels, with values in the range [0, 255] if include_preprocessing is True and in the range [-1, 1] otherwise.

Returns

A keras.Model instance.

Expand source code
@keras_export('keras.applications.MobileNetV3Large')
def MobileNetV3Large(input_shape=None,
                     alpha=1.0,
                     minimalistic=False,
                     include_top=True,
                     weights='imagenet',
                     input_tensor=None,
                     classes=1000,
                     pooling=None,
                     dropout_rate=0.2,
                     classifier_activation='softmax',
                     include_preprocessing=True):

  def stack_fn(x, kernel, activation, se_ratio):

    def depth(d):
      return _depth(d * alpha)

    x = _inverted_res_block(x, 1, depth(16), 3, 1, None, relu, 0)
    x = _inverted_res_block(x, 4, depth(24), 3, 2, None, relu, 1)
    x = _inverted_res_block(x, 3, depth(24), 3, 1, None, relu, 2)
    x = _inverted_res_block(x, 3, depth(40), kernel, 2, se_ratio, relu, 3)
    x = _inverted_res_block(x, 3, depth(40), kernel, 1, se_ratio, relu, 4)
    x = _inverted_res_block(x, 3, depth(40), kernel, 1, se_ratio, relu, 5)
    x = _inverted_res_block(x, 6, depth(80), 3, 2, None, activation, 6)
    x = _inverted_res_block(x, 2.5, depth(80), 3, 1, None, activation, 7)
    x = _inverted_res_block(x, 2.3, depth(80), 3, 1, None, activation, 8)
    x = _inverted_res_block(x, 2.3, depth(80), 3, 1, None, activation, 9)
    x = _inverted_res_block(x, 6, depth(112), 3, 1, se_ratio, activation, 10)
    x = _inverted_res_block(x, 6, depth(112), 3, 1, se_ratio, activation, 11)
    x = _inverted_res_block(x, 6, depth(160), kernel, 2, se_ratio, activation,
                            12)
    x = _inverted_res_block(x, 6, depth(160), kernel, 1, se_ratio, activation,
                            13)
    x = _inverted_res_block(x, 6, depth(160), kernel, 1, se_ratio, activation,
                            14)
    return x

  return MobileNetV3(stack_fn, 1280, input_shape, alpha, 'large', minimalistic,
                     include_top, weights, input_tensor, classes, pooling,
                     dropout_rate, classifier_activation, include_preprocessing)
def MobileNetV3Small(input_shape=None, alpha=1.0, minimalistic=False, include_top=True, weights='imagenet', input_tensor=None, classes=1000, pooling=None, dropout_rate=0.2, classifier_activation='softmax', include_preprocessing=True)

Instantiates the MobileNetV3Small architecture.

Reference: - Searching for MobileNetV3 (ICCV 2019)

The following table describes the performance of MobileNets v3:

MACs stands for Multiply Adds

Classification Checkpoint MACs(M) Parameters(M) Top1 Accuracy Pixel1 CPU(ms)
mobilenet_v3_large_1.0_224 217 5.4 75.6 51.2
mobilenet_v3_large_0.75_224 155 4.0 73.3 39.8
mobilenet_v3_large_minimalistic_1.0_224 209 3.9 72.3 44.1
mobilenet_v3_small_1.0_224 66 2.9 68.1 15.8
mobilenet_v3_small_0.75_224 44 2.4 65.4 12.8
mobilenet_v3_small_minimalistic_1.0_224 65 2.0 61.9 12.2

For image classification use cases, see this page for detailed examples.

For transfer learning use cases, make sure to read the guide to transfer learning & fine-tuning.

Note: each Keras Application expects a specific kind of input preprocessing. For ModelNetV3, by default input preprocessing is included as a part of the model (as a Rescaling layer), and thus tf.keras.applications.mobilenet_v3.preprocess_input is actually a pass-through function. In this use case, ModelNetV3 models expect their inputs to be float tensors of pixels with values in the [0-255] range. At the same time, preprocessing as a part of the model (i.e. Rescaling layer) can be disabled by setting include_preprocessing argument to False. With preprocessing disabled ModelNetV3 models expect their inputs to be float tensors of pixels with values in the [-1, 1] range.

Args

input_shape
Optional shape tuple, to be specified if you would like to use a model with an input image resolution that is not (224, 224, 3). It should have exactly 3 inputs channels (224, 224, 3). You can also omit this option if you would like to infer input_shape from an input_tensor. If you choose to include both input_tensor and input_shape then input_shape will be used if they match, if the shapes do not match then we will throw an error. E.g. (160, 160, 3) would be one valid value.
alpha
controls the width of the network. This is known as the depth multiplier in the MobileNetV3 paper, but the name is kept for consistency with MobileNetV1 in Keras. - If alpha < 1.0, proportionally decreases the number of filters in each layer. - If alpha > 1.0, proportionally increases the number of filters in each layer. - If alpha = 1, default number of filters from the paper are used at each layer.
minimalistic
In addition to large and small models this module also contains so-called minimalistic models, these models have the same per-layer dimensions characteristic as MobilenetV3 however, they don't utilize any of the advanced blocks (squeeze-and-excite units, hard-swish, and 5x5 convolutions). While these models are less efficient on CPU, they are much more performant on GPU/DSP.
include_top
Boolean, whether to include the fully-connected layer at the top of the network. Defaults to True.
weights
String, one of None (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded.
input_tensor
Optional Keras tensor (i.e. output of layers.Input()) to use as image input for the model.
pooling
String, optional pooling mode for feature extraction when include_top is False. - None means that the output of the model will be the 4D tensor output of the last convolutional block. - avg means that global average pooling will be applied to the output of the last convolutional block, and thus the output of the model will be a 2D tensor. - max means that global max pooling will be applied.
classes
Integer, optional number of classes to classify images into, only to be specified if include_top is True, and if no weights argument is specified.
dropout_rate
fraction of the input units to drop on the last layer.
classifier_activation
A str or callable. The activation function to use on the "top" layer. Ignored unless include_top=True. Set classifier_activation=None to return the logits of the "top" layer. When loading pretrained weights, classifier_activation can only be None or "softmax".
include_preprocessing
Boolean, whether to include the preprocessing layer (Rescaling) at the bottom of the network. Defaults to True.

Call arguments: inputs: A floating point numpy.array or a tf.Tensor, 4D with 3 color channels, with values in the range [0, 255] if include_preprocessing is True and in the range [-1, 1] otherwise.

Returns

A keras.Model instance.

Expand source code
@keras_export('keras.applications.MobileNetV3Small')
def MobileNetV3Small(input_shape=None,
                     alpha=1.0,
                     minimalistic=False,
                     include_top=True,
                     weights='imagenet',
                     input_tensor=None,
                     classes=1000,
                     pooling=None,
                     dropout_rate=0.2,
                     classifier_activation='softmax',
                     include_preprocessing=True):

  def stack_fn(x, kernel, activation, se_ratio):

    def depth(d):
      return _depth(d * alpha)

    x = _inverted_res_block(x, 1, depth(16), 3, 2, se_ratio, relu, 0)
    x = _inverted_res_block(x, 72. / 16, depth(24), 3, 2, None, relu, 1)
    x = _inverted_res_block(x, 88. / 24, depth(24), 3, 1, None, relu, 2)
    x = _inverted_res_block(x, 4, depth(40), kernel, 2, se_ratio, activation, 3)
    x = _inverted_res_block(x, 6, depth(40), kernel, 1, se_ratio, activation, 4)
    x = _inverted_res_block(x, 6, depth(40), kernel, 1, se_ratio, activation, 5)
    x = _inverted_res_block(x, 3, depth(48), kernel, 1, se_ratio, activation, 6)
    x = _inverted_res_block(x, 3, depth(48), kernel, 1, se_ratio, activation, 7)
    x = _inverted_res_block(x, 6, depth(96), kernel, 2, se_ratio, activation, 8)
    x = _inverted_res_block(x, 6, depth(96), kernel, 1, se_ratio, activation, 9)
    x = _inverted_res_block(x, 6, depth(96), kernel, 1, se_ratio, activation,
                            10)
    return x

  return MobileNetV3(stack_fn, 1024, input_shape, alpha, 'small', minimalistic,
                     include_top, weights, input_tensor, classes, pooling,
                     dropout_rate, classifier_activation, include_preprocessing)
def decode_predictions(preds, top=5)

Decodes the prediction of an ImageNet model.

Args

preds
Numpy array encoding a batch of predictions.
top
Integer, how many top-guesses to return. Defaults to 5.

Returns

A list of lists of top class prediction tuples (class_name, class_description, score). One list of tuples per sample in batch input.

Raises

ValueError
In case of invalid shape of the pred array (must be 2D).
Expand source code
@keras_export('keras.applications.mobilenet_v3.decode_predictions')
def decode_predictions(preds, top=5):
  return imagenet_utils.decode_predictions(preds, top=top)
def hard_sigmoid(x)
Expand source code
def hard_sigmoid(x):
  return layers.ReLU(6.)(x + 3.) * (1. / 6.)
def hard_swish(x)
Expand source code
def hard_swish(x):
  return layers.Multiply()([x, hard_sigmoid(x)])
def preprocess_input(x, data_format=None)

A placeholder method for backward compatibility.

The preprocessing logic has been included in the mobilenet_v3 model implementation. Users are no longer required to call this method to normalize the input data. This method does nothing and only kept as a placeholder to align the API surface between old and new version of model.

Args

x
A floating point numpy.array or a tf.Tensor.
data_format
Optional data format of the image tensor/array. Defaults to None, in which case the global setting tf.keras.backend.image_data_format() is used (unless you changed it, it defaults to "channels_last").{mode}

Returns

Unchanged numpy.array or tf.Tensor.

Expand source code
@keras_export('keras.applications.mobilenet_v3.preprocess_input')
def preprocess_input(x, data_format=None):  # pylint: disable=unused-argument
  """A placeholder method for backward compatibility.

  The preprocessing logic has been included in the mobilenet_v3 model
  implementation. Users are no longer required to call this method to normalize
  the input data. This method does nothing and only kept as a placeholder to
  align the API surface between old and new version of model.

  Args:
    x: A floating point `numpy.array` or a `tf.Tensor`.
    data_format: Optional data format of the image tensor/array. Defaults to
      None, in which case the global setting
      `tf.keras.backend.image_data_format()` is used (unless you changed it,
      it defaults to "channels_last").{mode}

  Returns:
    Unchanged `numpy.array` or `tf.Tensor`.
  """
  return x
def relu(x)
Expand source code
def relu(x):
  return layers.ReLU()(x)