Module keras.feature_column.sequence_feature_column
This API defines FeatureColumn for sequential input.
NOTE: This API is a work in progress and is likely to change frequently.
Expand source code
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""This API defines FeatureColumn for sequential input.
NOTE: This API is a work in progress and is likely to change frequently.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v2 as tf
from keras import backend
from keras.feature_column import base_feature_layer as kfc
from tensorflow.python.util.tf_export import keras_export
# pylint: disable=protected-access
@keras_export('keras.experimental.SequenceFeatures')
class SequenceFeatures(kfc._BaseFeaturesLayer):
"""A layer for sequence input.
All `feature_columns` must be sequence dense columns with the same
`sequence_length`. The output of this method can be fed into sequence
networks, such as RNN.
The output of this method is a 3D `Tensor` of shape `[batch_size, T, D]`.
`T` is the maximum sequence length for this batch, which could differ from
batch to batch.
If multiple `feature_columns` are given with `Di` `num_elements` each, their
outputs are concatenated. So, the final `Tensor` has shape
`[batch_size, T, D0 + D1 + ... + Dn]`.
Example:
  ```python
  import tensorflow as tf

  # Behavior of some cells or feature columns may depend on whether we are in
  # training or inference mode, e.g. applying dropout.
  training = True
  rating = tf.feature_column.sequence_numeric_column('rating')
  watches = tf.feature_column.sequence_categorical_column_with_identity(
      'watches', num_buckets=1000)
  watches_embedding = tf.feature_column.embedding_column(watches, dimension=10)
  columns = [rating, watches_embedding]

  features = {
      'rating': tf.sparse.from_dense([[1.0, 1.1, 0, 0, 0],
                                      [2.0, 2.1, 2.2, 2.3, 2.5]]),
      'watches': tf.sparse.from_dense([[2, 85, 0, 0, 0], [33, 78, 2, 73, 1]])
  }

  sequence_input_layer = tf.keras.experimental.SequenceFeatures(columns)
  sequence_input, sequence_length = sequence_input_layer(
      features, training=training)
  sequence_length_mask = tf.sequence_mask(sequence_length)

  hidden_size = 32
  rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
  # return_state=True makes the layer return `[outputs, state]`, matching the
  # two-value unpacking below.
  rnn_layer = tf.keras.layers.RNN(rnn_cell, return_state=True)
  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
  ```
"""
def __init__(
self,
feature_columns,
trainable=True,
name=None,
**kwargs):
""""Constructs a SequenceFeatures layer.
Args:
feature_columns: An iterable of dense sequence columns. Valid columns are
- `embedding_column` that wraps a `sequence_categorical_column_with_*`
- `sequence_numeric_column`.
trainable: Boolean, whether the layer's variables will be updated via
gradient descent during training.
name: Name to give to the SequenceFeatures.
**kwargs: Keyword arguments to construct a layer.
Raises:
ValueError: If any of the `feature_columns` is not a
`SequenceDenseColumn`.
"""
super(SequenceFeatures, self).__init__(
feature_columns=feature_columns,
trainable=trainable,
name=name,
expected_column_type=tf.__internal__.feature_column.SequenceDenseColumn,
**kwargs)
@property
def _is_feature_layer(self):
return True
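  # SequenceFeatures always produces a 3D output: the batch and time
  # dimensions from the input, plus the combined width of all feature columns.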
def _target_shape(self, input_shape, total_elements):
return (input_shape[0], input_shape[1], total_elements)
def call(self, features, training=None):
"""Returns sequence input corresponding to the `feature_columns`.
Args:
features: A dict mapping keys to tensors.
      training: Python boolean or None, indicating whether the layer is being
        run in training mode. This argument is passed to the call method of any
`FeatureColumn` that takes a `training` argument. For example, if a
`FeatureColumn` performed dropout, the column could expose a `training`
argument to control whether the dropout should be applied. If `None`,
defaults to `tf.keras.backend.learning_phase()`.
Returns:
An `(input_layer, sequence_length)` tuple where:
- input_layer: A float `Tensor` of shape `[batch_size, T, D]`.
`T` is the maximum sequence length for this batch, which could differ
from batch to batch. `D` is the sum of `num_elements` for all
`feature_columns`.
- sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence
length for each example.
Raises:
ValueError: If features are not a dictionary.
"""
if not isinstance(features, dict):
raise ValueError('We expected a dictionary here. Instead we got: ',
features)
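    # No explicit mode was requested: fall back to the global Keras learning
    # phase, as other Keras layers do.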
if training is None:
training = backend.learning_phase()
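    # The transformation cache memoizes intermediate column transformations so
    # that columns sharing an upstream transformation compute it only once.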
transformation_cache = tf.__internal__.feature_column.FeatureTransformationCache(features)
output_tensors = []
sequence_lengths = []
for column in self._feature_columns:
with backend.name_scope(column.name):
try:
dense_tensor, sequence_length = column.get_sequence_dense_tensor(
transformation_cache, self._state_manager, training=training)
except TypeError:
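          # This column's get_sequence_dense_tensor() does not accept a
          # `training` argument; call it without one.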
dense_tensor, sequence_length = column.get_sequence_dense_tensor(
transformation_cache, self._state_manager)
# Flattens the final dimension to produce a 3D Tensor.
output_tensors.append(self._process_dense_tensor(column, dense_tensor))
sequence_lengths.append(sequence_length)
# Check and process sequence lengths.
kfc._verify_static_batch_size_equality( # pylint: disable=protected-access
sequence_lengths, self._feature_columns)
sequence_length = _assert_all_equal_and_return(sequence_lengths)
return self._verify_and_concat_tensors(output_tensors), sequence_length
def _assert_all_equal_and_return(tensors, name=None):
"""Asserts that all tensors are equal and returns the first one."""
with backend.name_scope(name or 'assert_all_equal'):
if len(tensors) == 1:
return tensors[0]
assert_equal_ops = []
for t in tensors[1:]:
assert_equal_ops.append(tf.compat.v1.assert_equal(tensors[0], t))
with tf.control_dependencies(assert_equal_ops):
return tf.identity(tensors[0])
Classes
class SequenceFeatures (feature_columns, trainable=True, name=None, **kwargs)
A layer for sequence input.

All `feature_columns` must be sequence dense columns with the same
`sequence_length`. The output of this method can be fed into sequence
networks, such as RNN.

The output of this method is a 3D `Tensor` of shape `[batch_size, T, D]`.
`T` is the maximum sequence length for this batch, which could differ from
batch to batch.

If multiple `feature_columns` are given with `Di` `num_elements` each, their
outputs are concatenated. So, the final `Tensor` has shape
`[batch_size, T, D0 + D1 + ... + Dn]`.

Example:
```python
import tensorflow as tf

# Behavior of some cells or feature columns may depend on whether we are in
# training or inference mode, e.g. applying dropout.
training = True
rating = tf.feature_column.sequence_numeric_column('rating')
watches = tf.feature_column.sequence_categorical_column_with_identity(
    'watches', num_buckets=1000)
watches_embedding = tf.feature_column.embedding_column(watches, dimension=10)
columns = [rating, watches_embedding]

features = {
    'rating': tf.sparse.from_dense([[1.0, 1.1, 0, 0, 0],
                                    [2.0, 2.1, 2.2, 2.3, 2.5]]),
    'watches': tf.sparse.from_dense([[2, 85, 0, 0, 0], [33, 78, 2, 73, 1]])
}

sequence_input_layer = tf.keras.experimental.SequenceFeatures(columns)
sequence_input, sequence_length = sequence_input_layer(
    features, training=training)
sequence_length_mask = tf.sequence_mask(sequence_length)

hidden_size = 32
rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
# return_state=True makes the layer return `[outputs, state]`, matching the
# two-value unpacking below.
rnn_layer = tf.keras.layers.RNN(rnn_cell, return_state=True)
outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
```
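For intuition, a quick look at the outputs of the example above:
`tf.sparse.from_dense` keeps only the non-zero entries, so the first example
has a true length of 2 and the second a length of 5, while `D` is the
1-element `rating` column plus the 10-dimensional `watches` embedding:

```python
print(sequence_input.shape)  # (2, 5, 11): batch of 2, T = 5 steps, D = 1 + 10
print(sequence_length)       # true (un-padded) lengths per example: [2 5]
```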
"Constructs a SequenceFeatures layer.
Args
feature_columns
- An iterable of dense sequence columns. Valid columns are
-
embedding_column
that wraps asequence_categorical_column_with_*
-sequence_numeric_column
. trainable
- Boolean, whether the layer's variables will be updated via gradient descent during training.
name
- Name to give to the SequenceFeatures.
**kwargs
- Keyword arguments to construct a layer.
Raises
ValueError
- If any of the
feature_columns
is not aSequenceDenseColumn
.
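A minimal sketch of the column-type check (feature names here are
hypothetical; a plain `numeric_column` is not a sequence column, so the
constructor rejects it):

```python
import tensorflow as tf

# Valid: a sequence numeric column is a SequenceDenseColumn.
layer = tf.keras.experimental.SequenceFeatures(
    [tf.feature_column.sequence_numeric_column('rating')])

# Invalid: a non-sequence column raises ValueError at construction time.
try:
    tf.keras.experimental.SequenceFeatures(
        [tf.feature_column.numeric_column('age')])
except ValueError as e:
    print(e)
```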
Ancestors
- keras.feature_column.base_feature_layer._BaseFeaturesLayer
- Layer
- tensorflow.python.module.module.Module
- tensorflow.python.training.tracking.tracking.AutoTrackable
- tensorflow.python.training.tracking.base.Trackable
- LayerVersionSelector
Methods
def call(self, features, training=None)
Returns sequence input corresponding to the `feature_columns`.

Args

features
- A dict mapping keys to tensors.
training
- Python boolean or None, indicating whether the layer is being run in
  training mode. This argument is passed to the call method of any
  `FeatureColumn` that takes a `training` argument. For example, if a
  `FeatureColumn` performed dropout, the column could expose a `training`
  argument to control whether the dropout should be applied. If `None`,
  defaults to `tf.keras.backend.learning_phase()`.

Returns

An `(input_layer, sequence_length)` tuple where:
- input_layer: A float `Tensor` of shape `[batch_size, T, D]`. `T` is the
  maximum sequence length for this batch, which could differ from batch to
  batch. `D` is the sum of `num_elements` for all `feature_columns`.
- sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence
  length for each example.

Raises

ValueError
- If features are not a dictionary.
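A minimal sketch of the call contract, reusing `sequence_input_layer` and
`features` from the class example above (the non-dict call is purely
illustrative):

```python
# Normal use: pass a dict mapping feature names to (sparse) tensors.
sequence_input, sequence_length = sequence_input_layer(features)

# Anything other than a dict is rejected.
try:
    sequence_input_layer(tf.constant([[1.0, 2.0]]))
except ValueError as e:
    print(e)  # roughly: 'We expected a dictionary here. Instead we got: ...'
```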
Inherited members
Layer:
activity_regularizer
add_loss
add_metric
add_update
add_variable
add_weight
apply
build
compute_dtype
compute_mask
compute_output_shape
compute_output_signature
count_params
dtype
dtype_policy
dynamic
finalize_state
from_config
get_config
get_input_at
get_input_mask_at
get_input_shape_at
get_losses_for
get_output_at
get_output_mask_at
get_output_shape_at
get_updates_for
get_weights
inbound_nodes
input
input_mask
input_shape
input_spec
losses
metrics
name
non_trainable_variables
non_trainable_weights
outbound_nodes
output
output_mask
output_shape
set_weights
supports_masking
trainable_variables
trainable_weights
variable_dtype
variables
weights