Shape of example features doesn't match expectation when using example Keras code
See original GitHub issue
Hello,
I’m trying to run the example code using the Keras model and the tf_record data. The problem is that the shape of the example feature is (None, None, None), but it is expected to have only 2 dimensions.
It is probably something stupid. Hope somebody can point me towards the error.
Here is my example code to recreate the problem. It is mostly copied and pasted from the examples (the error message is below).
from google.protobuf import text_format
from tensorflow_serving.apis import input_pb2
import os
import tensorflow as tf
import tensorflow_ranking as tfr
# Path of the TFRecord file that test_ranking_example writes and then trains on.
_FILE_NAME = "/tmp/ranking_example.tf_record"
# Name of the per-example feature that make_dataset pops out and uses as the label.
_LABEL_FEATURE = "relevance"
# Default value given to the label column when its parsing spec is built.
_PADDING_LABEL = -1
# Feature name passed as size_feature_name to the dataset builder and the Keras model.
_SIZE="example_list_size"
def create_feature_columns():
    """Build the context and per-example feature columns used by the ranker.

    Each feature is an int64 id that is hashed into 100 buckets and then
    wrapped in a 20-dimensional embedding column.

    Returns:
        A `(context_feature_columns, example_feature_columns)` tuple of
        dicts keyed by feature name: `"user_id"` on the context side and
        `"document_id"` on the example side.
    """

    def _hashed_id_embedding(feature_key):
        # Hash the raw id into a fixed number of buckets, then embed it.
        hashed_ids = tf.feature_column.categorical_column_with_hash_bucket(
            key=feature_key, hash_bucket_size=100, dtype=tf.int64)
        return tf.feature_column.embedding_column(
            categorical_column=hashed_ids, dimension=20)

    context_columns = {"user_id": _hashed_id_embedding("user_id")}
    example_columns = {"document_id": _hashed_id_embedding("document_id")}
    return context_columns, example_columns
def make_dataset(file_pattern,
                 batch_size,
                 randomize_input=False,
                 num_epochs=None):
    """Create a `(features, label)` dataset from ELWC-formatted TFRecords.

    Args:
        file_pattern: Forwarded to `tfr.data.build_ranking_dataset` as
            `file_pattern`.
        batch_size: Forwarded to the dataset builder as `batch_size`.
        randomize_input: Forwarded to the dataset builder as `shuffle`.
        num_epochs: Forwarded to the dataset builder as `num_epochs`.

    Returns:
        The ranking dataset mapped so that each element is a
        `(features, label)` pair, where `label` is the `_LABEL_FEATURE`
        tensor removed from `features`, squeezed on axis 2 and cast to
        `tf.float32`.
    """
    context_columns, example_columns = create_feature_columns()

    context_spec = tf.feature_column.make_parse_example_spec(
        context_columns.values())

    # The label is parsed alongside the example features, so its column is
    # appended to the example columns before the parsing spec is built.
    relevance_column = tf.feature_column.numeric_column(
        _LABEL_FEATURE, dtype=tf.int64, default_value=_PADDING_LABEL)
    example_spec = tf.feature_column.make_parse_example_spec(
        list(example_columns.values()) + [relevance_column])

    ranking_dataset = tfr.data.build_ranking_dataset(
        file_pattern=file_pattern,
        data_format=tfr.data.ELWC,
        batch_size=batch_size,
        context_feature_spec=context_spec,
        example_feature_spec=example_spec,
        reader=tf.data.TFRecordDataset,
        shuffle=randomize_input,
        num_epochs=num_epochs,
        size_feature_name=_SIZE)

    def _split_off_label(features):
        # Remove the label from the feature dict (mutates `features`).
        raw_label = features.pop(_LABEL_FEATURE)
        # Assumes the parsed label has a size-1 axis at index 2 -- TODO confirm.
        squeezed = tf.squeeze(raw_label, axis=2)
        return features, tf.cast(squeezed, tf.float32)

    return ranking_dataset.map(_split_off_label)
def test_ranking_example():
    """Train the canned DNN ranker on a tiny synthetic ELWC file.

    Parses one `ExampleListWithContext` (one `user_id` context feature and
    two documents with `relevance` labels) from text format, writes it six
    times to `_FILE_NAME`, builds a dataset with batch size 2, and fits a
    `DNNRankingNetwork`-based Keras model for 10 epochs of 4 steps each.
    The test asserts that `fit` returned a truthy value.
    """
    # The text proto is kept flush-left so the literal passed to the parser
    # is unchanged.
    elwc_text = """
context {
features {
feature {
key: "user_id"
value { int64_list { value: 1 } }
}
}
}
examples {
features {
feature {
key: "document_id"
value { int64_list { value: 1 } }
}
feature {
key: "relevance"
value { int64_list { value: 1 } }
}
}
}
examples {
features {
feature {
key: "document_id"
value { int64_list { value: 2 } }
}
feature {
key: "relevance"
value { int64_list { value: 0 } }
}
}
}"""
    elwc = text_format.Parse(elwc_text, input_pb2.ExampleListWithContext())

    # Remove any file left over from a previous run; a missing file is fine.
    try:
        os.remove(_FILE_NAME)
    except FileNotFoundError:
        pass

    # Write the same serialized record six times.
    serialized_elwc = elwc.SerializeToString()
    with tf.io.TFRecordWriter(_FILE_NAME) as record_writer:
        for _ in range(6):
            record_writer.write(serialized_elwc)

    train_dataset = make_dataset(_FILE_NAME, 2)

    context_columns, example_columns = create_feature_columns()

    # Premade scoring network from tfr.keras.canned.
    scoring_network = tfr.keras.canned.DNNRankingNetwork(
        context_feature_columns=context_columns,
        example_feature_columns=example_columns,
        hidden_layer_dims=[1024, 512, 256],
        activation=tf.nn.relu,
        dropout=0.5)

    loss_fn = tfr.keras.losses.get(tfr.losses.RankingLossKey.SOFTMAX_LOSS)

    # Library-provided default set of ranking metrics.
    ranking_metrics = tfr.keras.metrics.default_keras_metrics()

    ranker = tfr.keras.model.create_keras_model(
        network=scoring_network,
        loss=loss_fn,
        metrics=ranking_metrics,
        optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.05),
        size_feature_name=_SIZE)

    fit_result = ranker.fit(
        train_dataset,
        steps_per_epoch=4,
        epochs=10,
    )
    assert fit_result
error:
test_ranking_all.py:132:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../../../.venv/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py:819: in fit
use_multiprocessing=use_multiprocessing)
../../../.venv/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py:235: in fit
use_multiprocessing=use_multiprocessing)
../../../.venv/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py:593: in _process_training_inputs
use_multiprocessing=use_multiprocessing)
../../../.venv/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py:706: in _process_inputs
use_multiprocessing=use_multiprocessing)
../../../.venv/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/data_adapter.py:702: in __init__
x = standardize_function(x)
../../../.venv/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py:684: in standardize_function
return dataset.map(map_fn, num_parallel_calls=dataset_ops.AUTOTUNE)
../../../.venv/lib/python3.7/site-packages/tensorflow_core/python/data/ops/dataset_ops.py:1591: in map
self, map_func, num_parallel_calls, preserve_cardinality=True)
../../../.venv/lib/python3.7/site-packages/tensorflow_core/python/data/ops/dataset_ops.py:3926: in __init__
use_legacy_function=use_legacy_function)
../../../.venv/lib/python3.7/site-packages/tensorflow_core/python/data/ops/dataset_ops.py:3147: in __init__
self._function = wrapper_fn._get_concrete_function_internal()
../../../.venv/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py:2395: in _get_concrete_function_internal
*args, **kwargs)
../../../.venv/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py:2389: in _get_concrete_function_internal_garbage_collected
graph_function, _, _ = self._maybe_define_function(args, kwargs)
../../../.venv/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py:2703: in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
../../../.venv/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py:2593: in _create_graph_function
capture_by_value=self._capture_by_value),
../../../.venv/lib/python3.7/site-packages/tensorflow_core/python/framework/func_graph.py:978: in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
../../../.venv/lib/python3.7/site-packages/tensorflow_core/python/data/ops/dataset_ops.py:3140: in wrapper_fn
ret = _wrapper_helper(*args)
../../../.venv/lib/python3.7/site-packages/tensorflow_core/python/data/ops/dataset_ops.py:3082: in _wrapper_helper
ret = autograph.tf_convert(func, ag_ctx)(*nested_args)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
args = ({'document_id': <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7f47446fded0>, 'example_list_size....framework.sparse_tensor.SparseTensor object at 0x7f4744716590>}, <tf.Tensor 'args_3:0' shape=(2, None) dtype=float32>)
kwargs = {}
options = <tensorflow.python.autograph.core.converter.ConversionOptions object at 0x7f4744716950>
def wrapper(*args, **kwargs):
"""Wrapper that calls the converted version of f."""
options = converter.ConversionOptions(
recursive=recursive,
user_requested=user_requested,
optional_features=optional_features)
try:
return converted_call(f, args, kwargs, options=options)
except Exception as e: # pylint:disable=broad-except
if hasattr(e, 'ag_error_metadata'):
> raise e.ag_error_metadata.to_exception(e)
E ValueError: in converted code:
E
E /content-recommendations/.venv/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py:677 map_fn
E batch_size=None)
E /content-recommendations/.venv/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py:2410 _standardize_tensors
E exception_prefix='input')
E content-recommendations/.venv/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_utils.py:573 standardize_input_data
E 'with shape ' + str(data_shape))
E
E ValueError: Error when checking input: expected document_id to have 2 dimensions, but got array with shape (None, None, None)
../../../.venv/lib/python3.7/site-packages/tensorflow_core/python/autograph/impl/api.py:237: ValueError
pip list:
Package Version
---------------------- -----------
absl-py 0.9.0
alembic 1.4.2
aniso8601 7.0.0
appdirs 1.4.3
astor 0.8.1
attrs 19.3.0
black 19.10b0
bleach 3.1.4
boto3 1.9.253
botocore 1.12.253
cached-property 1.5.1
cachetools 4.1.0
certifi 2020.4.5.1
cfgv 3.1.0
chardet 3.0.4
click 7.1.1
coloredlogs 14.0
contextlib2 0.6.0.post1
cycler 0.10.0
dagit 0.7.7
dagster 0.7.7
dagster-aws 0.7.7
dagster-cron 0.7.7
dagster-graphql 0.7.7
dagster-pandas 0.7.7
decorator 4.4.2
defusedxml 0.6.0
distlib 0.3.0
docutils 0.15.2
entrypoints 0.3
filelock 3.0.12
Flask 1.1.2
Flask-Cors 3.0.8
Flask-GraphQL 2.0.1
Flask-Sockets 0.2.1
funcsigs 1.0.2
future 0.18.2
gast 0.2.2
gevent 20.4.0
gevent-websocket 0.10.1
google-auth 1.14.0
google-auth-oauthlib 0.4.1
google-pasta 0.2.0
graphene 2.1.8
graphql-core 2.3.1
graphql-relay 2.0.1
graphql-server-core 1.2.0
graphql-ws 0.3.0
graphviz 0.14
greenlet 0.4.15
grpcio 1.28.1
h5py 2.10.0
humanfriendly 8.2
identify 1.4.15
idna 2.9
importlib-metadata 1.6.0
ipython-genutils 0.2.0
itsdangerous 1.1.0
Jinja2 2.11.2
jmespath 0.9.5
jsonschema 3.2.0
jupyter-core 4.6.3
Keras-Applications 1.0.8
Keras-Preprocessing 1.1.0
kiwisolver 1.2.0
Mako 1.1.2
Markdown 3.2.1
MarkupSafe 1.1.1
matplotlib 3.2.1
mistune 0.8.4
more-itertools 8.2.0
nbconvert 5.6.1
nbformat 5.0.6
nodeenv 1.3.5
numpy 1.18.3
oauthlib 3.1.0
opt-einsum 3.2.1
packaging 20.3
pandas 1.0.3
pandocfilters 1.4.2
pathspec 0.8.0
pathtools 0.1.2
pip 20.0.2
pip-tools 5.0.0
pkg-resources 0.0.0
pluggy 0.13.1
pre-commit 2.3.0
promise 2.3
protobuf 3.11.3
psycopg2-binary 2.8.5
py 1.8.1
pyarrow 0.17.0
pyasn1 0.4.8
pyasn1-modules 0.2.8
Pygments 2.6.1
PyMySQL 0.9.3
pyparsing 2.4.7
pyrsistent 0.16.0
pytest 5.4.1
python-crontab 2.4.1
python-dateutil 2.8.1
python-editor 1.0.4
pytz 2019.3
PyYAML 5.3.1
regex 2020.4.4
requests 2.23.0
requests-oauthlib 1.3.0
rsa 4.0
Rx 1.6.1
s3transfer 0.2.1
scipy 1.4.1
setuptools 46.1.3
six 1.14.0
SQLAlchemy 1.3.16
tensorboard 2.1.1
tensorflow 2.1.0
tensorflow-estimator 2.1.0
tensorflow-ranking 0.3.0
tensorflow-serving-api 2.1.0
termcolor 1.1.0
terminaltables 3.1.0
testpath 0.4.4
toml 0.10.0
toposort 1.5
tqdm 4.45.0
traitlets 4.3.3
typed-ast 1.4.1
urllib3 1.25.9
virtualenv 20.0.18
watchdog 0.10.2
wcwidth 0.1.9
webencodings 0.5.1
Werkzeug 1.0.1
wheel 0.34.2
wrapt 1.12.1
zipp 3.1.0
Issue Analytics
- State:
- Created 3 years ago
- Reactions:1
- Comments:6 (2 by maintainers)
Top Results From Across the Web
How to Make Predictions with Keras - Machine Learning Mastery
Once you choose and fit a final deep learning model in Keras, you can use it to make predictions on new data instances....
Read more >Introduction to Keras for Engineers
Introduction. Are you a machine learning engineer looking to use Keras to ship deep-learning powered features in real products?
Read more >Change input shape dimensions for fine-tuning with Keras
In this tutorial, you will learn how to change the input shape tensor dimensions for fine-tuning using Keras. After going through this guide ......
Read more >How to determine input shape in keras?
The number of rows in your training data is not part of the input shape of the network because the training process feeds...
Read more >A Practical Tutorial With Examples for Images and Text in Keras
Transfer learning is about leveraging feature representations from a pre-trained model, so you don't have to train a new model from scratch.
Read more >
Top Related Medium Post
No results found
Top Related StackOverflow Question
No results found
Troubleshoot Live Code
Lightrun enables developers to add logs, metrics and snapshots to live code - no restarts or redeploys required.
Start Free
Top Related Reddit Thread
No results found
Top Related Hackernoon Post
No results found
Top Related Tweet
No results found
Top Related Dev.to Post
No results found
Top Related Hashnode Post
No results found

Well, since it’s categorical data, TensorFlow only offers three different solutions here (see https://www.tensorflow.org/tutorials/structured_data/feature_columns#categorical_columns). A numeric column wouldn’t make much sense: it would imply that there is somehow a meaning in the ID value itself (e.g. 3x the ID = 3x better).
cool. hope it’ll work
@yzhangswingman : Thanks for finding the root cause of the issue! This needs fixing, and I’ll update it in the next release.