Weight name conflicts when a Keras model contains subclassed layers
Describe the bug
Weight name duplication issues occur when using a subclassed Keras layer that contains multiple ops, for instance a custom layer with 1 Conv + 1 Dense (no custom kernel).
System information
TensorFlow installed from (source or binary): binary (pip install tensorflow-gpu)
TensorFlow version: 2.1.0
TensorFlow Model Optimization version: master (built from source)
Python version: 3.7
Describe the expected behavior
Each weight in the subclassed layer is annotated correctly.
Describe the current behavior
- When quantizing a layer, tfmot clones the original layer.
class ModelTransformer(object):

  def _get_keras_layer_weights(self, keras_layer):
    """Returns a map of weight name, weight matrix. Keeps keras ordering."""
    weights_map = collections.OrderedDict()
    for weight_tensor, weight_numpy in \
        zip(keras_layer.weights, keras_layer.get_weights()):
      weights_map[self._weight_name(weight_tensor.name)] = weight_numpy

    return weights_map
Because the self._weight_name(...) function strips the layer name information, only kernel:N or bias:N is left. If the cloned layer is a subclassed layer and has multiple weights like ‘mylayer/conv2d/kernel:0’ and ‘mylayer/dense/kernel:0’, the names conflict!
- When using built-in QuantizeConfigs (e.g. LastValQuantizeConfig), it adds weights with the postfixes _min and _max appended to the original weight name.
class QuantizeWrapper(tf.keras.layers.Wrapper):

  def build(self, input_shape):
    super(QuantizeWrapper, self).build(input_shape)

    self.optimizer_step = self.add_weight(
        'optimizer_step',
        initializer=tf.keras.initializers.Constant(-1),
        dtype=tf.dtypes.int32,
        trainable=False)

    self._weight_vars = []
    for weight, quantizer in \
        self.quantize_config.get_weights_and_quantizers(self.layer):
      quantizer_vars = quantizer.build(weight.shape,
                                       self._weight_name(weight.name), self)
Because the self._weight_name(...) function strips the layer name information, only kernel:N or bias:N is left. If the cloned layer is a subclassed layer with multiple weights, both ‘mylayer/conv2d/kernel:0’ and ‘mylayer/dense/kernel:0’ become ‘kernel:0’, and the min/max weights all become ‘kernel_min:0’ and ‘kernel_max:0’. The names conflict!
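For illustration, a minimal standalone sketch of the collision (the _weight_name below is a simplified stand-in for the tfmot helpers quoted above, and the weight names are just examples):

import collections

# Simplified stand-in for tfmot's _weight_name: keep only the part after
# the last '/', dropping all layer-scope information.
def _weight_name(name):
    return name.split('/')[-1]

# Two different weights of one subclassed layer (example names).
weight_names = ['mylayer/conv2d/kernel:0', 'mylayer/dense/kernel:0']

weights_map = collections.OrderedDict()
for name in weight_names:
    weights_map[_weight_name(name)] = None  # stands in for the numpy value

# Both names collapse to 'kernel:0', so the second entry overwrites the first.
print(list(weights_map))  # ['kernel:0']
assert len(weights_map) == 1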
Code to reproduce the issue
This is my subclassed-layer version of the MNIST example.
from typing import Any, Dict, List, Optional, Tuple
# for model construction
import tensorflow as tf
# for model quantization
from tensorflow_model_optimization.python.core.quantization.keras import quantize_config
from tensorflow_model_optimization.python.core.quantization.keras import quantizers
class MnistNet(tf.keras.layers.Layer):

    def __init__(self, num_classes: int, **kwargs: Any):
        super().__init__(**kwargs)
        self.num_classes = num_classes

    def get_config(self) -> Dict[str, Any]:
        config = super().get_config()
        config.update({
            'num_classes': self.num_classes,
        })
        return config

    def build(self, input_shape: Any) -> None:
        self.conv1 = tf.keras.layers.Conv2D(
            32, 5,
            padding='same',
            activation='relu',
            use_bias=False,
            input_shape=input_shape,
        )
        self.pool1 = tf.keras.layers.MaxPooling2D((2, 2), (2, 2), padding='same')
        self.conv2 = tf.keras.layers.Conv2D(
            64, 5,
            padding='same',
            activation='relu',
            use_bias=False,
        )
        self.pool2 = tf.keras.layers.MaxPooling2D((2, 2), (2, 2), padding='same')
        self.flatten = tf.keras.layers.Flatten()
        self.fc1 = tf.keras.layers.Dense(1024, activation='relu')
        self.dropout = tf.keras.layers.Dropout(0.4)
        self.fc2 = tf.keras.layers.Dense(self.num_classes)
        self.softmax = tf.keras.layers.Softmax()

    def call(
        self,
        inputs: tf.Tensor,
        training: Optional[bool] = None
    ) -> Tuple[tf.Tensor, Dict[str, Any]]:
        if training is None:
            training = tf.keras.backend.learning_phase()
        x = self.conv1(inputs, training=training)  # -> 28 x 28 x 32
        x = self.pool1(x, training=training)       # -> 14 x 14 x 32
        x = self.conv2(x, training=training)       # -> 14 x 14 x 64
        x = self.pool2(x, training=training)       # -> 7 x 7 x 64
        x = self.flatten(x, training=training)     # -> 3136
        x = self.fc1(x, training=training)         # -> 1024
        x = self.dropout(x, training=training)     # -> 1024
        x = self.fc2(x, training=training)         # -> 10
        x = self.softmax(x, training=training)     # -> 10
        return x, {}
class MnistNetQuantizeConfig(quantize_config.QuantizeConfig):

    def __init__(self):
        self.sublayers_to_be_quantized = [
            'conv1', 'conv2', 'fc1', 'fc2'
        ]

    def get_weights_and_quantizers(self, layer):
        return [
            (sublayer.kernel, MnistNetQuantizer(sublayer.name))
            for name in self.sublayers_to_be_quantized
            for sublayer in [getattr(layer, name)]
        ]

    def get_activations_and_quantizers(self, layer):
        return [
            (sublayer.activation, MnistNetQuantizer(sublayer.name))
            for name in self.sublayers_to_be_quantized
            for sublayer in [getattr(layer, name)]
        ]

    def set_quantize_weights(self, layer, quantize_weights):
        pass

    def set_quantize_activations(self, layer, quantize_activations):
        pass

    def get_output_quantizers(self, layer):
        return []

    def get_config(self) -> Dict[str, Any]:
        return {}
# Fixed version of LastValueQuantizer
class MnistNetQuantizer(quantizers.Quantizer):

    def __init__(
        self,
        layer_name: str,
        num_bits: int = 8,
        per_channel: bool = False,
        symmetric: bool = False,
        narrow_range: bool = False,
        initial_min: float = -1.0,
        initial_max: float = +1.0,
    ):
        self.layer_name = layer_name
        self.num_bits = num_bits
        self.per_channel = per_channel
        self.symmetric = symmetric
        self.narrow_range = narrow_range
        self.initial_min = initial_min
        self.initial_max = initial_max

    def get_config(self) -> Dict[str, Any]:
        return {
            'layer_name': self.layer_name,
            'num_bits': self.num_bits,
            'per_channel': self.per_channel,
            'symmetric': self.symmetric,
            'narrow_range': self.narrow_range,
            'initial_min': self.initial_min,
            'initial_max': self.initial_max,
        }

    def build(
        self,
        tensor_shape: Any,
        name: str,
        layer: tf.keras.layers.Layer,
    ) -> Dict[str, Any]:
        # Prepend layer_name to the weight name, because tfmot passes the
        # 'split' weight name (e.g. 'kernel') and duplicate-name issues
        # occur otherwise. This is why a subclassed keras layer cannot use
        # the built-in LastValueQuantizer.
        min_var = layer.add_weight(
            self.layer_name + '/' + name + '_min',
            initializer=tf.keras.initializers.Constant(self.initial_min),
            trainable=False,
        )
        max_var = layer.add_weight(
            self.layer_name + '/' + name + '_max',
            initializer=tf.keras.initializers.Constant(self.initial_max),
            trainable=False,
        )
        return {'min_var': min_var, 'max_var': max_var}
    def __call__(self, inputs, step, training, **kwargs):
        with tf.name_scope('MnistNetQuantization'):
            min_var = kwargs['min_var']
            max_var = kwargs['max_var']
            input_shape = inputs.get_shape()
            input_dim = len(input_shape)

            if not training:
                if self.per_channel:
                    assert len(min_var.get_shape()) == 1
                    assert len(max_var.get_shape()) == 1
                    return tf.quantization.fake_quant_with_min_max_vars_per_channel(
                        inputs, min_var, max_var,
                        num_bits=self.num_bits,
                        narrow_range=self.narrow_range,
                    )
                else:
                    assert min_var.get_shape() == []
                    assert max_var.get_shape() == []
                    return tf.quantization.fake_quant_with_min_max_vars(
                        inputs, min_var, max_var,
                        num_bits=self.num_bits,
                        narrow_range=self.narrow_range
                    )

            if self.per_channel:
                if input_dim == 2:
                    reduce_dims = [0]
                elif input_dim == 4:
                    reduce_dims = [0, 1, 2]

            if self.per_channel:
                if input_dim >= 2:
                    batch_min = tf.math.reduce_min(
                        inputs, axis=reduce_dims, name='BatchMin'
                    )
                else:
                    batch_min = inputs
            else:
                batch_min = tf.math.reduce_min(inputs, name='BatchMin')

            if self.per_channel:
                if input_dim >= 2:
                    batch_max = tf.math.reduce_max(
                        inputs, axis=reduce_dims, name='BatchMax')
                else:
                    batch_max = inputs
            else:
                batch_max = tf.math.reduce_max(inputs, name='BatchMax')

            if self.symmetric:
                if self.narrow_range:
                    min_max_ratio = -1
                else:
                    min_max_ratio = -((1 << self.num_bits) - 2) / (1 << self.num_bits)
                range_min = tf.math.minimum(batch_min, batch_max / min_max_ratio)
                range_max = tf.math.maximum(batch_max, batch_min * min_max_ratio)
            else:
                range_min = tf.math.minimum(batch_min, 0.0)
                range_max = tf.math.maximum(batch_max, 0.0)

            assign_min = tf.compat.v1.assign(min_var, range_min, name='AssignMinLast')
            assign_max = tf.compat.v1.assign(max_var, range_max, name='AssignMaxLast')

            if self.per_channel:
                assert len(assign_min.get_shape()) == 1
                assert len(assign_max.get_shape()) == 1
                return tf.quantization.fake_quant_with_min_max_vars_per_channel(
                    inputs, min_var, max_var,
                    num_bits=self.num_bits,
                    narrow_range=self.narrow_range,
                )
            else:
                assert assign_min.get_shape() == []
                assert assign_max.get_shape() == []
                return tf.quantization.fake_quant_with_min_max_vars(
                    inputs, min_var, max_var,
                    num_bits=self.num_bits,
                    narrow_range=self.narrow_range
                )
I lift this layer into a Keras functional model like this:

# import for quantize_annotate_layer (assumed location; not shown in the imports above)
from tensorflow_model_optimization.python.core.quantization.keras.quantize import quantize_annotate_layer

inputs = tf.keras.Input(
    shape=input_shape,
    batch_size=None,
    dtype=tf.dtypes.float32,
)
annotated_net = quantize_annotate_layer(
    MnistNet(num_classes),
    quantize_config=MnistNetQuantizeConfig(),
)
outputs, _ = annotated_net(inputs)
model_custom = tf.keras.Model(inputs=inputs, outputs=outputs, name='MnistCustom')
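Afterwards I apply quantization roughly like this (a sketch only; quantize_scope and quantize_apply are tfmot's public API, and the custom-object names come from my code above):

import tensorflow_model_optimization as tfmot

# Register the custom classes so the clone/transform step inside
# quantize_apply can deserialize them.
with tfmot.quantization.keras.quantize_scope({
    'MnistNet': MnistNet,
    'MnistNetQuantizeConfig': MnistNetQuantizeConfig,
    'MnistNetQuantizer': MnistNetQuantizer,
}):
    # quantize_apply clones the annotated model; this is where the stripped
    # weight names of the subclassed MnistNet layer collide.
    model_quantized = tfmot.quantization.keras.quantize_apply(model_custom)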
Hi @kalaluthien,
Thanks a lot for the detailed report. This is very helpful in reproducing the issue.
The removal of the layer name from weights was done because new weights get created with augmented layer names (dense_2 instead of dense_1) when the model is cloned, which causes the weight names to not match when storing the values after graph transformations. However, as you rightly point out, this fails when a layer contains multiple sub-layers that may have the same weight name. I’ll work on a fix for this. Our test set did not include a layer with multiple weights of the same name.
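A small sketch of the renaming problem that full names run into (layer names here are only examples; the exact suffix depends on how many layers were already created):

import tensorflow as tf

# The original layer owns a weight under its own name scope.
dense = tf.keras.layers.Dense(4, name='dense_1')
dense.build((None, 8))
print(dense.kernel.name)    # e.g. 'dense_1/kernel:0'

# A layer rebuilt without an explicit name gets a uniquified name
# (e.g. 'dense_2'), so its full weight name no longer matches
# 'dense_1/kernel:0' when restoring values after a transform.
rebuilt = tf.keras.layers.Dense(4)
rebuilt.build((None, 8))
print(rebuilt.kernel.name)  # e.g. 'dense_2/kernel:0'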
This is a similar issue. I’ll work on a fix for it and let you know once it’s out.
Thanks once again for the feedback!
Thank you for your reminder. I’ll share this patch with our company. BTW, I have worked around this issue by porting every internal custom layer to a bunch of functional layers.