Stuck on an issue?

Lightrun Answers was designed to reduce the constant googling that comes with debugging 3rd party libraries. It collects links to all the places you might be looking at while hunting down a tough bug.

And, if you’re still stuck at the end, we’re happy to hop on a call to see how we can help out.

lightgbm with categorical feature error

See original GitHub issue

Package versions: onnx 1.8.1, onnxruntime 1.4.0, skl2onnx 1.8.0, onnxmltools 1.7.0.

Here is my code:

` import pandas as pd from sklearn.model_selection import train_test_split import onnx import onnxmltools

import lightgbm as lgb from lightgbm import LGBMClassifier from skl2onnx.common.data_types import FloatTensorType from onnxmltools.convert.lightgbm.operator_converters.LightGbm import convert_lightgbm # noqa from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes # noqa from skl2onnx import convert_sklearn, update_registered_converter, version import onnxruntime as rt

print("onnx: ", onnx.version) print("onnxruntime: ", rt.version) print("skl2onnx: ", version) print("onnxmltools: ",onnxmltools.version)

CATE_MODE = True sample_name=‘sample2.csv’ cate_col_str = ‘mid,204,2591,2603’

parameters = { ‘n_jobs’: 1, ‘objective’: ‘binary’, ‘metric’: ‘auc’, ‘is_unbalance’: ‘true’, ‘boosting’: ‘gbdt’, ‘num_leaves’: 50, ‘num_trees’: 100, ‘feature_fraction’: 0.9, ‘bagging_fraction’: 0.8, ‘learning_rate’: 0.1, ‘min_data_in_leaf’: 500, ‘max_depth’: 15, ‘verbose’: 0, }

flat = pd.read_csv(sample_name, sep=‘\t’) cols = flat.columns

features = [] for i in cols: if i != ‘label’: features.append(i)

for i in flat.columns: flat[i] = flat[i].astype(float)

flat[‘label’] = flat[‘label’].astype(int)

if CATE_MODE: for cate_col in cate_col_str.split(‘,’): flat = flat[(flat[cate_col] >= 0) & (flat[cate_col] % 1 == 0)] flat[cate_col] = flat[cate_col].astype(‘category’)

label = [‘label’] x = flat[features] y = flat[label]

x, x_test, y, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y) x_train, x_valid, y_train, y_valid = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y) train_data = lgb.Dataset(x_train, y_train, free_raw_data=False) test_data = lgb.Dataset(x_test, label=y_test, free_raw_data=False) valid_data = lgb.Dataset(x_valid, label=y_valid, free_raw_data=False)

clf = LGBMClassifier(**parameters) clf.fit(X=x_train, y=y_train, eval_set=[(x_valid, y_valid)], early_stopping_rounds=10) print(‘best score’, clf.best_score_)

update_registered_converter( LGBMClassifier, ‘LightGbmLGBMClassifier’, calculate_linear_classifier_output_shapes, convert_lightgbm, options={‘nocl’: [True, False], ‘zipmap’: [True, False] })

version = ‘testtest’ onnx_name = f"lgb_skl_{version}.onnx"

dim = x_train.shape[1] initial_type = [(‘float_input’, FloatTensorType([None, dim]))] onx = convert_sklearn(clf, initial_types=initial_type, target_opset=12, options={id(clf): {‘zipmap’: False}})

print(dim)

with open(onnx_name, “wb”) as f: f.write(onx.SerializeToString()) print(‘success’) `

When I set CATE_MODE=False, the ONNX model is saved successfully. When I set CATE_MODE=True with a small dataset (sample.csv), the ONNX model is also saved successfully. When I set CATE_MODE=True with a large dataset (sample2.csv), the conversion fails with the following error:

` ValueError Traceback (most recent call last) /usr/local/lib/python3.6/dist-packages/skl2onnx/common/_container.py in add_node(self, op_type, inputs, outputs, op_domain, op_version, name, **attrs) 540 node = make_node(op_type, inputs, outputs, name=name, –> 541 _dtype=dtype, **attrs) 542 except ValueError as e:

/usr/local/lib/python3.6/dist-packages/skl2onnx/proto/onnx_helper_modified.py in make_node(op_type, inputs, outputs, name, doc_string, domain, _dtype, **kwargs) 67 make_attribute(key, value, dtype=_dtype, domain=domain) —> 68 for key, value in sorted(kwargs.items())) 69 return node

/usr/local/lib/python3.6/dist-packages/skl2onnx/proto/onnx_helper_modified.py in <genexpr>(.0) 67 make_attribute(key, value, dtype=_dtype, domain=domain) —> 68 for key, value in sorted(kwargs.items())) 69 return node

/usr/local/lib/python3.6/dist-packages/skl2onnx/proto/onnx_helper_modified.py in make_attribute(key, value, dtype, domain, doc_string) 183 key, type(value), dtype, –> 184 [type(_) for _, __ in zip(value, range(0, 5))])) 185 else:

ValueError: You passed in an iterable attribute but I cannot figure out its applicable type, key=‘nodes_values’, type=<class ‘list’>, dtype=None, types=[<class ‘float’>, <class ‘float’>, <class ‘float’>, <class ‘float’>, <class ‘float’>].

The above exception was the direct cause of the following exception:

ValueError Traceback (most recent call last) <ipython-input-29-e6c46f777ebd> in <module> 1 model_def = to_onnx(clf, x_train.values.astype(numpy.float32), ----> 2 options={id(clf): {‘zipmap’: False}})

/usr/local/lib/python3.6/dist-packages/skl2onnx/convert.py in to_onnx(model, X, name, initial_types, target_opset, options, white_op, black_op, final_types, dtype) 212 name=name, options=options, 213 white_op=white_op, black_op=black_op, –> 214 final_types=final_types, dtype=dtype) 215 216

/usr/local/lib/python3.6/dist-packages/skl2onnx/convert.py in convert_sklearn(model, name, initial_types, doc_string, target_opset, custom_conversion_functions, custom_shape_calculators, custom_parsers, options, intermediate, white_op, black_op, final_types, dtype) 160 onnx_model = convert_topology(topology, name, doc_string, target_opset, 161 options=options, –> 162 remove_identity=not intermediate) 163 164 return (onnx_model, topology) if intermediate else onnx_model

/usr/local/lib/python3.6/dist-packages/skl2onnx/common/_topology.py in convert_topology(topology, model_name, doc_string, target_opset, channel_first_inputs, options, remove_identity) 1085 type(getattr(operator, ‘raw_model’, None)))) 1086 container.validate_options(operator) -> 1087 conv(scope, operator, container) 1088 1089 # Create a graph from its main components

/usr/local/lib/python3.6/dist-packages/skl2onnx/common/_registration.py in call(self, *args) 27 if args[1].raw_operator is not None: 28 args[2]._get_allowed_options(args[1].raw_operator) —> 29 return self._fct(*args) 30 31 def get_allowed_options(self):

/usr/local/lib/python3.6/dist-packages/onnxmltools/convert/lightgbm/operator_converters/LightGbm.py in convert_lightgbm(scope, operator, container) 300 ‘TreeEnsembleClassifier’, operator.input_full_names, 301 [label_tensor_name, probability_tensor_name], –> 302 op_domain=‘ai.onnx.ml’, **attrs) 303 304 prob_tensor = probability_tensor_name

/usr/local/lib/python3.6/dist-packages/skl2onnx/common/_container.py in add_node(self, op_type, inputs, outputs, op_domain, op_version, name, **attrs) 542 except ValueError as e: 543 raise ValueError(“Unable to create node ‘{}’ with name=‘{}’.” –> 544 “”.format(op_type, name)) from e 545 node.domain = op_domain 546

ValueError: Unable to create node ‘TreeEnsembleClassifier’ with name=‘LightGbmLGBMClassifier’. `

Also, when I change ‘num_trees’ from 100 to 10 (with CATE_MODE=True and the large dataset), the error disappears.

Issue Analytics

State:
Created 3 years ago
Comments: 10

Top GitHub Comments

1 reaction

xadupre commented, Aug 20, 2021

Your example doesn’t fail anymore with the latest changes in skl2onnx and onnxmltools (version 1.9.0, to be released soon, but already available on GitHub).

0 reactions

xadupre commented, Aug 27, 2021

I’ll close the issue. Feel free to reopen if needed.