lightgbm with categorical feature error
See original GitHub issue
Package versions: onnx: 1.8.1, onnxruntime: 1.4.0, skl2onnx: 1.8.0, onnxmltools: 1.7.0
Here is my code:
` import pandas as pd from sklearn.model_selection import train_test_split import onnx import onnxmltools
import lightgbm as lgb from lightgbm import LGBMClassifier from skl2onnx.common.data_types import FloatTensorType from onnxmltools.convert.lightgbm.operator_converters.LightGbm import convert_lightgbm # noqa from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes # noqa from skl2onnx import convert_sklearn, update_registered_converter, version import onnxruntime as rt
print("onnx: ", onnx.version) print("onnxruntime: ", rt.version) print("skl2onnx: ", version) print("onnxmltools: ",onnxmltools.version)
CATE_MODE = True sample_name=‘sample2.csv’ cate_col_str = ‘mid,204,2591,2603’
parameters = { ‘n_jobs’: 1, ‘objective’: ‘binary’, ‘metric’: ‘auc’, ‘is_unbalance’: ‘true’, ‘boosting’: ‘gbdt’, ‘num_leaves’: 50, ‘num_trees’: 100, ‘feature_fraction’: 0.9, ‘bagging_fraction’: 0.8, ‘learning_rate’: 0.1, ‘min_data_in_leaf’: 500, ‘max_depth’: 15, ‘verbose’: 0, }
flat = pd.read_csv(sample_name, sep=‘\t’) cols = flat.columns
features = [] for i in cols: if i != ‘label’: features.append(i)
for i in flat.columns: flat[i] = flat[i].astype(float)
flat[‘label’] = flat[‘label’].astype(int)
if CATE_MODE: for cate_col in cate_col_str.split(‘,’): flat = flat[(flat[cate_col] >= 0) & (flat[cate_col] % 1 == 0)] flat[cate_col] = flat[cate_col].astype(‘category’)
label = [‘label’] x = flat[features] y = flat[label]
x, x_test, y, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y) x_train, x_valid, y_train, y_valid = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y) train_data = lgb.Dataset(x_train, y_train, free_raw_data=False) test_data = lgb.Dataset(x_test, label=y_test, free_raw_data=False) valid_data = lgb.Dataset(x_valid, label=y_valid, free_raw_data=False)
clf = LGBMClassifier(**parameters) clf.fit(X=x_train, y=y_train, eval_set=[(x_valid, y_valid)], early_stopping_rounds=10) print(‘best score’, clf.best_score_)
update_registered_converter( LGBMClassifier, ‘LightGbmLGBMClassifier’, calculate_linear_classifier_output_shapes, convert_lightgbm, options={‘nocl’: [True, False], ‘zipmap’: [True, False] })
version = ‘testtest’ onnx_name = f"lgb_skl_{version}.onnx"
dim = x_train.shape[1] initial_type = [(‘float_input’, FloatTensorType([None, dim]))] onx = convert_sklearn(clf, initial_types=initial_type, target_opset=12, options={id(clf): {‘zipmap’: False}})
print(dim)
with open(onnx_name, “wb”) as f: f.write(onx.SerializeToString()) print(‘success’) `
When I set CATE_MODE=False, the ONNX model is saved successfully. When I set CATE_MODE=True with a small dataset (sample.csv), the ONNX model is also saved successfully. When I set CATE_MODE=True with a large dataset (sample2.csv), the conversion fails with the following error:
` ValueError Traceback (most recent call last) /usr/local/lib/python3.6/dist-packages/skl2onnx/common/_container.py in add_node(self, op_type, inputs, outputs, op_domain, op_version, name, **attrs) 540 node = make_node(op_type, inputs, outputs, name=name, –> 541 _dtype=dtype, **attrs) 542 except ValueError as e:
/usr/local/lib/python3.6/dist-packages/skl2onnx/proto/onnx_helper_modified.py in make_node(op_type, inputs, outputs, name, doc_string, domain, _dtype, **kwargs) 67 make_attribute(key, value, dtype=_dtype, domain=domain) —> 68 for key, value in sorted(kwargs.items())) 69 return node
/usr/local/lib/python3.6/dist-packages/skl2onnx/proto/onnx_helper_modified.py in <genexpr>(.0) 67 make_attribute(key, value, dtype=_dtype, domain=domain) —> 68 for key, value in sorted(kwargs.items())) 69 return node
/usr/local/lib/python3.6/dist-packages/skl2onnx/proto/onnx_helper_modified.py in make_attribute(key, value, dtype, domain, doc_string) 183 key, type(value), dtype, –> 184 [type(_) for _, __ in zip(value, range(0, 5))])) 185 else:
ValueError: You passed in an iterable attribute but I cannot figure out its applicable type, key=‘nodes_values’, type=<class ‘list’>, dtype=None, types=[<class ‘float’>, <class ‘float’>, <class ‘float’>, <class ‘float’>, <class ‘float’>].
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last) <ipython-input-29-e6c46f777ebd> in <module> 1 model_def = to_onnx(clf, x_train.values.astype(numpy.float32), ----> 2 options={id(clf): {‘zipmap’: False}})
/usr/local/lib/python3.6/dist-packages/skl2onnx/convert.py in to_onnx(model, X, name, initial_types, target_opset, options, white_op, black_op, final_types, dtype) 212 name=name, options=options, 213 white_op=white_op, black_op=black_op, –> 214 final_types=final_types, dtype=dtype) 215 216
/usr/local/lib/python3.6/dist-packages/skl2onnx/convert.py in convert_sklearn(model, name, initial_types, doc_string, target_opset, custom_conversion_functions, custom_shape_calculators, custom_parsers, options, intermediate, white_op, black_op, final_types, dtype) 160 onnx_model = convert_topology(topology, name, doc_string, target_opset, 161 options=options, –> 162 remove_identity=not intermediate) 163 164 return (onnx_model, topology) if intermediate else onnx_model
/usr/local/lib/python3.6/dist-packages/skl2onnx/common/_topology.py in convert_topology(topology, model_name, doc_string, target_opset, channel_first_inputs, options, remove_identity) 1085 type(getattr(operator, ‘raw_model’, None)))) 1086 container.validate_options(operator) -> 1087 conv(scope, operator, container) 1088 1089 # Create a graph from its main components
/usr/local/lib/python3.6/dist-packages/skl2onnx/common/_registration.py in call(self, *args) 27 if args[1].raw_operator is not None: 28 args[2]._get_allowed_options(args[1].raw_operator) —> 29 return self._fct(*args) 30 31 def get_allowed_options(self):
/usr/local/lib/python3.6/dist-packages/onnxmltools/convert/lightgbm/operator_converters/LightGbm.py in convert_lightgbm(scope, operator, container) 300 ‘TreeEnsembleClassifier’, operator.input_full_names, 301 [label_tensor_name, probability_tensor_name], –> 302 op_domain=‘ai.onnx.ml’, **attrs) 303 304 prob_tensor = probability_tensor_name
/usr/local/lib/python3.6/dist-packages/skl2onnx/common/_container.py in add_node(self, op_type, inputs, outputs, op_domain, op_version, name, **attrs) 542 except ValueError as e: 543 raise ValueError(“Unable to create node ‘{}’ with name=‘{}’.” –> 544 “”.format(op_type, name)) from e 545 node.domain = op_domain 546
ValueError: Unable to create node ‘TreeEnsembleClassifier’ with name=‘LightGbmLGBMClassifier’. `
Also, when I change 'num_trees' from 100 to 10 (CATE_MODE=True, with the large dataset), the error disappears.
Issue Analytics
- State:
- Created 3 years ago
- Comments:10
Your example doesn't fail anymore with the latest changes in skl2onnx and onnxmltools (version 1.9.0, to be released soon but already available on GitHub).
I’ll close the issue. Feel free to reopen if needed.