Can't merge ECMWF perturbed and control forecasts together as xarray since release 0.10
Hi there,
Since release 0.10.1, the EUMETNET benchmark plugin has been partly broken: cfgrib apparently no longer accepts merging the control and perturbed ECMWF forecasts together.
I use the multi source to do that (see here), but when calling `to_xarray`, I get the following error:
```
DatasetBuildError Traceback (most recent call last)
File /opt/tljh/user/envs/climetlab_test/lib/python3.9/site-packages/cfgrib/dataset.py:633, in build_dataset_components(index, errors, encode_cf, squeeze, log, read_keys, time_dims, extra_coords)
632 try:
--> 633 dims, data_var, coord_vars = build_variable_components(
634 var_index,
635 encode_cf,
636 filter_by_keys,
637 errors=errors,
638 squeeze=squeeze,
639 read_keys=read_keys,
640 time_dims=time_dims,
641 extra_coords=extra_coords,
642 )
643 except DatasetBuildError as ex:
644 # NOTE: When a variable has more than one value for an attribute we need to raise all
645 # the values in the file, not just the ones associated with that variable. See #54.
File /opt/tljh/user/envs/climetlab_test/lib/python3.9/site-packages/cfgrib/dataset.py:471, in build_variable_components(index, encode_cf, filter_by_keys, log, errors, squeeze, read_keys, time_dims, extra_coords)
460 def build_variable_components(
461 index: abc.Index[T.Any, abc.Field],
462 encode_cf: T.Sequence[str] = (),
(...)
469 extra_coords: T.Dict[str, str] = {},
470 ) -> T.Tuple[T.Dict[str, int], Variable, T.Dict[str, Variable]]:
--> 471 data_var_attrs = enforce_unique_attributes(index, DATA_ATTRIBUTES_KEYS, filter_by_keys)
472 grid_type_keys = GRID_TYPE_MAP.get(index.getone("gridType"), [])
File /opt/tljh/user/envs/climetlab_test/lib/python3.9/site-packages/cfgrib/dataset.py:273, in enforce_unique_attributes(index, attributes_keys, filter_by_keys)
272 fbks.append(fbk)
--> 273 raise DatasetBuildError("multiple values for key %r" % key, key, fbks)
274 if values and values[0] not in ("undef", "unknown"):
DatasetBuildError: multiple values for key 'dataType'
During handling of the above exception, another exception occurred:
DatasetBuildError Traceback (most recent call last)
Input In [17], in <module>
----> 1 fcs = ds.to_xarray(xarray_open_dataset_kwargs=dict(squeeze=True))
2 fcs
File ~/climetlab-eumetnet-postprocessing-benchmark/climetlab_eumetnet_postprocessing_benchmark/gridded/training_data_forecasts.py:398, in TrainingDataForecastSurfacePostProcessed.to_xarray(self, **kwargs)
397 def to_xarray(self, **kwargs):
--> 398 fcs = self.source.to_xarray(**kwargs)
399 variables = list(fcs.keys())
400 ds_list = list()
File /opt/tljh/user/envs/climetlab_test/lib/python3.9/site-packages/climetlab/readers/grib/fieldset.py:203, in FieldSet.to_xarray(self, **kwargs)
185 xarray_open_dataset_kwargs[key] = mix_kwargs(
186 user=user_xarray_open_dataset_kwargs.pop(key, {}),
187 default={"errors": "raise"},
(...)
190 logging_main_key=key,
191 )
192 xarray_open_dataset_kwargs.update(
193 mix_kwargs(
194 user=user_xarray_open_dataset_kwargs,
(...)
200 )
201 )
--> 203 result = xr.open_dataset(
204 self,
205 **xarray_open_dataset_kwargs,
206 )
208 def math_prod(lst):
209 if not hasattr(math, "prod"):
210 # python 3.7 does not have math.prod
File /opt/tljh/user/envs/climetlab_test/lib/python3.9/site-packages/xarray/backends/api.py:495, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, backend_kwargs, *args, **kwargs)
483 decoders = _resolve_decoders_kwargs(
484 decode_cf,
485 open_backend_dataset_parameters=backend.open_dataset_parameters,
(...)
491 decode_coords=decode_coords,
492 )
494 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 495 backend_ds = backend.open_dataset(
496 filename_or_obj,
497 drop_variables=drop_variables,
498 **decoders,
499 **kwargs,
500 )
501 ds = _dataset_from_backend_dataset(
502 backend_ds,
503 filename_or_obj,
(...)
510 **kwargs,
511 )
512 return ds
File /opt/tljh/user/envs/climetlab_test/lib/python3.9/site-packages/cfgrib/xarray_plugin.py:99, in CfGribBackend.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, lock, indexpath, filter_by_keys, read_keys, encode_cf, squeeze, time_dims, errors, extra_coords)
77 def open_dataset(
78 self,
79 filename_or_obj: T.Union[str, abc.MappingFieldset[T.Any, abc.Field]],
(...)
96 extra_coords: T.Dict[str, str] = {},
97 ) -> xr.Dataset:
---> 99 store = CfGribDataStore(
100 filename_or_obj,
101 indexpath=indexpath,
102 filter_by_keys=filter_by_keys,
103 read_keys=read_keys,
104 encode_cf=encode_cf,
105 squeeze=squeeze,
106 time_dims=time_dims,
107 lock=lock,
108 errors=errors,
109 extra_coords=extra_coords,
110 )
111 with xr.core.utils.close_on_error(store):
112 vars, attrs = store.load() # type: ignore
File /opt/tljh/user/envs/climetlab_test/lib/python3.9/site-packages/cfgrib/xarray_plugin.py:39, in CfGribDataStore.__init__(self, filename, lock, **backend_kwargs)
37 else:
38 opener = dataset.open_fieldset
---> 39 self.ds = opener(filename, **backend_kwargs)
File /opt/tljh/user/envs/climetlab_test/lib/python3.9/site-packages/cfgrib/dataset.py:730, in open_fieldset(fieldset, indexpath, filter_by_keys, read_keys, time_dims, extra_coords, computed_keys, log, **kwargs)
728 index = messages.FieldsetIndex.from_fieldset(fieldset, index_keys, computed_keys)
729 filtered_index = index.subindex(filter_by_keys)
--> 730 return open_from_index(filtered_index, read_keys, time_dims, extra_coords, **kwargs)
File /opt/tljh/user/envs/climetlab_test/lib/python3.9/site-packages/cfgrib/dataset.py:706, in open_from_index(index, read_keys, time_dims, extra_coords, **kwargs)
699 def open_from_index(
700 index: abc.Index[T.Any, abc.Field],
701 read_keys: T.Sequence[str] = (),
(...)
704 **kwargs: T.Any,
705 ) -> Dataset:
--> 706 dimensions, variables, attributes, encoding = build_dataset_components(
707 index, read_keys=read_keys, time_dims=time_dims, extra_coords=extra_coords, **kwargs
708 )
709 return Dataset(dimensions, variables, attributes, encoding)
File /opt/tljh/user/envs/climetlab_test/lib/python3.9/site-packages/cfgrib/dataset.py:654, in build_dataset_components(index, errors, encode_cf, squeeze, log, read_keys, time_dims, extra_coords)
652 fbks.append(fbk)
653 error_message += "\n filter_by_keys=%r" % fbk
--> 654 raise DatasetBuildError(error_message, key, fbks)
655 short_name = data_var.attributes.get("GRIB_shortName", "paramId_%d" % param_id)
656 var_name = data_var.attributes.get("GRIB_cfVarName", "unknown")
DatasetBuildError: multiple values for unique key, try re-open the file with one of:
filter_by_keys={'dataType': 'pf'}
filter_by_keys={'dataType': 'cf'}
```
So the problem seems to be that cfgrib has become stricter about multiple values for keys, enforcing them with the function `enforce_unique_attributes`. The cfgrib version in use is 0.9.10. Following the hint in the error message, the two `dataType` values can at least be opened separately and merged back by hand, as sketched below.
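A minimal sketch of that two-step workaround (the file name is a placeholder, and the handling of the `number` coordinate is an assumption about how cfgrib exposes the control run, not something stated in the error message):

```python
import xarray as xr

path = "ens_forecast.grib"  # placeholder path

# Open the control (cf) and perturbed (pf) forecasts separately,
# as the DatasetBuildError message suggests.
cf = xr.open_dataset(
    path, engine="cfgrib",
    backend_kwargs={"filter_by_keys": {"dataType": "cf"}},
)
pf = xr.open_dataset(
    path, engine="cfgrib",
    backend_kwargs={"filter_by_keys": {"dataType": "pf"}},
)

# The control run is usually exposed with a scalar "number" coordinate;
# promote it to a length-1 dimension so it can be concatenated with the
# perturbed members along "number".
cf = cf.expand_dims("number")
fcs = xr.concat([cf, pf], dim="number")
```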
Still, how can I resolve this issue properly? Can I tell cfgrib to be more forgiving about attribute uniqueness?
Thank you in advance.
To reproduce, simply install the plugin:

```
pip install climetlab climetlab-eumetnet-postprocessing-benchmark
```

and run the notebook https://github.com/Climdyn/climetlab-eumetnet-postprocessing-benchmark/tree/main/notebooks/demo_ensemble_forecasts.ipynb.
Top GitHub Comments
Version 0.10.3 released.
Yes, the current default behaviour of cfgrib is to raise an error when the GRIB messages are not consistent. The rationale is that we really do not want to change the GRIB file on the fly without the user deciding it explicitly. So, yes, the `ignore_keys` option needs to be added everywhere you need it.
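For completeness, a sketch of what that might look like, assuming a cfgrib release new enough to accept the `ignore_keys` backend kwarg (the file name is a placeholder):

```python
import xarray as xr

# Tell cfgrib to ignore the "dataType" key when indexing the messages,
# so the cf/pf split no longer trips the uniqueness check. This assumes
# a cfgrib release that supports the "ignore_keys" backend kwarg.
merged = xr.open_dataset(
    "ens_forecast.grib",  # placeholder path
    engine="cfgrib",
    backend_kwargs={"ignore_keys": ["dataType"]},
)
```

Through climetlab, the same keyword could presumably be forwarded in `xarray_open_dataset_kwargs`, as in the `to_xarray` call shown in the traceback above.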