deepcopying variable raises `TypeError: h5py objects cannot be pickled` (Dataset.sortby)
See original GitHub issue

What happened:
When using `xr.open_dataset` with `H5NetCDFDataStore` and an opened `h5py.File` handle, the deepcopy in `Dataset.sortby`/`align` leads to `TypeError: h5py objects cannot be pickled`.
What you expected to happen:
While applying `Dataset.sortby`, no error should be raised.
Minimal Complete Verifiable Example:
# create hdf5 file
import h5py
f = h5py.File('myfile.h5','w')
dset = f.create_dataset("data", (360, 1000))
f.close()
import h5netcdf
import xarray as xr
import numpy as np
f = h5netcdf.File("myfile.h5", "r", phony_dims="access")
s0 = xr.backends.H5NetCDFStore(f)
ds = xr.open_dataset(s0, engine="h5netcdf", chunks=None)
ds = ds.assign_coords({"phony_dim_0": np.arange(ds.dims['phony_dim_0'], 0, -1)})
ds.sortby('phony_dim_0')
ds.close()
Error Traceback
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-6-0e1377169cb3> in <module>
5 ds = xr.open_dataset(s0, engine="h5netcdf", chunks=None)
6 ds = ds.assign_coords({"phony_dim_0": np.arange(ds.dims['phony_dim_0'], 0, -1)})
----> 7 ds.sortby('phony_dim_0')
8 ds.close()
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/site-packages/xarray-0.16.1.dev86+g264fdb29-py3.8.egg/xarray/core/dataset.py in sortby(self, variables, ascending)
5293 variables = variables
5294 variables = [v if isinstance(v, DataArray) else self[v] for v in variables]
-> 5295 aligned_vars = align(self, *variables, join="left")
5296 aligned_self = aligned_vars[0]
5297 aligned_other_vars = aligned_vars[1:]
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/site-packages/xarray-0.16.1.dev86+g264fdb29-py3.8.egg/xarray/core/alignment.py in align(join, copy, indexes, exclude, fill_value, *objects)
336 if not valid_indexers:
337 # fast path for no reindexing necessary
--> 338 new_obj = obj.copy(deep=copy)
339 else:
340 new_obj = obj.reindex(copy=copy, fill_value=fill_value, **valid_indexers)
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/site-packages/xarray-0.16.1.dev86+g264fdb29-py3.8.egg/xarray/core/dataset.py in copy(self, deep, data)
1076 """
1077 if data is None:
-> 1078 variables = {k: v.copy(deep=deep) for k, v in self._variables.items()}
1079 elif not utils.is_dict_like(data):
1080 raise ValueError("Data must be dict-like")
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/site-packages/xarray-0.16.1.dev86+g264fdb29-py3.8.egg/xarray/core/dataset.py in <dictcomp>(.0)
1076 """
1077 if data is None:
-> 1078 variables = {k: v.copy(deep=deep) for k, v in self._variables.items()}
1079 elif not utils.is_dict_like(data):
1080 raise ValueError("Data must be dict-like")
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/site-packages/xarray-0.16.1.dev86+g264fdb29-py3.8.egg/xarray/core/variable.py in copy(self, deep, data)
938
939 if deep:
--> 940 data = copy.deepcopy(data)
941
942 else:
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
170 y = x
171 else:
--> 172 y = _reconstruct(x, memo, *rv)
173
174 # If is its own copy, don't memoize.
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
268 if state is not None:
269 if deep:
--> 270 state = deepcopy(state, memo)
271 if hasattr(y, '__setstate__'):
272 y.__setstate__(state)
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
144 copier = _deepcopy_dispatch.get(cls)
145 if copier is not None:
--> 146 y = copier(x, memo)
147 else:
148 if issubclass(cls, type):
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_tuple(x, memo, deepcopy)
208
209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210 y = [deepcopy(a, memo) for a in x]
211 # We're not going to put the tuple in the memo, but it's still important we
212 # check for it, in case the tuple contains recursive mutable structures.
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in <listcomp>(.0)
208
209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210 y = [deepcopy(a, memo) for a in x]
211 # We're not going to put the tuple in the memo, but it's still important we
212 # check for it, in case the tuple contains recursive mutable structures.
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
144 copier = _deepcopy_dispatch.get(cls)
145 if copier is not None:
--> 146 y = copier(x, memo)
147 else:
148 if issubclass(cls, type):
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
228 memo[id(x)] = y
229 for key, value in x.items():
--> 230 y[deepcopy(key, memo)] = deepcopy(value, memo)
231 return y
232 d[dict] = _deepcopy_dict
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
170 y = x
171 else:
--> 172 y = _reconstruct(x, memo, *rv)
173
174 # If is its own copy, don't memoize.
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
268 if state is not None:
269 if deep:
--> 270 state = deepcopy(state, memo)
271 if hasattr(y, '__setstate__'):
272 y.__setstate__(state)
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
144 copier = _deepcopy_dispatch.get(cls)
145 if copier is not None:
--> 146 y = copier(x, memo)
147 else:
148 if issubclass(cls, type):
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_tuple(x, memo, deepcopy)
208
209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210 y = [deepcopy(a, memo) for a in x]
211 # We're not going to put the tuple in the memo, but it's still important we
212 # check for it, in case the tuple contains recursive mutable structures.
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in <listcomp>(.0)
208
209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210 y = [deepcopy(a, memo) for a in x]
211 # We're not going to put the tuple in the memo, but it's still important we
212 # check for it, in case the tuple contains recursive mutable structures.
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
144 copier = _deepcopy_dispatch.get(cls)
145 if copier is not None:
--> 146 y = copier(x, memo)
147 else:
148 if issubclass(cls, type):
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
228 memo[id(x)] = y
229 for key, value in x.items():
--> 230 y[deepcopy(key, memo)] = deepcopy(value, memo)
231 return y
232 d[dict] = _deepcopy_dict
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
170 y = x
171 else:
--> 172 y = _reconstruct(x, memo, *rv)
173
174 # If is its own copy, don't memoize.
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
268 if state is not None:
269 if deep:
--> 270 state = deepcopy(state, memo)
271 if hasattr(y, '__setstate__'):
272 y.__setstate__(state)
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
144 copier = _deepcopy_dispatch.get(cls)
145 if copier is not None:
--> 146 y = copier(x, memo)
147 else:
148 if issubclass(cls, type):
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_tuple(x, memo, deepcopy)
208
209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210 y = [deepcopy(a, memo) for a in x]
211 # We're not going to put the tuple in the memo, but it's still important we
212 # check for it, in case the tuple contains recursive mutable structures.
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in <listcomp>(.0)
208
209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210 y = [deepcopy(a, memo) for a in x]
211 # We're not going to put the tuple in the memo, but it's still important we
212 # check for it, in case the tuple contains recursive mutable structures.
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
144 copier = _deepcopy_dispatch.get(cls)
145 if copier is not None:
--> 146 y = copier(x, memo)
147 else:
148 if issubclass(cls, type):
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
228 memo[id(x)] = y
229 for key, value in x.items():
--> 230 y[deepcopy(key, memo)] = deepcopy(value, memo)
231 return y
232 d[dict] = _deepcopy_dict
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
170 y = x
171 else:
--> 172 y = _reconstruct(x, memo, *rv)
173
174 # If is its own copy, don't memoize.
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
268 if state is not None:
269 if deep:
--> 270 state = deepcopy(state, memo)
271 if hasattr(y, '__setstate__'):
272 y.__setstate__(state)
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
144 copier = _deepcopy_dispatch.get(cls)
145 if copier is not None:
--> 146 y = copier(x, memo)
147 else:
148 if issubclass(cls, type):
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_tuple(x, memo, deepcopy)
208
209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210 y = [deepcopy(a, memo) for a in x]
211 # We're not going to put the tuple in the memo, but it's still important we
212 # check for it, in case the tuple contains recursive mutable structures.
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in <listcomp>(.0)
208
209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210 y = [deepcopy(a, memo) for a in x]
211 # We're not going to put the tuple in the memo, but it's still important we
212 # check for it, in case the tuple contains recursive mutable structures.
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
144 copier = _deepcopy_dispatch.get(cls)
145 if copier is not None:
--> 146 y = copier(x, memo)
147 else:
148 if issubclass(cls, type):
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
228 memo[id(x)] = y
229 for key, value in x.items():
--> 230 y[deepcopy(key, memo)] = deepcopy(value, memo)
231 return y
232 d[dict] = _deepcopy_dict
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
170 y = x
171 else:
--> 172 y = _reconstruct(x, memo, *rv)
173
174 # If is its own copy, don't memoize.
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
268 if state is not None:
269 if deep:
--> 270 state = deepcopy(state, memo)
271 if hasattr(y, '__setstate__'):
272 y.__setstate__(state)
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
144 copier = _deepcopy_dispatch.get(cls)
145 if copier is not None:
--> 146 y = copier(x, memo)
147 else:
148 if issubclass(cls, type):
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_tuple(x, memo, deepcopy)
208
209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210 y = [deepcopy(a, memo) for a in x]
211 # We're not going to put the tuple in the memo, but it's still important we
212 # check for it, in case the tuple contains recursive mutable structures.
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in <listcomp>(.0)
208
209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210 y = [deepcopy(a, memo) for a in x]
211 # We're not going to put the tuple in the memo, but it's still important we
212 # check for it, in case the tuple contains recursive mutable structures.
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
144 copier = _deepcopy_dispatch.get(cls)
145 if copier is not None:
--> 146 y = copier(x, memo)
147 else:
148 if issubclass(cls, type):
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
228 memo[id(x)] = y
229 for key, value in x.items():
--> 230 y[deepcopy(key, memo)] = deepcopy(value, memo)
231 return y
232 d[dict] = _deepcopy_dict
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
170 y = x
171 else:
--> 172 y = _reconstruct(x, memo, *rv)
173
174 # If is its own copy, don't memoize.
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
268 if state is not None:
269 if deep:
--> 270 state = deepcopy(state, memo)
271 if hasattr(y, '__setstate__'):
272 y.__setstate__(state)
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
144 copier = _deepcopy_dispatch.get(cls)
145 if copier is not None:
--> 146 y = copier(x, memo)
147 else:
148 if issubclass(cls, type):
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
228 memo[id(x)] = y
229 for key, value in x.items():
--> 230 y[deepcopy(key, memo)] = deepcopy(value, memo)
231 return y
232 d[dict] = _deepcopy_dict
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
170 y = x
171 else:
--> 172 y = _reconstruct(x, memo, *rv)
173
174 # If is its own copy, don't memoize.
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
268 if state is not None:
269 if deep:
--> 270 state = deepcopy(state, memo)
271 if hasattr(y, '__setstate__'):
272 y.__setstate__(state)
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
144 copier = _deepcopy_dispatch.get(cls)
145 if copier is not None:
--> 146 y = copier(x, memo)
147 else:
148 if issubclass(cls, type):
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
228 memo[id(x)] = y
229 for key, value in x.items():
--> 230 y[deepcopy(key, memo)] = deepcopy(value, memo)
231 return y
232 d[dict] = _deepcopy_dict
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
159 reductor = getattr(x, "__reduce_ex__", None)
160 if reductor is not None:
--> 161 rv = reductor(4)
162 else:
163 reductor = getattr(x, "__reduce__", None)
/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/site-packages/h5py/_hl/base.py in __getnewargs__(self)
306 limitations, look at the h5pickle project on PyPI.
307 """
--> 308 raise TypeError("h5py objects cannot be pickled")
309
310 def __getstate__(self):
TypeError: h5py objects cannot be pickled
Anything else we need to know?:
When invoked with `chunks={}` it works, and it also works if the following code is used:
ds = xr.open_dataset('myfile.h5', group='/', engine='h5netcdf', backend_kwargs=dict(phony_dims='access'))
ds = ds.assign_coords({"phony_dim_0": np.arange(ds.dims['phony_dim_0'], 0, -1)})
ds.sortby('phony_dim_0')
ds.close()
This was introduced by #4221, see https://github.com/pydata/xarray/blob/66ab0ae4f3aa3c461357a5a895405e81357796b1/xarray/core/variable.py#L939-L941
Before:
if deep and (
hasattr(data, "__array_function__")
or isinstance(data, dask_array_type)
or (not IS_NEP18_ACTIVE and isinstance(data, np.ndarray))
):
data = copy.deepcopy(data)
All three of the above tests return `False` in my case, so deepcopy should never be used here.
Environment:
Output of `xr.show_versions()`
INSTALLED VERSIONS
commit: None python: 3.8.5 | packaged by conda-forge | (default, Aug 29 2020, 01:22:49) [GCC 7.5.0] python-bits: 64 OS: Linux OS-release: 4.12.14-lp151.28.67-default machine: x86_64 processor: x86_64 byteorder: little LC_ALL: None LANG: de_DE.UTF-8 LOCALE: de_DE.UTF-8 libhdf5: 1.10.6 libnetcdf: 4.7.4
xarray: 0.16.1.dev86+g264fdb29 pandas: 1.1.1 numpy: 1.19.1 scipy: 1.5.0 netCDF4: 1.5.4 pydap: None h5netcdf: 0.8.0 h5py: 2.10.0 Nio: None zarr: 2.4.0 cftime: 1.2.1 nc_time_axis: None PseudoNetCDF: None rasterio: None cfgrib: 0.9.8.4 iris: None bottleneck: 1.3.2 dask: 2.19.0 distributed: 2.25.0 matplotlib: 3.3.1 cartopy: 0.18.0 seaborn: None numbagg: None pint: None setuptools: 49.6.0.post20200814 pip: 20.2.2 conda: 4.8.3 pytest: 5.4.3 IPython: 7.18.1 sphinx: None
Issue Analytics
- State:
- Created 3 years ago
- Comments:5 (5 by maintainers)
Hmm. It appears that one advantage of the old implementation is that it meant that calling copy with `deep=True` would always succeed.

See https://github.com/pydata/xarray/pull/4426 for a potential fix.
Thanks for digging on this. I don’t have an answer, but I wanted to tag as related to #4242.