to_zarr append with gcsmap does not work properly
See original GitHub issue
MCVE Code Sample
import gcsfs
from gcsfs import mapping
import xarray as xr
gcs_bucket_name = '<bucket_name_here>'
gcsmap = mapping.GCSMap(gcs_bucket_name)
ds.to_zarr(store=gcsmap, mode='a', consolidated=True, compute=False, append_dim='time')
Expected Output
Zarr store in bucket appended on time dimension.
Problem Description
ValueError Traceback (most recent call last)
<ipython-input-103-cb22683dde66> in <module>
----> 1 ds.to_zarr(store=gcsmap, mode='a', consolidated=True, compute=False, append_dim='time')
~/anaconda3/envs/intake/lib/python3.7/site-packages/xarray/core/dataset.py in to_zarr(self, store, mode, synchronizer, group, encoding, compute, consolidated, append_dim)
1431 return to_zarr(self, store=store, mode=mode, synchronizer=synchronizer,
1432 group=group, encoding=encoding, compute=compute,
-> 1433 consolidated=consolidated, append_dim=append_dim)
1434
1435 def __repr__(self):
~/anaconda3/envs/intake/lib/python3.7/site-packages/xarray/backends/api.py in to_zarr(dataset, store, mode, synchronizer, group, encoding, compute, consolidated, append_dim)
1090 group=group,
1091 consolidated=consolidated,
-> 1092 encoding=encoding)
1093
1094 zstore = backends.ZarrStore.open_group(store=store, mode=mode,
~/anaconda3/envs/intake/lib/python3.7/site-packages/xarray/backends/api.py in _validate_append_dim_and_encoding(ds_to_append, store, append_dim, encoding, **open_kwargs)
1053 if append_dim not in ds.dims:
1054 raise ValueError(
-> 1055 "{} not a valid dimension in the Dataset".format(append_dim)
1056 )
1057 for data_var in ds_to_append:
ValueError: time not a valid dimension in the Dataset
Line 1296 in xarray.backends.api
def to_zarr(
dataset,
store=None,
mode=None,
synchronizer=None,
group=None,
encoding=None,
compute=True,
consolidated=False,
append_dim=None,
):
"""This function creates an appropriate datastore for writing a dataset to
a zarr ztore
See `Dataset.to_zarr` for full API docs.
"""
if isinstance(store, Path):
store = str(store)
The mutable mapping created by gcsfs gets turned into a string (i.e. '<fsspec.mapping.FSMap object at 0x10fae1278>'), which fails to read in the zarr store correctly. This fails silently, and then the ValueError at line 1055 gets tripped because there are no dims to compare the append_dim to.
This was tested by running
import gcsfs
from gcsfs import mapping
import xarray as xr
from xarray import backends
gcs_bucket_name = '<bucket_name_here>'
gcsmap = mapping.GCSMap(gcs_bucket_name)
# same way it is called in _validate_append_dim_and_encoding minus str gcsmap
ds = backends.zarr.open_zarr(gcsmap)
and
import gcsfs
from gcsfs import mapping
import xarray as xr
from xarray import backends
gcs_bucket_name = '<bucket_name_here>'
gcsmap = mapping.GCSMap(gcs_bucket_name)
# same way it is called in _validate_append_dim_and_encoding
ds = backends.zarr.open_zarr(str(gcsmap))
The top example reads the data in correctly while the second one fails with:
ValueError Traceback (most recent call last)
<ipython-input-102-52e941f0a8f2> in <module>
----> 1 ds = backends.zarr.open_zarr(str(gcsmap))
~/anaconda3/envs/intake/lib/python3.7/site-packages/xarray/backends/zarr.py in open_zarr(store, group, synchronizer, chunks, decode_cf, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, consolidated, overwrite_encoded_chunks, **kwargs)
554 zarr_store = ZarrStore.open_group(store, mode=mode,
555 synchronizer=synchronizer,
--> 556 group=group, consolidated=consolidated)
557 ds = maybe_decode_store(zarr_store)
558
~/anaconda3/envs/intake/lib/python3.7/site-packages/xarray/backends/zarr.py in open_group(cls, store, mode, synchronizer, group, consolidated, consolidate_on_close)
250 zarr_group = zarr.open_consolidated(store, **open_kwargs)
251 else:
--> 252 zarr_group = zarr.open_group(store, **open_kwargs)
253 return cls(zarr_group, consolidate_on_close)
254
~/anaconda3/envs/intake/lib/python3.7/site-packages/zarr/hierarchy.py in open_group(store, mode, cache_attrs, synchronizer, path, chunk_store)
1114 err_contains_array(path)
1115 elif not contains_group(store, path=path):
-> 1116 err_group_not_found(path)
1117
1118 elif mode == 'w':
~/anaconda3/envs/intake/lib/python3.7/site-packages/zarr/errors.py in err_group_not_found(path)
27
28 def err_group_not_found(path):
---> 29 raise ValueError('group not found at path %r' % path)
30
31
ValueError: group not found at path ''
Output of xr.show_versions()
xarray: 0.12.3 pandas: 0.25.0 numpy: 1.17.0 scipy: None netCDF4: 1.5.1.2 pydap: None h5netcdf: 0.7.4 h5py: 2.9.0 Nio: None zarr: 2.3.2 cftime: 1.0.3.4 nc_time_axis: None PseudoNetCDF: None rasterio: None cfgrib: None iris: None bottleneck: None dask: 2.2.0 distributed: 2.2.0 matplotlib: 3.1.1 cartopy: None seaborn: None numbagg: None setuptools: 41.0.1 pip: 19.2.2 conda: None pytest: None IPython: 7.7.0 sphinx: None
Issue Analytics
- State:
- Created 4 years ago
- Comments:12 (5 by maintainers)
Top GitHub Comments
Upon further testing, it looks like the error has to do with the consolidated zarr store. Even though the original set was written with:
The new append to zarr with:
Fails since it reads an empty array. This might be more of a problem for zarr itself and not xarray. Going to test rewriting a netcdf file to zarr with consolidation and if the error persists then I will move to zarr for the error.
Closing since this seems to be a zarr / zarr-consolidation issue and not an xarray issue.