Can't Read from GCP - `.zmetadata` Not Being Generated
See original GitHub issue
Code Example
import gcsfs
import numcodecs
import numpy as np
import xarray as xr
def save_da_to_zarr(da, zarr_bucket,
                    variable_name,
                    chunks=(1, 3)):
    """
    Saves a data-array to a zarr bucket on GCP,
    by default each entry in the x dimension has
    its own chunk.

    The array is wrapped in a single-variable dataset named
    ``variable_name``, compressed with Blosc (zstd, level 5) and written
    in mode ``'w'`` together with consolidated metadata, so that the
    store can later be opened with ``consolidated=True``.

    Parameters
    ----------
    da : xarray.DataArray
        The data-array to save.
    zarr_bucket : str
        Root path of the zarr store, handed to ``gcsfs.GCSMap``.
    variable_name : str
        Name of the variable inside the written dataset.
    chunks : tuple of int, optional
        Chunk sizes, used both to chunk the array and as the zarr
        ``chunks`` encoding.

    Returns
    -------
    None
    """
    out_store = gcsfs.GCSMap(root=zarr_bucket, gcs=gcsfs.GCSFileSystem())
    ds = xr.Dataset({variable_name: da.chunk(chunks)})

    encoding = {
        variable_name: {
            'compressor': numcodecs.Blosc(cname='zstd', clevel=5),
            'chunks': chunks,
        }
    }

    # consolidated=True makes xarray write the `.zmetadata` key after the
    # regular metadata. Without it only `.zattrs`/`.zgroup` are stored and
    # `xr.open_zarr(..., consolidated=True)` fails with
    # KeyError: '.zmetadata'.
    ds.to_zarr(out_store, mode='w', encoding=encoding, consolidated=True)
def load_ds_from_zarr(zarr_bucket):
    """
    Opens the zarr store rooted at ``zarr_bucket`` on GCP and returns it
    as a dataset.

    The store is opened with ``consolidated=True``, so it must contain
    the consolidated-metadata key (``.zmetadata``).
    """
    filesystem = gcsfs.GCSFileSystem()
    mapper = gcsfs.GCSMap(root=zarr_bucket, gcs=filesystem)
    return xr.open_zarr(mapper, consolidated=True)
# Small 2x3 array of random values with labelled x/y coordinates.
da = xr.DataArray(
    np.random.randn(2, 3),
    dims=('x', 'y'),
    coords={
        'x': [10, 20],
        'y': [60, 70, 80],
    },
)

# Destination store and the name the array is saved under.
zarr_bucket = 'path/to/GCP/bucket'
variable_name = 'test_dataset'

# Round trip: write the array to the bucket, then read it back.
save_da_to_zarr(da, zarr_bucket, variable_name)
ds_returned = load_ds_from_zarr(zarr_bucket)
Problem description
I’m trying to save an Xarray Dataset to Zarr and then load it back into an Xarray Dataset. The issue I’m having is that the `.zmetadata` file isn’t being saved, which then causes a `KeyError` when trying to load the store with `consolidated=True`. The bucket looks like this:
- .zattrs
- .zgroup
- test_dataset/
- x/
- y/
Any advice would be much appreciated!
Error returned when trying to load:
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
~\anaconda3\envs\satip_dev\lib\site-packages\fsspec\mapping.py in __getitem__(self, key, default)
131 try:
--> 132 result = self.fs.cat(k)
133 except self.missing_exceptions:
~\anaconda3\envs\satip_dev\lib\site-packages\fsspec\asyn.py in cat(self, path, recursive, on_error, **kwargs)
229 if ex:
--> 230 raise ex
231 if (
~\anaconda3\envs\satip_dev\lib\site-packages\gcsfs\core.py in _cat_file(self, path)
825 u2 = self.url(path)
--> 826 headers, out = await self._call("GET", u2)
827 return out
~\anaconda3\envs\satip_dev\lib\site-packages\gcsfs\core.py in _call(self, method, path, json_out, info_out, *args, **kwargs)
506
--> 507 self.validate_response(status, contents, json, path, headers)
508 break
~\anaconda3\envs\satip_dev\lib\site-packages\gcsfs\core.py in validate_response(self, status, content, json, path, headers)
1219 if status == 404:
-> 1220 raise FileNotFoundError
1221 elif status == 403:
FileNotFoundError:
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-78-4cd9bab68b17> in <module>
38
39 save_da_to_zarr(da, zarr_bucket, variable_name)
---> 40 ds_returned = load_ds_from_zarr(zarr_bucket)
41
42 ds_returned
<ipython-input-78-4cd9bab68b17> in load_ds_from_zarr(zarr_bucket)
29 gcs = gcsfs.GCSFileSystem()
30 store = gcsfs.GCSMap(root=zarr_bucket, gcs=gcs)
---> 31 ds = xr.open_zarr(store, consolidated=True)
32
33 return ds
~\anaconda3\envs\satip_dev\lib\site-packages\xarray\backends\zarr.py in open_zarr(store, group, synchronizer, chunks, decode_cf, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, consolidated, overwrite_encoded_chunks, chunk_store, decode_timedelta, use_cftime, **kwargs)
659 # reads or writes from a store, never both. For open_zarr, we only read
660 mode = "r"
--> 661 zarr_store = ZarrStore.open_group(
662 store,
663 mode=mode,
~\anaconda3\envs\satip_dev\lib\site-packages\xarray\backends\zarr.py in open_group(cls, store, mode, synchronizer, group, consolidated, consolidate_on_close, chunk_store)
288 if consolidated:
289 # TODO: an option to pass the metadata_key keyword
--> 290 zarr_group = zarr.open_consolidated(store, **open_kwargs)
291 else:
292 zarr_group = zarr.open_group(store, **open_kwargs)
~\anaconda3\envs\satip_dev\lib\site-packages\zarr\convenience.py in open_consolidated(store, metadata_key, mode, **kwargs)
1172
1173 # setup metadata sotre
-> 1174 meta_store = ConsolidatedMetadataStore(store, metadata_key=metadata_key)
1175
1176 # pass through
~\anaconda3\envs\satip_dev\lib\site-packages\zarr\storage.py in __init__(self, store, metadata_key)
2672
2673 # retrieve consolidated metadata
-> 2674 meta = json_loads(store[metadata_key])
2675
2676 # check format of consolidated metadata
~\anaconda3\envs\satip_dev\lib\site-packages\fsspec\mapping.py in __getitem__(self, key, default)
134 if default is not None:
135 return default
--> 136 raise KeyError(key)
137 return result
138
KeyError: '.zmetadata'
Version and installation information
- Zarr version: 2.5.0
- Numcodecs version: 0.7.2
- Python version: 3.8.6 | packaged by conda-forge
- OS: Windows
- Installed using: Conda
Issue Analytics
- State:
- Created 3 years ago
- Comments: 8 (5 by maintainers)
Top Results From Across the Web
Why can't I access Metadata Server of GCP Instance?
Go to Compute Engine -> VM instances. · Stop the instance. · Click edit and scroll down to "Service account". · Choose the...
Read more >
Access VM metadata - Compute Engine - Google Cloud
In the Google Cloud console, go to the Metadata page. From the Metadata tab, you can review most of your custom project metadata...
Read more >
Can't retrieve image name from metadata server
If instance was created from the snapshot then image field in metadata is empty.
Read more >
can't connect to instance via SSH - Google Groups
As I ve looked up now, I recognize it may be a cause, but I can't say exactly. ... Are keys supposed to...
Read more >
GitHub - googleapis/gcp-metadata
Get the metadata from a Google Cloud Platform environment - GitHub ... Read more about the client libraries for Cloud APIs, including the...
Read more >
Top Related Medium Post
No results found
Top Related StackOverflow Question
No results found
Troubleshoot Live Code
Lightrun enables developers to add logs, metrics and snapshots to live code - no restarts or redeploys required.
Start Free
Top Related Reddit Thread
No results found
Top Related Hackernoon Post
No results found
Top Related Tweet
No results found
Top Related Dev.to Post
No results found
Top Related Hashnode Post
No results found
In theory xarray and dask should handle all of this for you automatically.
I hadn’t realised that, even better. Thanks!