Botocore version >= 1.23 impacts pandas.read_csv("s3://bucket/foo.csv")
Describe the bug
We found that after upgrading botocore to a version >= 1.23.0, pandas.read_csv("s3://bucket/foo.csv") fails with the exception:
AttributeError: 'AioClientCreator' object has no attribute '_register_lazy_block_unknown_fips_pseudo_regions'
Steps to reproduce
Code snippet:
import pandas
df = pandas.read_csv("s3://bucket/foo.csv", sep=",")
The pandas version is 1.0.1.
Expected behavior
The pandas DataFrame should be created. With botocore version 1.22.9, for example, the sample code executes as normal.
The workaround we use at the moment is to roll back to botocore 1.22.5.
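A minimal sketch of that pin, assuming a pip-managed environment (adjust for conda or other tooling):

pip install "botocore==1.22.5"
# s3fs pulls in aiobotocore, which tracks botocore internals closely;
# listing the installed trio helps confirm the versions still line up
pip show botocore aiobotocore s3fs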
Debug logs
674 )
675
--> 676 return _read(filepath_or_buffer, kwds)
677
678 parser_f.__name__ = name
/opt/conda/lib/python3.7/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
429 # See https://github.com/python/mypy/issues/1297
430 fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
--> 431 filepath_or_buffer, encoding, compression
432 )
433 kwds["compression"] = compression
/opt/conda/lib/python3.7/site-packages/pandas/io/common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode)
183
184 return s3.get_filepath_or_buffer(
--> 185 filepath_or_buffer, encoding=encoding, compression=compression, mode=mode
186 )
187
/opt/conda/lib/python3.7/site-packages/pandas/io/s3.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode)
46 mode: Optional[str] = None,
47 ) -> Tuple[IO, Optional[str], Optional[str], bool]:
---> 48 file, _fs = get_file_and_filesystem(filepath_or_buffer, mode=mode)
49 return file, None, compression, True
/opt/conda/lib/python3.7/site-packages/pandas/io/s3.py in get_file_and_filesystem(filepath_or_buffer, mode)
27 fs = s3fs.S3FileSystem(anon=False)
28 try:
---> 29 file = fs.open(_strip_schema(filepath_or_buffer), mode)
30 except (FileNotFoundError, NoCredentialsError):
31 # boto3 has troubles when trying to access a public file
/opt/conda/lib/python3.7/site-packages/fsspec/spec.py in open(self, path, mode, block_size, cache_options, **kwargs)
1014 autocommit=ac,
1015 cache_options=cache_options,
-> 1016 **kwargs,
1017 )
1018 if not ac and "r" not in mode:
/opt/conda/lib/python3.7/site-packages/s3fs/core.py in _open(self, path, mode, block_size, acl, version_id, fill_cache, cache_type, autocommit, requester_pays, **kwargs)
549 cache_type=cache_type,
550 autocommit=autocommit,
--> 551 requester_pays=requester_pays,
552 )
553
/opt/conda/lib/python3.7/site-packages/s3fs/core.py in __init__(self, s3, path, mode, block_size, acl, version_id, fill_cache, s3_additional_kwargs, autocommit, cache_type, requester_pays)
1791 self.version_id = self.details.get("VersionId")
1792 super().__init__(
-> 1793 s3, path, mode, block_size, autocommit=autocommit, cache_type=cache_type
1794 )
1795 self.s3 = self.fs # compatibility
/opt/conda/lib/python3.7/site-packages/fsspec/spec.py in __init__(self, fs, path, mode, block_size, autocommit, cache_type, cache_options, size, **kwargs)
1342 self.size = size
1343 else:
-> 1344 self.size = self.details["size"]
1345 self.cache = caches[cache_type](
1346 self.blocksize, self._fetch_range, self.size, **cache_options
/opt/conda/lib/python3.7/site-packages/fsspec/spec.py in details(self)
1355 def details(self):
1356 if self._details is None:
-> 1357 self._details = self.fs.info(self.path)
1358 return self._details
1359
/opt/conda/lib/python3.7/site-packages/fsspec/asyn.py in wrapper(*args, **kwargs)
89 def wrapper(*args, **kwargs):
90 self = obj or args[0]
---> 91 return sync(self.loop, func, *args, **kwargs)
92
93 return wrapper
/opt/conda/lib/python3.7/site-packages/fsspec/asyn.py in sync(loop, func, timeout, *args, **kwargs)
69 raise FSTimeoutError from return_result
70 elif isinstance(return_result, BaseException):
---> 71 raise return_result
72 else:
73 return return_result
/opt/conda/lib/python3.7/site-packages/fsspec/asyn.py in _runner(event, coro, result, timeout)
23 coro = asyncio.wait_for(coro, timeout=timeout)
24 try:
---> 25 result[0] = await coro
26 except Exception as ex:
27 result[0] = ex
/opt/conda/lib/python3.7/site-packages/s3fs/core.py in _info(self, path, bucket, key, refresh, version_id)
1020 Key=key,
1021 **version_id_kw(version_id),
-> 1022 **self.req_kw,
1023 )
1024 return {
/opt/conda/lib/python3.7/site-packages/s3fs/core.py in _call_s3(self, method, *akwarglist, **kwargs)
239
240 async def _call_s3(self, method, *akwarglist, **kwargs):
--> 241 await self.set_session()
242 s3 = await self.get_s3(kwargs.get("Bucket"))
243 method = getattr(s3, method)
/opt/conda/lib/python3.7/site-packages/s3fs/core.py in set_session(self, refresh, kwargs)
407 "s3", config=conf, **init_kwargs, **client_kwargs
408 )
--> 409 self._s3 = await s3creator.__aenter__()
410
411 self._s3creator = s3creator
/opt/conda/lib/python3.7/site-packages/aiobotocore/session.py in __aenter__(self)
35
36 async def __aenter__(self) -> AioBaseClient:
---> 37 self._client = await self._coro
38 return await self._client.__aenter__()
39
/opt/conda/lib/python3.7/site-packages/aiobotocore/session.py in _create_client(self, service_name, region_name, api_version, use_ssl, verify, endpoint_url, aws_access_key_id, aws_secret_access_key, aws_session_token, config)
123 is_secure=use_ssl, endpoint_url=endpoint_url, verify=verify,
124 credentials=credentials, scoped_config=self.get_scoped_config(),
--> 125 client_config=config, api_version=api_version)
126 monitor = self._get_internal_component('monitor')
127 if monitor is not None:
/opt/conda/lib/python3.7/site-packages/aiobotocore/client.py in create_client(self, service_name, region_name, is_secure, endpoint_url, verify, credentials, scoped_config, api_version, client_config)
44 service_client, endpoint_url, client_config
45 )
---> 46 self._register_lazy_block_unknown_fips_pseudo_regions(service_client)
47 return service_client
48
AttributeError: 'AioClientCreator' object has no attribute '_register_lazy_block_unknown_fips_pseudo_regions'
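The failing call is inside aiobotocore's AioClientCreator, which copies and extends private botocore ClientCreator internals; _register_lazy_block_unknown_fips_pseudo_regions appears to have been removed in botocore 1.23.0, which would explain why only the botocore upgrade triggers the error. A quick diagnostic sketch (module names only, nothing project-specific) to confirm the version mismatch:

import botocore
import aiobotocore
import s3fs

# aiobotocore is built against a narrow botocore range, so these versions
# drifting apart is the usual cause of AttributeErrors like the one above
print("botocore   ", botocore.__version__)
print("aiobotocore", aiobotocore.__version__)
print("s3fs       ", s3fs.__version__)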
Top GitHub Comments
I’m stumped! I see you edited your initial comment, and it looks like at least you now have a workaround (using botocore 1.22.6). Hopefully someone else will pick this up, as I’m not sure what else it could be…
Thank you crispinlogan
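If pinning botocore is not possible, another workaround (a sketch, not taken from the issue thread; the bucket and key below are placeholders) is to bypass s3fs/aiobotocore entirely and stream the object with plain boto3, which does not go through the failing AioClientCreator path:

import boto3
import pandas

# fetch the object with the synchronous boto3 client (no aiobotocore involved)
s3 = boto3.client("s3")
obj = s3.get_object(Bucket="bucket", Key="foo.csv")

# pandas.read_csv accepts the StreamingBody as a file-like object
df = pandas.read_csv(obj["Body"], sep=",")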