Errors with pmlb.fetch_data()
See original GitHub issue
Hello,
Just recently I encountered the following error when using pmlb.fetch_data() in Python with a Jupyter notebook. The Python version is 3.7.4, and the pmlb version is 1.0.2a0 or 1.0.1.post3. Could you let us know what might be the problem? Thanks!
from pmlb import fetch_data
# Returns a pandas DataFrame
mushroom = fetch_data('mushroom')
mushroom.describe().transpose()
SSLCertVerificationError Traceback (most recent call last) ~\AppData\Roaming\Python\Python37\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw) 676 headers=headers, –> 677 chunked=chunked, 678 )
~\AppData\Roaming\Python\Python37\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw) 380 try: –> 381 self._validate_conn(conn) 382 except (SocketTimeout, BaseSSLError) as e:
~\AppData\Roaming\Python\Python37\site-packages\urllib3\connectionpool.py in _validate_conn(self, conn)
977 if not getattr(conn, "sock", None): # AppEngine might not have .sock
--> 978 conn.connect()
979
~\AppData\Roaming\Python\Python37\site-packages\urllib3\connection.py in connect(self) 370 server_hostname=server_hostname, –> 371 ssl_context=context, 372 )
~\AppData\Roaming\Python\Python37\site-packages\urllib3\util\ssl_.py in ssl_wrap_socket(sock, keyfile, certfile, cert_reqs, ca_certs, server_hostname, ssl_version, ciphers, ssl_context, ca_cert_dir, key_password, ca_cert_data) 383 if HAS_SNI and server_hostname is not None: –> 384 return context.wrap_socket(sock, server_hostname=server_hostname) 385
~\AppData\Local\Continuum\anaconda3\lib\ssl.py in wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, session) 422 context=self, –> 423 session=session 424 )
~\AppData\Local\Continuum\anaconda3\lib\ssl.py in _create(cls, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, context, session) 869 raise ValueError("do_handshake_on_connect should not be specified for non-blocking sockets") --> 870 self.do_handshake() 871 except (OSError, ValueError):
~\AppData\Local\Continuum\anaconda3\lib\ssl.py in do_handshake(self, block) 1138 self.settimeout(None) -> 1139 self._sslobj.do_handshake() 1140 finally:
SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1076)
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last) ~\AppData\Roaming\Python\Python37\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies) 448 retries=self.max_retries, –> 449 timeout=timeout 450 )
~\AppData\Roaming\Python\Python37\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw) 726 retries = retries.increment( –> 727 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2] 728 )
~\AppData\Roaming\Python\Python37\site-packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace) 438 if new_retry.is_exhausted(): –> 439 raise MaxRetryError(_pool, url, error or ResponseError(cause)) 440
MaxRetryError: HTTPSConnectionPool(host='media.githubusercontent.com', port=443): Max retries exceeded with url: /media/EpistasisLab/pmlb/master/datasets/mushroom/mushroom.tsv.gz (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1076)')))
During handling of the above exception, another exception occurred:
SSLError Traceback (most recent call last) <ipython-input-4-766371873a88> in <module> 2 3 # Returns a pandas DataFrame ----> 4 mushroom = fetch_data('mushroom') 5 mushroom.describe().transpose()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pmlb\pmlb.py in fetch_data(dataset_name, return_X_y, local_cache_dir, dropna) 77 raise ValueError('Dataset not found in PMLB.') 78 dataset_url = get_dataset_url(GITHUB_URL, ---> 79 dataset_name, suffix) 80 dataset = pd.read_csv(dataset_url, sep='\t', compression='gzip') 81 else:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pmlb\pmlb.py in get_dataset_url(GITHUB_URL, dataset_name, suffix) 116 ) 117 --> 118 re = requests.get(dataset_url) 119 if re.status_code != 200: 120 raise ValueError('Dataset not found in PMLB.')
~\AppData\Roaming\Python\Python37\site-packages\requests\api.py in get(url, params, **kwargs) 74 75 kwargs.setdefault(‘allow_redirects’, True) —> 76 return request(‘get’, url, params=params, **kwargs) 77 78
~\AppData\Roaming\Python\Python37\site-packages\requests\api.py in request(method, url, **kwargs) 59 # cases, and look like a memory leak in others. 60 with sessions.Session() as session: —> 61 return session.request(method=method, url=url, **kwargs) 62 63
~\AppData\Roaming\Python\Python37\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json) 528 } 529 send_kwargs.update(settings) –> 530 resp = self.send(prep, **send_kwargs) 531 532 return resp
~\AppData\Roaming\Python\Python37\site-packages\requests\sessions.py in send(self, request, **kwargs) 663 # Redirect resolving generator. 664 gen = self.resolve_redirects(r, request, **kwargs) –> 665 history = [resp for resp in gen] 666 else: 667 history = []
~\AppData\Roaming\Python\Python37\site-packages\requests\sessions.py in <listcomp>(.0) 663 # Redirect resolving generator. 664 gen = self.resolve_redirects(r, request, **kwargs) –> 665 history = [resp for resp in gen] 666 else: 667 history = []
~\AppData\Roaming\Python\Python37\site-packages\requests\sessions.py in resolve_redirects(self, resp, req, stream, timeout, verify, cert, proxies, yield_requests, **adapter_kwargs) 243 proxies=proxies, 244 allow_redirects=False, –> 245 **adapter_kwargs 246 ) 247
~\AppData\Roaming\Python\Python37\site-packages\requests\sessions.py in send(self, request, **kwargs) 641 642 # Send the request –> 643 r = adapter.send(request, **kwargs) 644 645 # Total elapsed time of the request (approximately)
~\AppData\Roaming\Python\Python37\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies) 512 if isinstance(e.reason, _SSLError): 513 # This branch is for urllib3 v1.22 and later. –> 514 raise SSLError(e, request=request) 515 516 raise ConnectionError(e, request=request)
SSLError: HTTPSConnectionPool(host='media.githubusercontent.com', port=443): Max retries exceeded with url: /media/EpistasisLab/pmlb/master/datasets/mushroom/mushroom.tsv.gz (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1076)')))
Issue Analytics
- State:
- Created 3 years ago
- Comments:7 (1 by maintainers)
Top GitHub Comments
Thanks, that worked like a charm. My bad, I should have checked the README for updates.
Hi @Tarek0, please upgrade to 1.0 or newer. Version 1.0 introduced breaking changes, as noted in the README.