load_dataset for LOCAL CSV files report CONNECTION ERROR
See original GitHub issue
load_dataset for LOCAL CSV files report CONNECTION ERROR
- Description: A local demo csv file:
import pandas as pd
import numpy as np
from datasets import load_dataset
import torch
import transformers
df = pd.DataFrame(np.arange(1200).reshape(300,4))
df.to_csv('test.csv', header=False, index=False)
print('datasets version: ', datasets.__version__)
print('pytorch version: ', torch.__version__)
print('transformers version: ', transformers.__version__)
# output:
datasets version: 1.1.2
pytorch version: 1.5.0
transformers version: 3.2.0
When I load the data through the `datasets` library:
dataset = load_dataset('csv', data_files='./test.csv', delimiter=',', autogenerate_column_names=False)
Error information:
ConnectionError Traceback (most recent call last)
<ipython-input-17-bbdadb9a0c78> in <module>
----> 1 dataset = load_dataset('csv', data_files='./test.csv', delimiter=',', autogenerate_column_names=False)
~/.conda/envs/py36/lib/python3.6/site-packages/datasets/load.py in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, ignore_verifications, save_infos, script_version, **config_kwargs)
588 # Download/copy dataset processing script
589 module_path, hash = prepare_module(
--> 590 path, script_version=script_version, download_config=download_config, download_mode=download_mode, dataset=True
591 )
592
~/.conda/envs/py36/lib/python3.6/site-packages/datasets/load.py in prepare_module(path, script_version, download_config, download_mode, dataset, force_local_path, **download_kwargs)
266 file_path = hf_github_url(path=path, name=name, dataset=dataset, version=script_version)
267 try:
--> 268 local_path = cached_path(file_path, download_config=download_config)
269 except FileNotFoundError:
270 if script_version is not None:
~/.conda/envs/py36/lib/python3.6/site-packages/datasets/utils/file_utils.py in cached_path(url_or_filename, download_config, **download_kwargs)
306 user_agent=download_config.user_agent,
307 local_files_only=download_config.local_files_only,
--> 308 use_etag=download_config.use_etag,
309 )
310 elif os.path.exists(url_or_filename):
~/.conda/envs/py36/lib/python3.6/site-packages/datasets/utils/file_utils.py in get_from_cache(url, cache_dir, force_download, proxies, etag_timeout, resume_download, user_agent, local_files_only, use_etag)
473 elif response is not None and response.status_code == 404:
474 raise FileNotFoundError("Couldn't find file at {}".format(url))
--> 475 raise ConnectionError("Couldn't reach {}".format(url))
476
477 # Try a second time
ConnectionError: Couldn't reach https://raw.githubusercontent.com/huggingface/datasets/1.1.2/datasets/csv/csv.py
I also tried to connect to the site directly with requests:
import requests
requests.head("https://raw.githubusercontent.com/huggingface/datasets/1.1.2/datasets/csv/csv.py")
A similar error occurs:
---------------------------------------------------------------------------
ConnectionRefusedError Traceback (most recent call last)
~/.conda/envs/py36/lib/python3.6/site-packages/urllib3/connection.py in _new_conn(self)
159 conn = connection.create_connection(
--> 160 (self._dns_host, self.port), self.timeout, **extra_kw
161 )
~/.conda/envs/py36/lib/python3.6/site-packages/urllib3/util/connection.py in create_connection(address, timeout, source_address, socket_options)
83 if err is not None:
---> 84 raise err
85
~/.conda/envs/py36/lib/python3.6/site-packages/urllib3/util/connection.py in create_connection(address, timeout, source_address, socket_options)
73 sock.bind(source_address)
---> 74 sock.connect(sa)
75 return sock
ConnectionRefusedError: [Errno 111] Connection refused
During handling of the above exception, another exception occurred:
NewConnectionError Traceback (most recent call last)
~/.conda/envs/py36/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
676 headers=headers,
--> 677 chunked=chunked,
678 )
~/.conda/envs/py36/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
380 try:
--> 381 self._validate_conn(conn)
382 except (SocketTimeout, BaseSSLError) as e:
~/.conda/envs/py36/lib/python3.6/site-packages/urllib3/connectionpool.py in _validate_conn(self, conn)
975 if not getattr(conn, "sock", None): # AppEngine might not have `.sock`
--> 976 conn.connect()
977
~/.conda/envs/py36/lib/python3.6/site-packages/urllib3/connection.py in connect(self)
307 # Add certificate verification
--> 308 conn = self._new_conn()
309 hostname = self.host
~/.conda/envs/py36/lib/python3.6/site-packages/urllib3/connection.py in _new_conn(self)
171 raise NewConnectionError(
--> 172 self, "Failed to establish a new connection: %s" % e
173 )
NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f3cceda5e48>: Failed to establish a new connection: [Errno 111] Connection refused
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
~/.conda/envs/py36/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
448 retries=self.max_retries,
--> 449 timeout=timeout
450 )
~/.conda/envs/py36/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
724 retries = retries.increment(
--> 725 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
726 )
~/.conda/envs/py36/lib/python3.6/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
438 if new_retry.is_exhausted():
--> 439 raise MaxRetryError(_pool, url, error or ResponseError(cause))
440
MaxRetryError: HTTPSConnectionPool(host='raw.githubusercontent.com', port=443): Max retries exceeded with url: /huggingface/datasets/1.1.2/datasets/csv/csv.py (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f3cceda5e48>: Failed to establish a new connection: [Errno 111] Connection refused',))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
<ipython-input-20-18cc3eb4a049> in <module>
1 import requests
2
----> 3 requests.head("https://raw.githubusercontent.com/huggingface/datasets/1.1.2/datasets/csv/csv.py")
~/.conda/envs/py36/lib/python3.6/site-packages/requests/api.py in head(url, **kwargs)
102
103 kwargs.setdefault('allow_redirects', False)
--> 104 return request('head', url, **kwargs)
105
106
~/.conda/envs/py36/lib/python3.6/site-packages/requests/api.py in request(method, url, **kwargs)
59 # cases, and look like a memory leak in others.
60 with sessions.Session() as session:
---> 61 return session.request(method=method, url=url, **kwargs)
62
63
~/.conda/envs/py36/lib/python3.6/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
528 }
529 send_kwargs.update(settings)
--> 530 resp = self.send(prep, **send_kwargs)
531
532 return resp
~/.conda/envs/py36/lib/python3.6/site-packages/requests/sessions.py in send(self, request, **kwargs)
641
642 # Send the request
--> 643 r = adapter.send(request, **kwargs)
644
645 # Total elapsed time of the request (approximately)
~/.conda/envs/py36/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
514 raise SSLError(e, request=request)
515
--> 516 raise ConnectionError(e, request=request)
517
518 except ClosedPoolError as e:
ConnectionError: HTTPSConnectionPool(host='raw.githubusercontent.com', port=443): Max retries exceeded with url: /huggingface/datasets/1.1.2/datasets/csv/csv.py (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f3cceda5e48>: Failed to establish a new connection: [Errno 111] Connection refused',))
Issue Analytics
- State:
- Created 3 years ago
- Comments:11 (3 by maintainers)
Top Results From Across the Web
Seaborn load_dataset - python - Stack Overflow
load_dataset looks for online csv files on https://github.com/mwaskom/seaborn-data. Here's the docstring: Load a dataset from the online ...
Read more >
Solved: Error - Get Data / Folder - csv files - Microsoft Power BI ...
Solved: Hello Experts, I'm trying to get the data from my local machine from Folder, which has around 50 csv files (same format)....
Read more >
Accessing data in the Kaggle script
I have imported data in my kaggle account but while reading using read_csv I am getting below error FileNotFoundError: [Errno 2] File ...
Read more >
Create an SSRS Report using a CSV Data Source (overcome ...
4.0' provider is not registered on the local machine. 3) ERROR [IM002] [Microsoft][ODBC Driver Manager] Data source name not found and no ...
Read more >
Load - Hugging Face
Your data can be stored in various places; they can be on your local machine's ... This dataset repository contains CSV files, and...
Read more >
Top Related Medium Post
No results found
Top Related StackOverflow Question
No results found
Troubleshoot Live Code
Lightrun enables developers to add logs, metrics and snapshots to live code - no restarts or redeploys required.
Start Free
Top Related Reddit Thread
No results found
Top Related Hackernoon Post
No results found
Top Related Tweet
No results found
Top Related Dev.to Post
No results found
Top Related Hashnode Post
No results found
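你把那个脚本下载到你本地安装目录下,然后
(English: Download that script (csv.py) into your local installation directory, then call:)
load_dataset(csv_script_path, data_files=data_files)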
Thank you! It works very well.