RuntimeError: DataLoader worker (pid(s) 18864, 2468, 18668, 19464, 1744, 14436, 8604) exited unexpectedly
See original GitHub issue
Python 3.8.5, OS: Windows 10
Code to reproduce:
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=700)
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2)
train_data=pd.concat([pd.DataFrame(X_train),pd.DataFrame(y_train)],axis=1)
train_data.columns=['col{}'.format(i) for i in range(X_train.shape[1])]+['class']
predictor = TabularPredictor(label='class').fit(train_data, time_limit=30)
I got this error: RuntimeError: DataLoader worker (pid(s) 18864, 2468, 18668, 19464, 1744, 14436, 8604) exited unexpectedly
Error Log
---------------------------------------------------------------------------
Empty Traceback (most recent call last)
d:\tutorials\automl\venv\lib\site-packages\torch\utils\data\dataloader.py in _try_get_data(self, timeout)
871 try:
--> 872 data = self._data_queue.get(timeout=timeout)
873 return (True, data)
C:\Anaconda3\lib\queue.py in get(self, block, timeout)
177 if remaining <= 0.0:
--> 178 raise Empty
179 self.not_empty.wait(remaining)
Empty:
The above exception was the direct cause of the following exception:
RuntimeError Traceback (most recent call last)
<ipython-input-7-78e78194449c> in <module>
----> 1 predictor = TabularPredictor(label='class').fit(train_data, time_limit=30)
d:\tutorials\automl\venv\lib\site-packages\autogluon\core\utils\decorators.py in _call(*args, **kwargs)
27 def _call(*args, **kwargs):
28 gargs, gkwargs = g(*other_args, *args, **kwargs)
---> 29 return f(*gargs, **gkwargs)
30 return _call
31 return _unpack_inner
d:\tutorials\automl\venv\lib\site-packages\autogluon\tabular\predictor\predictor.py in fit(self, train_data, tuning_data, time_limit, presets, hyperparameters, feature_metadata, **kwargs)
687
688 core_kwargs = {'ag_args': ag_args, 'ag_args_ensemble': ag_args_ensemble, 'ag_args_fit': ag_args_fit, 'excluded_model_types': excluded_model_types}
--> 689 self._learner.fit(X=train_data, X_val=tuning_data, X_unlabeled=unlabeled_data,
690 holdout_frac=holdout_frac, num_bag_folds=num_bag_folds, num_bag_sets=num_bag_sets, num_stack_levels=num_stack_levels,
691 hyperparameters=hyperparameters, core_kwargs=core_kwargs, time_limit=time_limit, verbosity=verbosity)
d:\tutorials\automl\venv\lib\site-packages\autogluon\tabular\learner\abstract_learner.py in fit(self, X, X_val, **kwargs)
124 raise AssertionError('Learner is already fit.')
125 self._validate_fit_input(X=X, X_val=X_val, **kwargs)
--> 126 return self._fit(X=X, X_val=X_val, **kwargs)
127
128 def _fit(self, X: DataFrame, X_val: DataFrame = None, scheduler_options=None, hyperparameter_tune=False,
d:\tutorials\automl\venv\lib\site-packages\autogluon\tabular\learner\default_learner.py in _fit(self, X, X_val, X_unlabeled, holdout_frac, num_bag_folds, num_bag_sets, time_limit, verbosity, **trainer_fit_kwargs)
93
94 self.save()
---> 95 trainer.fit(X, y, X_val=X_val, y_val=y_val, X_unlabeled=X_unlabeled, holdout_frac=holdout_frac, time_limit=time_limit_trainer, **trainer_fit_kwargs)
96 self.save_trainer(trainer=trainer)
97 time_end = time.time()
d:\tutorials\automl\venv\lib\site-packages\autogluon\tabular\trainer\auto_trainer.py in fit(self, X, y, hyperparameters, X_val, y_val, X_unlabeled, feature_prune, holdout_frac, num_stack_levels, core_kwargs, time_limit, **kwargs)
48 logger.log(20, f'Automatically generating train/validation split with holdout_frac={holdout_frac}, Train Rows: {len(X)}, Val Rows: {len(X_val)}')
49
---> 50 self._train_multi_and_ensemble(X, y, X_val, y_val, X_unlabeled=X_unlabeled, hyperparameters=hyperparameters,
51 feature_prune=feature_prune,
52 num_stack_levels=num_stack_levels, time_limit=time_limit, core_kwargs=core_kwargs)
d:\tutorials\automl\venv\lib\site-packages\autogluon\tabular\trainer\abstract_trainer.py in _train_multi_and_ensemble(self, X, y, X_val, y_val, hyperparameters, X_unlabeled, num_stack_levels, time_limit, **kwargs)
1289 self._num_rows_train += len(X_val)
1290 self._num_cols_train = len(list(X.columns))
-> 1291 model_names_fit = self.train_multi_levels(X, y, hyperparameters=hyperparameters, X_val=X_val, y_val=y_val,
1292 X_unlabeled=X_unlabeled, level_start=1, level_end=num_stack_levels+1, time_limit=time_limit, **kwargs)
1293 if len(self.get_model_names()) == 0:
d:\tutorials\automl\venv\lib\site-packages\autogluon\tabular\trainer\abstract_trainer.py in train_multi_levels(self, X, y, hyperparameters, X_val, y_val, X_unlabeled, base_model_names, feature_prune, core_kwargs, aux_kwargs, level_start, level_end, time_limit, name_suffix, relative_stack)
255 if level != 1:
256 feature_prune = False # TODO: Enable feature prune on levels > 1
--> 257 base_model_names, aux_models = self.stack_new_level(
258 X=X, y=y, X_val=X_val, y_val=y_val, X_unlabeled=X_unlabeled,
259 models=hyperparameters, level=level, base_model_names=base_model_names,
d:\tutorials\automl\venv\lib\site-packages\autogluon\tabular\trainer\abstract_trainer.py in stack_new_level(self, X, y, models, X_val, y_val, X_unlabeled, level, base_model_names, feature_prune, core_kwargs, aux_kwargs, name_suffix)
290 aux_models = self.stack_new_level_aux(X=X, y=y, base_model_names=core_models, level=level+1, **aux_kwargs)
291 else:
--> 292 aux_models = self.stack_new_level_aux(X=X_val, y=y_val, fit=False, base_model_names=core_models, level=level+1, **aux_kwargs)
293 return core_models, aux_models
294
d:\tutorials\automl\venv\lib\site-packages\autogluon\tabular\trainer\abstract_trainer.py in stack_new_level_aux(self, X, y, base_model_names, level, fit, stack_name, time_limit, name_suffix, get_models_func, check_if_best)
367 Auxiliary models never use the original features and only train with the predictions of other models as features.
368 """
--> 369 X_stack_preds = self.get_inputs_to_stacker(X, base_models=base_model_names, fit=fit, use_orig_features=False)
370 if self.weight_evaluation:
371 X, w = extract_column(X, self.sample_weight) # TODO: consider redesign with w as separate arg instead of bundled inside X
d:\tutorials\automl\venv\lib\site-packages\autogluon\tabular\trainer\abstract_trainer.py in get_inputs_to_stacker(self, X, base_models, model_pred_proba_dict, fit, use_orig_features)
503 base_models = []
504 if not fit:
--> 505 model_pred_proba_dict = self.get_model_pred_proba_dict(X=X, models=base_models, model_pred_proba_dict=model_pred_proba_dict)
506 model_pred_proba_list = [model_pred_proba_dict[model] for model in base_models]
507 else:
d:\tutorials\automl\venv\lib\site-packages\autogluon\tabular\trainer\abstract_trainer.py in get_model_pred_proba_dict(self, X, models, model_pred_proba_dict, model_pred_time_dict, fit, record_pred_time)
487 model_pred_proba_dict[model_name] = model.predict_proba(X, **preprocess_kwargs)
488 else:
--> 489 model_pred_proba_dict[model_name] = model.predict_proba(X)
490
491 if record_pred_time:
d:\tutorials\automl\venv\lib\site-packages\autogluon\core\models\abstract\abstract_model.py in predict_proba(self, X, normalize, **kwargs)
456 if normalize is None:
457 normalize = self.normalize_pred_probas
--> 458 y_pred_proba = self._predict_proba(X=X, **kwargs)
459 if normalize:
460 y_pred_proba = normalize_pred_probas(y_pred_proba, self.problem_type)
d:\tutorials\automl\venv\lib\site-packages\autogluon\tabular\models\fastainn\tabular_nn_fastai.py in _predict_proba(self, X, **kwargs)
351 X, cat_names=self.cat_columns.copy(), cont_names=self.cont_columns.copy(), procs=self.procs))
352 with progress_disabled_ctx(self.model) as model:
--> 353 preds, _ = model.get_preds(ds_type=DatasetType.Test)
354 if single_row:
355 preds = preds[:1, :]
d:\tutorials\automl\venv\lib\site-packages\fastai\basic_train.py in get_preds(self, ds_type, activ, with_loss, n_batch, pbar)
338 if not getattr(self, 'opt', False): self.create_opt(defaults.lr, self.wd)
339 callbacks = [cb(self) for cb in self.callback_fns + listify(defaults.extra_callback_fns)] + listify(self.callbacks)
--> 340 return get_preds(self.model, self.dl(ds_type), cb_handler=CallbackHandler(callbacks),
341 activ=activ, loss_func=lf, n_batch=n_batch, pbar=pbar)
342
d:\tutorials\automl\venv\lib\site-packages\fastai\basic_train.py in get_preds(model, dl, pbar, cb_handler, activ, loss_func, n_batch)
42 "Tuple of predictions and targets, and optional losses (if `loss_func`) using `dl`, max batches `n_batch`."
43 res = [to_float(torch.cat(o).cpu()) for o in
---> 44 zip(*validate(model, dl, cb_handler=cb_handler, pbar=pbar, average=False, n_batch=n_batch))]
45 if loss_func is not None:
46 with NoneReduceOnCPU(loss_func) as lf: res.append(lf(res[0], res[1]))
d:\tutorials\automl\venv\lib\site-packages\fastai\basic_train.py in validate(model, dl, loss_func, cb_handler, pbar, average, n_batch)
55 val_losses,nums = [],[]
56 if cb_handler: cb_handler.set_dl(dl)
---> 57 for xb,yb in progress_bar(dl, parent=pbar, leave=(pbar is not None)):
58 if cb_handler: xb, yb = cb_handler.on_batch_begin(xb, yb, train=False)
59 val_loss = loss_batch(model, xb, yb, loss_func, cb_handler=cb_handler)
d:\tutorials\automl\venv\lib\site-packages\fastprogress\fastprogress.py in __iter__(self)
45 except Exception as e:
46 self.on_interrupt()
---> 47 raise e
48
49 def update(self, val):
d:\tutorials\automl\venv\lib\site-packages\fastprogress\fastprogress.py in __iter__(self)
39 if self.total != 0: self.update(0)
40 try:
---> 41 for i,o in enumerate(self.gen):
42 if i >= self.total: break
43 yield o
d:\tutorials\automl\venv\lib\site-packages\fastai\basic_data.py in __iter__(self)
73 def __iter__(self):
74 "Process and returns items from `DataLoader`."
---> 75 for b in self.dl: yield self.proc_batch(b)
76
77 @classmethod
d:\tutorials\automl\venv\lib\site-packages\torch\utils\data\dataloader.py in __next__(self)
433 if self._sampler_iter is None:
434 self._reset()
--> 435 data = self._next_data()
436 self._num_yielded += 1
437 if self._dataset_kind == _DatasetKind.Iterable and \
d:\tutorials\automl\venv\lib\site-packages\torch\utils\data\dataloader.py in _next_data(self)
1066
1067 assert not self._shutdown and self._tasks_outstanding > 0
-> 1068 idx, data = self._get_data()
1069 self._tasks_outstanding -= 1
1070 if self._dataset_kind == _DatasetKind.Iterable:
d:\tutorials\automl\venv\lib\site-packages\torch\utils\data\dataloader.py in _get_data(self)
1022 elif self._pin_memory:
1023 while self._pin_memory_thread.is_alive():
-> 1024 success, data = self._try_get_data()
1025 if success:
1026 return data
d:\tutorials\automl\venv\lib\site-packages\torch\utils\data\dataloader.py in _try_get_data(self, timeout)
883 if len(failed_workers) > 0:
884 pids_str = ', '.join(str(w.pid) for w in failed_workers)
--> 885 raise RuntimeError('DataLoader worker (pid(s) {}) exited unexpectedly'.format(pids_str)) from e
886 if isinstance(e, queue.Empty):
887 return (False, None)
RuntimeError: DataLoader worker (pid(s) 18864, 2468, 18668, 19464, 1744, 14436, 8604) exited unexpectedly
Issue Analytics
- State:
- Created 2 years ago
- Comments:8
Top Results From Across the Web
Runtime Error with DataLoader: exited unexpectedly #5301
RuntimeError : DataLoader worker (pid 30141) exited unexpectedly with exit code 1. However, whenever I run it with 0 workers, it works. Is ......
Read more >
PyTorch RuntimeError: DataLoader worker (pid(s) 15332 ...
I am using python 3.8 on Win10(64bit) and pytorch 1.4.0. More complete error messages ( --cuda means using GPU, --threads x means passing...
Read more >
How to solve RuntimeError: DataLoader worker (pid(s)) exited ...
I got the RuntimeError: DataLoader worker (pid(s) XXX) exited unexpectedly. RuntimeError Traceback (most recent call last) ...
Read more >
DataLoader worker (pid(s) 6740) exited unexpectedly
I am working on WGAN-GP with a data set having 292 images for training. I am facing few problems: 1- Generator's graph seems...
Read more >
RuntimeError: DataLoader worker (pid(s) 19716) exited ...
RuntimeError : DataLoader worker (pid(s) 19716) exited unexpectedly ... \site-packages\torch\utils\data\dataloader.py in _try_get_data(self, ...
Read more >
Top Related Medium Post
No results found
Top Related StackOverflow Question
No results found
Troubleshoot Live Code
Lightrun enables developers to add logs, metrics and snapshots to live code - no restarts or redeploys required.
Start Free
Top Related Reddit Thread
No results found
Top Related Hackernoon Post
No results found
Top Related Tweet
No results found
Top Related Dev.to Post
No results found
Top Related Hashnode Post
No results found
The following commands also resolve the issue, so I think the version error mentioned above is caused by an outdated pip version.
Sounds good, then this will be resolved in the next release.