TEST: CI: dask test_notebooks fails with RuntimeError: Cluster failed to start: No module named 'bokeh'
See original GitHub issue.
Example failure: https://github.com/modin-project/modin/actions/runs/3322991945/jobs/5551188570
This workflow run passed before the commit was merged, but failed when I ran it just now. I think bokeh is a dependency of dask.
Stack trace:
RuntimeError Traceback (most recent call last)
Cell In [2], line 13
10 url_path = "https://modin-test.s3.us-west-1.amazonaws.com/yellow_tripdata_2015-01.csv"
11 urllib.request.urlretrieve(url_path, "taxi.csv")
---> 13 modin_df = pd.read_csv(s3_path,parse_dates=["tpep_pickup_datetime","tpep_dropoff_datetime"],quoting=3,nrows=1000)
File ~/work/modin/modin/modin/logging/logger_decorator.py:128, in enable_logging.<locals>.decorator.<locals>.run_and_log(*args, **kwargs)
113 """
114 Compute function with logging if Modin logging is enabled.
115
(...)
125 Any
126 """
127 if LogMode.get() == "disable":
--> 128 return obj(*args,**kwargs)
130 logger = get_logger()
131 logger_level = getattr(logger, log_level)
File ~/work/modin/modin/modin/_compat/pandas_api/latest/io.py:156, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
154 f_locals.pop("mangle_dupe_cols", None)
155 kwargs = {k: v for k, v in f_locals.items() if k in _pd_read_csv_signature}
--> 156 return _read(**kwargs)
File ~/work/modin/modin/modin/_compat/pandas_api/common/io.py:35, in _read(**kwargs)
22 def _read(**kwargs):
23 """
24 Read csv file from local disk.
25
(...)
33 modin.pandas.DataFrame
34 """
---> 35 Engine.subscribe(_update_engine)
36 from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher
38 squeeze = kwargs.pop("squeeze", False)
File ~/work/modin/modin/modin/config/pubsub.py:217, in Parameter.subscribe(cls, callback)
208 """
209 Add `callback` to the `_subs` list and then execute it.
210
(...)
214 Callable to execute.
215 """
216 cls._subs.append(callback)
--> 217 callback(cls)
File ~/work/modin/modin/modin/pandas/__init__.py:161, in _update_engine(publisher)
158 if _is_first_update.get("Dask", True):
159 from modin.core.execution.dask.common import initialize_dask
--> 161 initialize_dask()
162 elif publisher.get() == "Cloudray":
163 from modin.experimental.cloud import get_connection
File ~/work/modin/modin/modin/core/execution/dask/common/utils.py:47, in initialize_dask()
45 memory_limit = Memory.get()
46 worker_memory_limit = memory_limit // num_cpus if memory_limit else "auto"
---> 47 client = Client(n_workers=num_cpus,memory_limit=worker_memory_limit)
49 num_cpus = len(client.ncores())
50 NPartitions._put(num_cpus)
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/distributed/client.py:982, in Client.__init__(self, address, loop, timeout, set_as_default, scheduler_file, security, asynchronous, name, heartbeat_interval, serializers, deserializers, extensions, direct_to_workers, connection_limit, **kwargs)
979 preload_argv = dask.config.get("distributed.client.preload-argv")
980 self.preloads = preloading.process_preloads(self, preload, preload_argv)
--> 982 self.start(timeout=timeout)
983 Client._instances.add(self)
985 from distributed.recreate_tasks import ReplayTaskClient
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/distributed/client.py:1172, in Client.start(self, **kwargs)
1170 self._started = asyncio.ensure_future(self._start(**kwargs))
1171 else:
-> 1172 sync(self.loop,self._start,**kwargs)
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/distributed/utils.py:406, in sync(loop, func, callback_timeout, *args, **kwargs)
404 if error:
405 typ, exc, tb = error
--> 406 raise exc.with_traceback(tb)
407 else:
408 return result
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/distributed/utils.py:379, in sync.<locals>.f()
377 future = asyncio.wait_for(future, callback_timeout)
378 future = asyncio.ensure_future(future)
--> 379 result = yield future
380 except Exception:
381 error = sys.exc_info()
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/tornado/gen.py:762, in Runner.run(self)
759 exc_info = None
761 try:
--> 762 value = future.result()
763 except Exception:
764 exc_info = sys.exc_info()
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/distributed/client.py:1238, in Client._start(self, timeout, **kwargs)
1235 elif self._start_arg is None:
1236 from distributed.deploy import LocalCluster
-> 1238 self.cluster = await LocalCluster(
1239 loop=self.loop,
1240 asynchronous=self._asynchronous,
1241 **self._startup_kwargs,
1242 )
1243 address = self.cluster.scheduler_address
1245 self._gather_semaphore = asyncio.Semaphore(5)
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/distributed/deploy/spec.py:398, in SpecCluster.__await__.<locals>._()
396 async def _():
397 if self.status == Status.created:
--> 398 await self._start()
399 await self.scheduler
400 await self._correct_state()
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/distributed/deploy/spec.py:319, in SpecCluster._start(self)
317 self.status = Status.failed
318 await self._close()
--> 319 raise RuntimeError(f"Cluster failed to start: {e}") from e
RuntimeError: Cluster failed to start: No module named 'bokeh'
RuntimeError: Cluster failed to start: No module named 'bokeh'
============================== 4 failed in 58.87s ==============================
Issue Analytics
- State:
- Created a year ago
- Comments: 5 (2 by maintainers)
Top Results From Across the Web
No module named 'bokeh' Many published solutions tried but ...
I am not sure why but I am having difficulties getting bokeh ... The error states: ModuleNotFoundError <ipython-input-35-0f0c4b610685> in ...
Read more >
Error No module named bokeh - Odoo
Hi, I get this error when I try to install an application: Odoo Server Error ... because there is an unresolved external dependency:...
Read more >
No module named 'bokeh.plotting.figure on Streamlit Cloud
Hello, Several months ago i developed a streamlit multipage app and deployed it through Streamlit cloud. All worked fine, till 4-5 days ago, ......
Read more >
no module named 'distributed' - You.com | The AI Search ...
Making a fresh conda environment with dask[complete] seems to have worked. Open side panel. No module named torch.
Read more >
Unrecognized resources bokeh - HoloViz Discourse
hv.extension('bokeh', 'matplotlib'). WARNING:param.notebook_extension: Holoviews bokeh extension could not be imported, it raised the ...
Read more >
Top Related Medium Post
No results found
Top Related StackOverflow Question
No results found
Troubleshoot Live Code
Lightrun enables developers to add logs, metrics and snapshots to live code - no restarts or redeploys required.
Start Free
Top Related Reddit Thread
No results found
Top Related Hackernoon Post
No results found
Top Related Tweet
No results found
Top Related Dev.to Post
No results found
Top Related Hashnode Post
No results found
This has been resolved upstream in Dask. My apologies for the issue. The previous release has been yanked from PyPI and a new release has been published. It is on PyPI now and should be on conda-forge shortly.
I also got this error in pandera CI: https://github.com/unionai-oss/pandera/actions/runs/3348692649/jobs/5549781070