Consistent deadlock with `shuffle="p2p"` when merging dataframes with many partitions
What happened:
The code below (which needs typer in addition to the usual dask/distributed/pandas/numpy) pretty consistently hangs after a worker AssertionError when using the p2p shuffle option, provided I have both many workers and many partitions per worker. In particular, on a 40-physical-core Broadwell machine with plentiful (1TB) RAM, the following execution nearly always crashes and then hangs:
$ python p2p-shuffle-hang.py --num-workers 40 --rows-per-worker 5_000_000 --partitions-per-worker 100 --shuffle-type p2p
...
2022-08-31 06:34:33,674 - distributed.worker - WARNING - Compute Failed
Key: ('shuffle-unpack-737f7325f3b9ae355d9fcea2be0ab659', 2068)
Function: shuffle_unpack
args: ('737f7325f3b9ae355d9fcea2be0ab659', 2068, None)
kwargs: {}
Exception: 'AssertionError()'
2022-08-31 06:34:37,712 - distributed.worker - WARNING - Compute Failed
Key: ('shuffle-unpack-737f7325f3b9ae355d9fcea2be0ab659', 264)
Function: shuffle_unpack
args: ('737f7325f3b9ae355d9fcea2be0ab659', 264, None)
kwargs: {}
Exception: 'AssertionError()'
At this point the dashboard shows that no tasks are processing (presumably because they are waiting for these now-failed tasks); a cluster dump is attached below.
On the same system I could also reproduce with --num-workers 4 --partitions-per-worker 1000, though I was not able to reproduce it on a different system (which has a faster disk and RAM).
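For reference, a dump like the one attached can be captured while the cluster is hung via Client.dump_cluster_state, assuming the scheduler is still responsive; this is a minimal sketch (not part of the reproducer) with a hypothetical scheduler address:

from distributed import Client

client = Client("tcp://127.0.0.1:8786")  # hypothetical address of the hung cluster's scheduler
# Collects scheduler and worker state into a compressed msgpack file for offline inspection.
client.dump_cluster_state("cluster-dump")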
Minimal Complete Verifiable Example:
Reproducer
import math
from enum import Enum, IntEnum, auto
from itertools import repeat
from typing import cast
import typer
import numpy as np
import pandas as pd
from dask.base import tokenize
from dask.dataframe.core import DataFrame
from distributed import Client, LocalCluster
from distributed.client import _wait, ALL_COMPLETED
class Type(IntEnum):
LEFT = auto()
RIGHT = auto()
def make_chunk(chunk_index, size, npartition, typ, match_fraction):
    # Build one partition of synthetic data. LEFT partitions carry a contiguous
    # key range; RIGHT partitions mix keys that overlap the LEFT table (roughly
    # match_fraction of each LEFT partition) with keys that match nothing.
if typ == Type.LEFT:
start = size * chunk_index
stop = start + size
key = np.arange(start, stop, dtype=np.int64)
value = np.random.randint(0, 2000, size=size, dtype=np.int64)
return pd.DataFrame({"key": key, "value": value})
elif typ == Type.RIGHT:
sub_size = size // npartition
to_use = max(math.ceil(sub_size * match_fraction), 1)
arrays = []
for i in range(npartition):
start = size * i + (sub_size * chunk_index)
end = start + sub_size
arrays.append(
np.random.permutation(np.arange(start, end, dtype=np.int64)[:to_use])
)
key_match = np.concatenate(arrays, axis=0)
(got,) = key_match.shape
missing = size - got
start = size * npartition + size * chunk_index
end = start + missing
key_no_match = np.arange(start, end, dtype=np.int64)
key = np.concatenate([key_match, key_no_match], axis=0)
value = np.random.randint(0, 2000, size=size, dtype=np.int64)
return pd.DataFrame({"key": key, "value": value})
else:
raise ValueError(f"Unknown dataframe type {typ}")
def make_ddf(chunk_size, npartition, match_fraction, typ):
    # Construct a dask DataFrame directly from a low-level graph of
    # make_chunk calls, with unknown divisions.
meta = pd.DataFrame(
{"key": np.empty(0, dtype=np.int64), "value": np.empty(0, dtype=np.int64)}
)
divisions = list(repeat(None, npartition + 1))
name = "generate-data-" + tokenize(chunk_size, npartition, match_fraction, typ)
dsk = {
(name, i): (make_chunk, i, chunk_size, npartition, typ, match_fraction)
for i in range(npartition)
}
return DataFrame(dsk, name, meta, divisions)
class ShuffleType(str, Enum):
P2P = "p2p"
DEFAULT = "default"
def main(
num_workers: int = typer.Option(
1, help="Number of workers"
),
rows_per_worker: int = typer.Option(
5_000_000, help="Total dataframe rows per worker"
),
partitions_per_worker: int = typer.Option(
1, help="Number of partitions per worker"
),
shuffle_type: ShuffleType = typer.Option(
None, help="Dask shuffle implementation"
)
):
cluster = LocalCluster(n_workers=num_workers, threads_per_worker=1)
client = Client(cluster, set_as_default=False)
rows_per_chunk = rows_per_worker // partitions_per_worker
npartition = partitions_per_worker * num_workers
left = make_ddf(rows_per_chunk, npartition, 0.3, Type.LEFT)
right = make_ddf(rows_per_chunk, npartition, 0.3, Type.RIGHT)
left = cast(DataFrame, client.persist(left))
right = cast(DataFrame, client.persist(right))
_ = client.sync(_wait, left, timeout=None, return_when=ALL_COMPLETED)
_ = client.sync(_wait, right, timeout=None, return_when=ALL_COMPLETED)
shuffle = {ShuffleType.DEFAULT: None}.get(shuffle_type, shuffle_type)
    # The inner merge on "key" is what exercises the chosen shuffle implementation.
    merged = left.merge(right, on=["key"], how="inner", shuffle=shuffle)
merged = client.persist(merged)
_ = client.sync(_wait, merged, timeout=None, return_when=ALL_COMPLETED)
del cluster
client.close()
client.shutdown()
del client
if __name__ == "__main__":
    typer.run(main)
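For completeness, the second configuration that reproduced on the same machine corresponds to the following invocation (rows-per-worker left at its 5_000_000 default):

$ python p2p-shuffle-hang.py --num-workers 4 --partitions-per-worker 1000 --shuffle-type p2p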
Environment:
- Dask version: 2022.8.1+7.g19a51474c
- Distributed version: 2022.8.1+29.ga5d68657
- Python version: 3.9.13 | packaged by conda-forge | (main, May 27 2022, 16:56:21) [GCC 10.3.0]
- Operating System: Ubuntu 18.04
- Install method (conda, pip, source): conda (dask/label/dev channel)
Cluster Dump State:
Top GitHub Comments
Running on that branch I’m unable to reproduce the original error and (after a couple of repeats) have yet to see any hangs.
Should be closed after https://github.com/dask/distributed/pull/7268. Please reopen if the issue is not resolved.