Integration with Datashader issues (on_change, layout.image updates)
See original GitHub issueFirst and foremost, awesome job with the tool, it looks terrific.
Jumping right in on the issue, I’m trying to integrate plotly with datashader, following mostly this article right here .
So, as far as I know there are two solutions to this problem:
- You keep updating the image itself (see
update_image
function below); - You keep updating the data, aka doing the aggregation step again (see
update_timeseries
function below);
The two solutions produce different problems. The first solution (updating the image), seems like not all points are displayed, being cutoff at a certain range. You can see it here:
As for the second solution (updating the data) the on_change
callback is being called repeatedly, without any changes made to the plot whatsoever. This also happens when autoscaling
or resetting axes
. You can see it here:
The plot is being zoomed in bit by bit (without me doing anything) which calls the on_change
over and over.
Below you can use the code that I’m using to reproduce this.
Thanks in advance!
import plotly.graph_objs as go
import pandas as pd
import numpy as np
import datashader as ds
from datetime import datetime
import datashader.transfer_functions as tf
import dask.dataframe as dd
from math import floor
global plot_obj
plot_obj = None
global df
df = None
def generate_df(size=100000):
d = {
'dates' : pd.date_range('2015-01-01', periods=size, freq='1min'),
'unique_id' : np.arange(0, size),
'ints' : np.random.randint(0, size, size=size),
'floats' : np.random.randn(size),
'bools' : np.random.choice([0, 1], size=size),
'int_nans' : np.random.choice([0, 1, np.nan], size=size),
'float_nans' : np.random.choice([0.0, 1.0, np.nan], size=size),
'constant' : 1,
'categorical' : np.random.choice([10, 20, 30, 40, 50], size=size) ,
'categorical_binary' : np.random.choice(['a', 'b'], size=size),
#'categorical_nans' : np.random.choice(['a', 'b', np.nan], size=size)
'categorical_nans' : np.random.choice(['a', 'b', 'c'], size=size)
}
df = pd.DataFrame(d)
# df['hardbools'] = df['bools'] == 1
df['categorical_nans'] = df['categorical_nans'].replace('c', np.nan)
# df['hardcategorical_nans'] = df['categorical_nans'].astype('category')
df['categorical_binary'] = df['categorical_binary'].astype('category')
df['categorical_nans'] = df['categorical_nans'].astype('category')
# df = df.set_index('dates')
df['dates_int'] = df['dates'].astype('int64')
globals()['df'] = df
return df
# ===================================DATA==========================
def update_timeseries(layout, x_range, y_range, plot_width, plot_height):
print(f"On change")
x_range = [
int(pd.to_datetime(plot_obj.layout.xaxis.range[0]).timestamp()*1000000000),
int(pd.to_datetime(plot_obj.layout.xaxis.range[1]).timestamp()*1000000000)
]
y_range = [
plot_obj.layout.yaxis.range[0],
plot_obj.layout.yaxis.range[1]]
plot_width = floor(plot_width)
plot_height = floor(plot_height)
cvs = ds.Canvas(x_range=x_range, y_range=y_range, plot_height=plot_height, plot_width=plot_width)
agg_scatter = cvs.points(df, 'dates_int', 'floats', agg=ds.any())
pq = agg_scatter.to_pandas().transpose()
a = pq.stack().reset_index()
q = a.loc[a[a.columns[2]]==True]
ew = [datetime.fromtimestamp(item/1000000000) for item in q.dates_int.values]
with plot_obj.batch_update():
plot_obj.data[0].x = ew
plot_obj.data[0].y = q.floats.values
plot_obj.layout.xaxis.range = (ew[0], ew[-1])
plot_obj.layout.yaxis.range = (q.floats.min(), q.floats.max())
def datashader_data(size=1000000):
df = generate_df(size=size)
x_range=[df.head(1).dates.values[0].astype('int64'), df.tail(1).dates.values[0].astype('int64')]
y_range=[df.floats.min(), df.floats.max()]
plot_height=300
plot_width=300
cvs = ds.Canvas(x_range=x_range, y_range=y_range, plot_height=plot_height, plot_width=plot_width)
agg_scatter = cvs.points(df, 'dates_int', 'floats', agg=ds.any())
agg_scatter = agg_scatter.to_pandas().transpose()
agg_scatter = agg_scatter.stack().reset_index()
# select only pixels where data points are present
agg_scatter = agg_scatter.loc[agg_scatter[agg_scatter.columns[2]]==True]
# init plot
d = go.Scatter(
x=agg_scatter.dates_int.astype('int64').astype('<M8[ns]'),
y=agg_scatter.floats)#, mode='markers')
f = go.FigureWidget(data=[d])
# add callback
f.layout.on_change(update_timeseries, 'xaxis.range', 'yaxis.range', 'width', 'height')
f.layout.dragmode = 'zoom'
globals()['plot_obj'] = f
return f
# ===============================IMAGE===============================
def gen_ds_image(x_range, y_range, plot_width, plot_height):
if x_range is None or y_range is None or plot_width is None or plot_height is None:
return None
cvs = ds.Canvas(x_range=x_range, y_range=y_range, plot_height=plot_height, plot_width=plot_width)
agg_scatter = cvs.points(df, 'dates_int', 'floats', agg=ds.any())
img = tf.shade(agg_scatter)
img = tf.dynspread(img, threshold=0.95, max_px=5, shape='circle')
return img.to_pil()
def update_ds_image(layout, x_range, y_range, plot_width, plot_height):
print(f"On change")
img = plot_obj.layout.images[0]
x_range = [
int(pd.to_datetime(plot_obj.layout.xaxis.range[0]).timestamp()*1000000000),
int(pd.to_datetime(plot_obj.layout.xaxis.range[1]).timestamp()*1000000000)]
y_range = [plot_obj.layout.yaxis.range[0], plot_obj.layout.yaxis.range[1]]
# Update with batch_update so all updates happen simultaneously
with plot_obj.batch_update():
img.x = x_range[0]
img.y = y_range[1]
img.sizex = x_range[1] - x_range[0]
img.sizey = y_range[1] - y_range[0]
img.source = gen_ds_image(x_range, y_range, plot_width, plot_height)
def datashader_image(size=100000):
df = generate_df(size=size)
x_range=[df.head(1).dates.values[0].astype('int64'), df.tail(1).dates.values[0].astype('int64')]
y_range=[df.floats.min(), df.floats.max()]
plot_height=600
plot_width=1200
initial_img = gen_ds_image(x_range, y_range, plot_width, plot_height)
# init plot
f = go.FigureWidget(
data=[{
'x': [
datetime.fromtimestamp(df.head(1).dates.values[0].astype('int64')/1000000000),
datetime.fromtimestamp(df.tail(1).dates.values[0].astype('int64')/1000000000)
],
'y': y_range,
'mode': 'markers',
'marker': {'opacity': 0} # invisible trace to init axes and to support autoresize
}],
layout={'width': plot_width, 'height': plot_height}
)
# add image to plot
f.layout.images = [
go.layout.Image(
source = initial_img, # plotly now performs auto conversion of PIL image to png data URI
xref = "x",
yref = "y",
x = x_range[0],
y = y_range[1],
sizex = x_range[1] - x_range[0],
sizey = y_range[1] - y_range[0],
sizing = "contain",
layer = "below")
]
f.layout.on_change(update_ds_image, 'xaxis.range', 'yaxis.range', 'width', 'height')
f.layout.dragmode = 'zoom'
globals()['plot_obj'] = f
return f
Issue Analytics
- State:
- Created 5 years ago
- Reactions:1
- Comments:6 (4 by maintainers)
Top GitHub Comments
Hi @pedroallenrevez , I think I figured out what’s going on with your datashader image example.
First, as I mentioned above, the sizing mode should be
'stretch'
. And this does work with date axes after all, the problem is a difference in time representation. When expressed as integers, plotly.js needs time specified in milliseconds, rather than nanoseconds as is returned when adatetime64
is cast to anint64
.Here’s an updated example
Could you take another look at your
datashader_data
and see if this time discrepancy explains what you’re seeing there?I don’t think I quite follow what you’re intending to happen in the
datashader_image
example. When I run it I see the plot flash and automatically zoom in until no data is in view. It looks like you’re setting the axis ranges in the callback that response to axis range changes, which I wold expect to cause some problems like this.