Stuck on an issue?

Lightrun Answers was designed to reduce the constant googling that comes with debugging 3rd party libraries. It collects links to all the places you might be looking at while hunting down a tough bug.

And, if you’re still stuck at the end, we’re happy to hop on a call to see how we can help out.

Integration with Datashader issues (on_change, layout.image updates)

See original GitHub issue

First and foremost, awesome job with the tool, it looks terrific.

Jumping right in on the issue, I’m trying to integrate plotly with datashader, following mostly this article right here .

So, as far as I know there are two solutions to this problem:

You keep updating the image itself (see update_image function below);
You keep updating the data, aka doing the aggregation step again (see update_timeseries function below);

The two solutions produce different problems. The first solution (updating the image), seems like not all points are displayed, being cutoff at a certain range. You can see it here:

screen shot 2018-09-28 at 17 04 48

As for the second solution (updating the data) the on_change callback is being called repeatedly, without any changes made to the plot whatsoever. This also happens when autoscaling or resetting axes. You can see it here:

screen shot 2018-09-28 at 17 08 50

The plot is being zoomed in bit by bit (without me doing anything) which calls the on_change over and over.

Below you can use the code that I’m using to reproduce this.

Thanks in advance!

import plotly.graph_objs as go
import pandas as pd
import numpy as np
import datashader as ds
from datetime import datetime
import datashader.transfer_functions as tf
import dask.dataframe as dd
from math import floor

global plot_obj
plot_obj = None

global df
df = None

def generate_df(size=100000):
    d = {    
        'dates' : pd.date_range('2015-01-01', periods=size, freq='1min'),
        'unique_id' : np.arange(0, size),
        'ints' : np.random.randint(0, size, size=size),
        'floats' : np.random.randn(size),
        'bools' : np.random.choice([0, 1], size=size),
        'int_nans' : np.random.choice([0, 1, np.nan], size=size),
        'float_nans' : np.random.choice([0.0, 1.0, np.nan], size=size),
        'constant' : 1, 
        'categorical' : np.random.choice([10, 20, 30, 40, 50], size=size) , 
        'categorical_binary' : np.random.choice(['a', 'b'], size=size), 
        #'categorical_nans' : np.random.choice(['a', 'b', np.nan], size=size)
        'categorical_nans' : np.random.choice(['a', 'b', 'c'], size=size)
    }

    df = pd.DataFrame(d)
    # df['hardbools'] = df['bools'] == 1
    df['categorical_nans'] = df['categorical_nans'].replace('c', np.nan)
    # df['hardcategorical_nans'] = df['categorical_nans'].astype('category')
    df['categorical_binary'] = df['categorical_binary'].astype('category')
    df['categorical_nans'] = df['categorical_nans'].astype('category')

    # df = df.set_index('dates')
    df['dates_int'] = df['dates'].astype('int64')
    globals()['df'] = df
    return df

# ===================================DATA==========================
def update_timeseries(layout, x_range, y_range, plot_width, plot_height):
    print(f"On change")
    
    x_range = [
            int(pd.to_datetime(plot_obj.layout.xaxis.range[0]).timestamp()*1000000000), 
            int(pd.to_datetime(plot_obj.layout.xaxis.range[1]).timestamp()*1000000000)
    ]
    y_range = [
            plot_obj.layout.yaxis.range[0],
            plot_obj.layout.yaxis.range[1]]

    plot_width = floor(plot_width)
    plot_height = floor(plot_height)

    cvs = ds.Canvas(x_range=x_range, y_range=y_range, plot_height=plot_height, plot_width=plot_width)

    agg_scatter = cvs.points(df, 'dates_int', 'floats', agg=ds.any())

    pq = agg_scatter.to_pandas().transpose()
    a = pq.stack().reset_index()
    q = a.loc[a[a.columns[2]]==True]

    ew = [datetime.fromtimestamp(item/1000000000) for item in q.dates_int.values]

    with plot_obj.batch_update():
        plot_obj.data[0].x = ew
        plot_obj.data[0].y = q.floats.values
        plot_obj.layout.xaxis.range = (ew[0], ew[-1])
        plot_obj.layout.yaxis.range = (q.floats.min(), q.floats.max())

def datashader_data(size=1000000):
    df = generate_df(size=size)

    x_range=[df.head(1).dates.values[0].astype('int64'), df.tail(1).dates.values[0].astype('int64')]
    y_range=[df.floats.min(), df.floats.max()]
    plot_height=300
    plot_width=300

    cvs = ds.Canvas(x_range=x_range, y_range=y_range, plot_height=plot_height, plot_width=plot_width)

    agg_scatter = cvs.points(df, 'dates_int', 'floats', agg=ds.any())


    agg_scatter = agg_scatter.to_pandas().transpose()
    agg_scatter = agg_scatter.stack().reset_index()
    # select only pixels where data points are present
    agg_scatter = agg_scatter.loc[agg_scatter[agg_scatter.columns[2]]==True]

    # init plot
    d = go.Scatter(
            x=agg_scatter.dates_int.astype('int64').astype('<M8[ns]'), 
            y=agg_scatter.floats)#, mode='markers')
    f = go.FigureWidget(data=[d])

    # add callback
    f.layout.on_change(update_timeseries, 'xaxis.range', 'yaxis.range', 'width', 'height')
    f.layout.dragmode = 'zoom'
    
    globals()['plot_obj'] = f

    return f

# ===============================IMAGE===============================
def gen_ds_image(x_range, y_range, plot_width, plot_height):
    if x_range is None or y_range is None or plot_width is None or plot_height is None:
        return None
    
    cvs = ds.Canvas(x_range=x_range, y_range=y_range, plot_height=plot_height, plot_width=plot_width)
    agg_scatter = cvs.points(df, 'dates_int', 'floats', agg=ds.any())

    img = tf.shade(agg_scatter)
    img = tf.dynspread(img, threshold=0.95, max_px=5, shape='circle')
    
    return img.to_pil()

def update_ds_image(layout, x_range, y_range, plot_width, plot_height):
    print(f"On change")
    img = plot_obj.layout.images[0]
    
    x_range = [
            int(pd.to_datetime(plot_obj.layout.xaxis.range[0]).timestamp()*1000000000), 
            int(pd.to_datetime(plot_obj.layout.xaxis.range[1]).timestamp()*1000000000)]
    y_range = [plot_obj.layout.yaxis.range[0], plot_obj.layout.yaxis.range[1]]

    # Update with batch_update so all updates happen simultaneously
    with plot_obj.batch_update():
        img.x = x_range[0]
        img.y = y_range[1]
        img.sizex = x_range[1] - x_range[0]
        img.sizey = y_range[1] - y_range[0]
        img.source = gen_ds_image(x_range, y_range, plot_width, plot_height)


def datashader_image(size=100000):
    df = generate_df(size=size)

    x_range=[df.head(1).dates.values[0].astype('int64'), df.tail(1).dates.values[0].astype('int64')]
    y_range=[df.floats.min(), df.floats.max()]
    plot_height=600
    plot_width=1200

    initial_img = gen_ds_image(x_range, y_range, plot_width, plot_height)

    # init plot
    f = go.FigureWidget(
        data=[{
            'x': [
                datetime.fromtimestamp(df.head(1).dates.values[0].astype('int64')/1000000000), 
                datetime.fromtimestamp(df.tail(1).dates.values[0].astype('int64')/1000000000)
                ], 
            'y': y_range, 
            'mode': 'markers',
            'marker': {'opacity': 0} # invisible trace to init axes and to support autoresize
        }], 
        layout={'width': plot_width, 'height': plot_height}
    )

    # add image to plot
    f.layout.images = [
            go.layout.Image(
                source = initial_img,  # plotly now performs auto conversion of PIL image to png data URI
                xref = "x",
                yref = "y",
                x = x_range[0],
                y = y_range[1],
                sizex = x_range[1] - x_range[0],
                sizey = y_range[1] - y_range[0],
                sizing = "contain",
                layer = "below")
            ]

    f.layout.on_change(update_ds_image, 'xaxis.range', 'yaxis.range', 'width', 'height')
    f.layout.dragmode = 'zoom'

    globals()['plot_obj'] = f
    return f

Issue Analytics

State:
Created 5 years ago
Reactions:1
Comments:6 (4 by maintainers)

Top GitHub Comments

1reaction

jonmmeasecommented, Oct 8, 2018

Hi @pedroallenrevez , I think I figured out what’s going on with your datashader image example.

First, as I mentioned above, the sizing mode should be 'stretch'. And this does work with date axes after all, the problem is a difference in time representation. When expressed as integers, plotly.js needs time specified in milliseconds, rather than nanoseconds as is returned when a datetime64 is cast to an int64.

Here’s an updated example

import plotly.graph_objs as go
import pandas as pd
import numpy as np
import datashader as ds
from datetime import datetime
import datashader.transfer_functions as tf
import dask.dataframe as dd
from math import floor

global plot_obj
plot_obj = None

global df
df = None

def generate_df(size=100000):
    d = {    
        'dates' : pd.date_range('2015-01-01', periods=size, freq='1min'),
        'unique_id' : np.arange(0, size),
        'ints' : np.random.randint(0, size, size=size),
        'floats' : np.random.randn(size),
        'bools' : np.random.choice([0, 1], size=size),
        'int_nans' : np.random.choice([0, 1, np.nan], size=size),
        'float_nans' : np.random.choice([0.0, 1.0, np.nan], size=size),
        'constant' : 1, 
        'categorical' : np.random.choice([10, 20, 30, 40, 50], size=size) , 
        'categorical_binary' : np.random.choice(['a', 'b'], size=size), 
        #'categorical_nans' : np.random.choice(['a', 'b', np.nan], size=size)
        'categorical_nans' : np.random.choice(['a', 'b', 'c'], size=size)
    }

    df = pd.DataFrame(d)
    # df['hardbools'] = df['bools'] == 1
    df['categorical_nans'] = df['categorical_nans'].replace('c', np.nan)
    # df['hardcategorical_nans'] = df['categorical_nans'].astype('category')
    df['categorical_binary'] = df['categorical_binary'].astype('category')
    df['categorical_nans'] = df['categorical_nans'].astype('category')

    # df = df.set_index('dates')
    df['dates_int'] = df['dates'].astype('int64')
    globals()['df'] = df
    return df

# ===================================DATA==========================
def update_timeseries(layout, x_range, y_range, plot_width, plot_height):
    print(f"On change")
    
    x_range = [
            int(pd.to_datetime(plot_obj.layout.xaxis.range[0]).timestamp()*1000000000), 
            int(pd.to_datetime(plot_obj.layout.xaxis.range[1]).timestamp()*1000000000)
    ]
    y_range = [
            plot_obj.layout.yaxis.range[0],
            plot_obj.layout.yaxis.range[1]]

    plot_width = floor(plot_width)
    plot_height = floor(plot_height)

    cvs = ds.Canvas(x_range=x_range, y_range=y_range, plot_height=plot_height, plot_width=plot_width)

    agg_scatter = cvs.points(df, 'dates_int', 'floats', agg=ds.any())

    pq = agg_scatter.to_pandas().transpose()
    a = pq.stack().reset_index()
    q = a.loc[a[a.columns[2]]==True]

    ew = [datetime.fromtimestamp(item/1000000000) for item in q.dates_int.values]

    with plot_obj.batch_update():
        plot_obj.data[0].x = ew
        plot_obj.data[0].y = q.floats.values
        plot_obj.layout.xaxis.range = (ew[0], ew[-1])
        plot_obj.layout.yaxis.range = (q.floats.min(), q.floats.max())

def datashader_data(size=1000000):
    df = generate_df(size=size)

    x_range=[df.head(1).dates.values[0].astype('int64'), df.tail(1).dates.values[0].astype('int64')]
    y_range=[df.floats.min(), df.floats.max()]
    plot_height=300
    plot_width=300

    cvs = ds.Canvas(x_range=x_range, y_range=y_range, plot_height=plot_height, plot_width=plot_width)

    agg_scatter = cvs.points(df, 'dates_int', 'floats', agg=ds.any())


    agg_scatter = agg_scatter.to_pandas().transpose()
    agg_scatter = agg_scatter.stack().reset_index()
    # select only pixels where data points are present
    agg_scatter = agg_scatter.loc[agg_scatter[agg_scatter.columns[2]]==True]

    # init plot
    d = go.Scatter(
            x=agg_scatter.dates_int.astype('int64').astype('<M8[ns]'), 
            y=agg_scatter.floats)#, mode='markers')
    f = go.FigureWidget(data=[d])

    # add callback
    f.layout.on_change(update_timeseries, 'xaxis.range', 'yaxis.range', 'width', 'height')
    f.layout.dragmode = 'zoom'
    
    globals()['plot_obj'] = f

    return f

# ===============================IMAGE===============================
def gen_ds_image(x_range, y_range, plot_width, plot_height):
    if x_range is None or y_range is None or plot_width is None or plot_height is None:
        return None
    
    cvs = ds.Canvas(x_range=x_range, y_range=y_range, plot_height=plot_height, plot_width=plot_width)
    agg_scatter = cvs.points(df, 'dates_int', 'floats', agg=ds.any())

    img = tf.shade(agg_scatter)
    img = tf.dynspread(img, threshold=0.95, max_px=5, shape='circle')
    
    return img.to_pil()

def update_ds_image(layout, x_range, y_range, plot_width, plot_height):
    print(f"On change")
    img = plot_obj.layout.images[0]
    
    x_range_ns = [
            int(pd.to_datetime(plot_obj.layout.xaxis.range[0]).timestamp()*1000000000), 
            int(pd.to_datetime(plot_obj.layout.xaxis.range[1]).timestamp()*1000000000)]
    
    x_range_ms = [int(v/1000000) for v in x_range_ns]
    y_range = [plot_obj.layout.yaxis.range[0], plot_obj.layout.yaxis.range[1]]

    # Update with batch_update so all updates happen simultaneously
    with plot_obj.batch_update():
        img.x = x_range_ms[0]
        img.y = y_range[1]
        img.sizex = x_range_ms[1] - x_range_ms[0]
        img.sizey = y_range[1] - y_range[0]
        img.source = gen_ds_image(x_range_ns, y_range, plot_width, plot_height)


def datashader_image(size=100000):
    df = generate_df(size=size)

    x_range_ns =[df.head(1).dates.values[0].astype('int64'), df.tail(1).dates.values[0].astype('int64')]
    x_range_ms = [int(v/1000000) for v in x_range_ns]
    y_range=[df.floats.min(), df.floats.max()]
    plot_height=600
    plot_width=1000

    initial_img = gen_ds_image(x_range_ns, y_range, plot_width, plot_height)

    # init plot
    f = go.FigureWidget(
        data=[{
            'x': [
                datetime.fromtimestamp(df.head(1).dates.values[0].astype('int64')/1000000000), 
                datetime.fromtimestamp(df.tail(1).dates.values[0].astype('int64')/1000000000)
                ], 
            'y': y_range, 
            'mode': 'markers',
            'marker': {'opacity': 0} # invisible trace to init axes and to support autoresize
        }], 
        layout={'width': plot_width, 'height': plot_height}
    )

    # add image to plot
    f.layout.images = [
            go.layout.Image(
                source = initial_img,  # plotly now performs auto conversion of PIL image to png data URI
                xref = "x",
                yref = "y",
                x = x_range_ms[0],
                y = y_range[1],
                sizex = x_range_ms[1] - x_range_ms[0],
                sizey = y_range[1] - y_range[0],
                sizing = "stretch",
                layer = "below")
            ]

    f.layout.on_change(update_ds_image, 'xaxis.range', 'yaxis.range', 'width', 'height')
    f.layout.dragmode = 'zoom'

    globals()['plot_obj'] = f
    return f

datashader_image()

datashaderdates

Could you take another look at your datashader_data and see if this time discrepancy explains what you’re seeing there?

1reaction

jonmmeasecommented, Oct 2, 2018

I don’t think I quite follow what you’re intending to happen in the datashader_image example. When I run it I see the plot flash and automatically zoom in until no data is in view. It looks like you’re setting the axis ranges in the callback that response to axis range changes, which I wold expect to cause some problems like this.