我有一个复杂的多变量数据集,其结构与此类似:
import pandas as pd
import numpy as np
import datetime as dt
from itertools import cycle, islice
# Synthetic demo dataset: N observations spaced `nbdays` days apart (the first
# one falls `nbdays` after `start_date`), a linearly increasing Rating, and
# categorical columns whose values simply repeat in a cycle.
N = 24
start_date = dt.date(2016, 1, 1)
nbdays = 365 // N


def _cycled(values):
    """Return the first N items obtained by repeating *values* endlessly."""
    return list(islice(cycle(values), N))


df = pd.DataFrame(
    data={
        'Date': [start_date + dt.timedelta(days=step * nbdays) for step in range(1, N + 1)],
        'Rating': [(100 / N) * step for step in range(1, N + 1)],
        'Plot': _cycled(range(1, 9)),
        'Treatment': _cycled(range(1, 7)),
        'Trial': _cycled(range(1, 4)),
        'Name': _cycled("ABCDEF"),
        'Target': _cycled("JKLMNOP"),
        'Part': _cycled("WXYZ"),
    }
)
我想要:
- 绘制 Rating 随 Date 变化的散点图;
- 按 Treatment 对各点着色;
- 在图的侧面为其余分类变量(Plot、Trial、Name、Target、Part)提供按钮,以便点击按钮切换相应点的可见性。

这是我的代码(数据集保存在变量 df 中):
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.palettes import Set1
from bokeh.models import (CDSView, BooleanFilter, Legend,
DatetimeTickFormatter, Range1d,
HoverTool)
from bokeh.models.widgets import CheckboxButtonGroup, Div
from bokeh.layouts import widgetbox, layout
from bokeh.io import curdoc
# Categorical columns. The first one ('Treatment') drives the point colour;
# every other column gets a label and a button group of its own.
columns = ['Treatment', 'Plot', 'Trial', 'Name', 'Target', 'Part']
categories = [sorted(df[name].unique()) for name in columns]
all_columns = ['Date', 'Rating'] + columns

# One Set1 colour per treatment value, paired in sorted order.
treatment_colormap = {value: shade for value, shade in zip(categories[0], Set1[6])}

# Create input controls: a caption plus a fully-selected button group for
# each non-treatment column.
divs = [Div(text=name + ':') for name in columns[1:]]
controls = [
    CheckboxButtonGroup(
        labels=[str(value) for value in values],
        active=list(range(len(values))),
    )
    for values in categories[1:]
]

# Create the ColumnDataSource used by the plot; it starts empty and is filled
# by update().
source = ColumnDataSource(data={name: [] for name in all_columns})
def select():
    """Return the rows of ``df`` that match every button group's selection.

    For each filterable column (all of ``columns`` except the first) the
    active button indices of its control are translated back to category
    values; a row is kept only when its value is selected in every column.
    """
    masks = []
    for column, control, category in zip(columns[1:], controls, categories[1:]):
        chosen = [category[index] for index in control.active]
        masks.append(df[column].isin(chosen))
    # AND all per-column masks together, see
    # https://stackoverflow.com/a/49027984/50065
    return df[np.logical_and.reduce(masks)]
# Renderers and legend created by the most recent update() call. They are kept
# so that the next call can take them off the figure before drawing again.
_previous_models = []


def _detach(model):
    """Remove *model* from every figure container it is registered in."""
    for container in (p.renderers, p.below):
        if model in container:
            container.remove(model)


def update():
    """Refresh the data source and redraw one circle renderer per treatment.

    The currently selected rows (see ``select``) are pushed into ``source``.
    For every treatment present in that selection a circle renderer is added
    whose view keeps only that treatment's rows, and a legend with one entry
    per treatment is placed below the plot.

    Renderers and the legend from the previous call are removed first.
    Without that step each call stacked another set of glyphs (whose boolean
    filters were computed for the old data) and another legend on the figure.
    """
    sdf = select()
    source.data = {column: sdf[column] for column in all_columns}

    # Drop whatever the previous call drew.
    while _previous_models:
        _detach(_previous_models.pop())

    glyphs = []
    selected_treatments = sorted(sdf['Treatment'].unique())
    for treatment in selected_treatments:
        # The mask is computed against the data just assigned, so its length
        # always matches the source.
        booleans = [value == treatment for value in source.data['Treatment']]
        view = CDSView(source=source, filters=[BooleanFilter(booleans)])
        color = treatment_colormap[treatment]
        glyphs.append(p.circle(x='Date', y='Rating', source=source, view=view,
                               line_color=color, fill_color=color))

    legend = Legend(items=[
        ("treatment {}".format(treatment), [glyph])
        for treatment, glyph in zip(selected_treatments, glyphs)
    ])
    # Configure the new legend directly instead of going through p.legend.
    legend.click_policy = 'hide'
    legend.location = 'bottom_center'
    legend.orientation = 'horizontal'
    p.add_layout(legend, 'below')

    _previous_models.extend(glyphs)
    _previous_models.append(legend)
def _rerun_update(attr, old, new):
    """Bokeh property-change callback: ignore the change details and redraw."""
    update()


# Any change to a button group's selection triggers a full refresh.
for control in controls:
    control.on_change('active', _rerun_update)
def datetime_in_miliseconds(date):
    """Convert a ``'dd/mm/YYYY'`` date string to milliseconds since the epoch.

    The string is parsed as a naive datetime at midnight and measured against
    the naive epoch 1970-01-01 00:00:00; no time-zone conversion is applied.

    Args:
        date: Date string in ``'%d/%m/%Y'`` format, e.g. ``'31/12/2016'``.

    Returns:
        The elapsed time since the epoch in milliseconds, as a float.

    Raises:
        ValueError: If *date* does not match the ``'%d/%m/%Y'`` format.
    """
    moment = dt.datetime.strptime(date, '%d/%m/%Y')
    # A literal naive epoch gives the same value as utcfromtimestamp(0), which
    # is deprecated since Python 3.12.
    epoch = dt.datetime(1970, 1, 1)
    return (moment - epoch).total_seconds() * 1000.0
# Hover tooltip: the formatted date first, then one row per remaining column.
tooltip_rows = [('Date', '@Date{%d/%m/%Y}')]
tooltip_rows.extend((column, '@' + column) for column in all_columns[1:])
hover = HoverTool(
    tooltips=tooltip_rows,
    formatters={'Date': 'datetime'},  # use 'datetime' formatter for 'Date' field
)

p = figure(x_axis_type="datetime", tools=[hover])
p.title.text = 'Date vs Rating'

# X axis: fixed to the year 2016, ticks shown as dd/mm/yy.
start = datetime_in_miliseconds('01/01/2016')
end = datetime_in_miliseconds('31/12/2016')
p.xaxis.axis_label = 'Date'
p.xaxis.formatter = DatetimeTickFormatter(days=['%d/%m/%y'])
p.x_range = Range1d(start, end)

# Y axis: fixed 0-100 range with lightly shaded grid bands.
p.yaxis.axis_label = 'Rating'
p.y_range = Range1d(0, 100)
p.ygrid.band_fill_color = "olive"
p.ygrid.band_fill_alpha = 0.1

# Side panel: each caption immediately followed by its button group.
sizing_mode = 'scale_width'
paired_widgets = [widget for pair in zip(divs, controls) for widget in pair]
inputs = widgetbox(*paired_widgets, sizing_mode=sizing_mode)
l = layout([[p, inputs]], sizing_mode=sizing_mode)

update()  # initial load of the data
curdoc().add_root(l)
运行 bokeh serve --show main.py(bokeh 版本 0.12.10)后,界面如下所示:
哪些功能正常:
哪些功能不正常:
我该如何解决最后两个问题?
这是一些想法:
- 使用 CustomJSFilter 包装一个 JavaScript 函数来完成数据过滤。
- 只调用一次 p.circle() 来绘制所有圆点。
- 使用 factor_cmap 将 Treatment 列映射到颜色。
- 使用 tags 属性在 Python 中保存数据,并在 JavaScript 中读取。

由于真正绘制数据的 GlyphRenderer 只有一个,图例的可见性切换对它不起作用。
要解决这个问题,可以创建一个虚拟的 ColumnDataSource,并用它多次调用 p.circle(),得到一组虚拟的 GlyphRenderer。为这些虚拟 GlyphRenderer 创建图例,并把它们 visible 属性的变化关联到一个 CustomJS 回调,该回调调用 source.change.emit() 来重绘图形。
因为所有过滤计算都由 JavaScript 执行,所以可以生成一个能够响应用户输入的静态 HTML 文件。
这是笔记本:
http://nbviewer.jupyter.org/gist/ruoyu0088/01ddf28ed041508304843f49a794d66a
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, CustomJS, CDSView, CustomJSFilter, HoverTool
from bokeh.models.widgets import CheckboxButtonGroup
from bokeh.io import show, output_notebook
from bokeh.palettes import Set1
from bokeh.transform import factor_cmap
from bokeh.layouts import widgetbox, layout
import pandas as pd
import numpy as np
import datetime as dt
from itertools import cycle, islice
# This snippet is written to run in a Jupyter notebook (see the linked
# notebook); presumably this directs Bokeh's show() output to the notebook.
output_notebook()
# Demo dataset: N rows, one every `nbdays` days counted from `start_date`,
# with a steadily rising Rating and cyclically repeating category columns.
N = 24
start_date = dt.date(2016, 1, 1)
nbdays = 365 // N
df = pd.DataFrame({
    'Date': [start_date + dt.timedelta(days=nbdays) * k for k in range(1, N + 1)],
    'Rating': [(100 / N) * k for k in range(1, N + 1)],
    'Plot': list(islice(cycle(range(1, 9)), N)),
    'Treatment': list(islice(cycle(range(1, 7)), N)),
    'Trial': list(islice(cycle(range(1, 4)), N)),
    'Name': list(islice(cycle("ABCDEF"), N)),
    'Target': list(islice(cycle("JKLMNOP"), N)),
    'Part': list(islice(cycle("WXYZ"), N)),
})
# Columns that get a button group each; Treatment is toggled through the
# legend instead.
columns = ('Plot', 'Trial', 'Name', 'Target', 'Part')
unique_items = [df[name].unique() for name in columns]

# Treatment values are turned into strings before they are used as colour-map
# factors and as legend tags.
df["Treatment"] = df["Treatment"].astype(str)
treatment_labels = df.Treatment.unique()

# `source` holds the real data; `dummy_source` is empty and only backs the
# placeholder glyphs that give the legend its entries.
source = ColumnDataSource(data=df)
dummy_source = ColumnDataSource(data={"x": [], "y": []})

tooltip_rows = [('Date', '@Date{%d/%m/%Y}')]
tooltip_rows += [(name, '@' + name) for name in columns]
hover = HoverTool(
    tooltips=tooltip_rows,
    formatters={'Date': 'datetime'},  # use 'datetime' formatter for 'Date' field
)
p = figure(x_axis_type="datetime", tools=[hover])

palette = Set1[9]
color = factor_cmap("Treatment", palette, treatment_labels)

# One empty placeholder renderer per treatment, tagged with its label so the
# filter can tell which treatments are currently visible.
for position, label in enumerate(treatment_labels):
    dummy_circle = p.circle(x="x", y="y", source=dummy_source,
                            legend="Treatment {}".format(label),
                            color=palette[position])
    dummy_circle.tags = [label]

p.legend.location = "bottom_right"
p.legend.click_policy = "hide"
# Body of the browser-side callback: signal that `source` changed so the view
# filter is evaluated again. The function is handed to CustomJS.from_py_func,
# so it is documented here rather than with a docstring.
def source_change(source=source):
    source.change.emit()


callback_source_change = CustomJS.from_py_func(source_change)

# Toggling a legend entry changes its placeholder renderer's `visible`
# property; hook the callback onto that change.
for legend_item in p.legend[0].items:
    placeholder = legend_item.renderers[0]
    placeholder.js_on_change("visible", callback_source_change)

# One fully-selected button group per filterable column, tagged with the
# column name it controls.
controls = []
for column_name, items in zip(columns, unique_items):
    group = CheckboxButtonGroup(labels=items.astype(str).tolist(),
                                active=list(range(len(items))))
    group.tags = [column_name]
    controls.append(group)
widgets = widgetbox(*controls)
# Row filter for the data view. It is passed to CustomJSFilter.from_py_func,
# so it runs in the browser; it is documented with comments rather than a
# docstring for that reason.
#
# Arguments (bound through their defaults):
#   source  - the ColumnDataSource holding the real data
#   legend  - the legend whose items wrap the per-treatment placeholder glyphs
#   widgets - the widget box containing one button group per filtered column
#
# Returns a list with one boolean per row: True when the row's treatment is
# visible in the legend and its value in every filtered column is selected.
def func_filter(source=source, legend=p.legend[0], widgets=widgets):
    # Each placeholder renderer carries its treatment label as its only tag.
    # Take the label itself (tags[0]) so the membership test below compares a
    # string with strings instead of a string with whole tag lists.
    visible_treatments = [item.renderers[0].tags[0] for item in legend.items if item.renderers[0].visible]
    date = source.data['Date']
    treatments = source.data['Treatment']
    res = []
    # Maps column name -> {selected label: button index}; only the keys are
    # used for the membership test.
    selectors = {}
    for widget in widgets.children:
        col = widget.tags[0]
        selectors[col] = dict([(widget.labels[i], i) for i in widget.active])
    for i in range(len(date)):
        flag = treatments[i] in visible_treatments
        for key, val in selectors.items():
            if source.data[key][i] not in val:
                flag = False
                break
        res.append(flag)
    return res
# The only renderer that draws real data: its rows are narrowed by the
# filter built from func_filter and coloured through the Treatment colour map.
row_filter = CustomJSFilter.from_py_func(func_filter)
view = CDSView(source=source, filters=[row_filter])
p.circle(x='Date', y='Rating', source=source, view=view,
         line_color=color, fill_color=color)

# Changing any button group's selection fires the same callback as the legend.
for group in controls:
    group.js_on_change("active", callback_source_change)

show(layout([[p, widgets]]))