目前,seaborn 可以根据 hue 变量、通过设置 split=True 来提供分割小提琴图的功能。我想制作一个“半”小提琴图,即省略每把小提琴其中一半的图。这样的图描绘了类似于每个连续变量的概率密度函数(pdf)的内容,并且只绘制在每个分类变量所对应垂直线的一侧。我已经设法“欺骗”了 seaborn:借助绘图取值范围之外的额外数据点和一个额外的虚拟 hue 来绘制此图,但我想知道能否在不实际更改数据集的情况下做到这一点,例如通过 sns.violinplot() 的参数来实现。
例如,下面这张图:
由以下代码片段创建:
# imports
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# load dataset from seaborn
datalist = sns.get_dataset_names()
dataset_name = 'iris'
if dataset_name not in datalist:
    # Stop here: without the dataset there is no `df`, and everything below
    # would otherwise fail later with an unrelated NameError.
    raise SystemExit(
        "Dataset with name: " + dataset_name + " was not found in the available datasets online by seaborn."
    )
df = sns.load_dataset(dataset_name)

# prepare data
# Append a single dummy row whose measurements lie far outside the plotted
# range. `DataFrame.append` no longer exists in pandas 2.x, and passing it a
# flat list never produced a real row, so build the row explicitly and
# concatenate it.
dummy_row = pd.DataFrame(
    [[-999, -999, -999, -999, 'setosa']],
    columns=df.columns,
)
df2 = pd.concat([df, dummy_row], ignore_index=True)

# Every real observation shares one hue level; only the dummy row gets a
# second level, which is what makes split=True draw half violins.
df2['huecol'] = 0.0
# Single .loc assignment instead of chained `df2['huecol'].iloc[-1] = ...`,
# which may write to a temporary copy.
df2.loc[df2.index[-1], 'huecol'] = -999

# plot
fig = plt.figure(figsize=(6, 6))
sns.violinplot(
    x='species',
    y="sepal_width",
    split=True,
    hue='huecol',
    inner='quartile',
    palette="pastel",
    data=df2,
    legend=False,
)
plt.title('iris')

# remove hue legend (only if one was actually drawn)
leg = plt.gca().get_legend()
if leg is not None:
    leg.remove()

# Restrict the view to the real data so the dummy point stays out of sight.
plt.ylim([1, 5.0])
plt.show()
一种替代做法是多次调用 seaborn.kdeplot,因为 violinplot 本质上是一个单侧核密度图。
示例(categorical_kde_plot 函数的定义见下文“代码”部分),调用方式如下:
# Example call: one density panel per day, panels placed side by side
# (horizontal=False). `df` must provide the "tip" and "day" columns.
categorical_kde_plot(
    df,
    variable="tip",
    category="day",
    category_order=["Thur", "Fri", "Sat", "Sun"],
    horizontal=False,
)
使用 horizontal=True 时,输出将如下所示:
代码:
import seaborn as sns
from matplotlib import pyplot as plt
def categorical_kde_plot(
    df,
    variable,
    category,
    category_order=None,
    horizontal=False,
    rug=True,
    figsize=None,
):
    """Draw a categorical KDE plot.

    One kernel density estimate of `variable` is drawn per category, each in
    its own subplot. The figure is displayed with ``plt.show()``; nothing is
    returned.

    Parameters
    ----------
    df: pd.DataFrame
        The data to plot
    variable: str
        The column in the `df` to plot (continuous variable)
    category: str
        The column in the `df` to use for grouping (categorical variable)
    category_order: sequence or None
        The categories to draw, in drawing order. If None, the unique
        values of ``df[category]`` are used.
    horizontal: bool
        If True, draw density plots horizontally (subplots stacked in one
        column, sharing the x axis). Otherwise, draw them vertically
        (subplots side by side in one row, sharing the y axis).
    rug: bool
        If True, add also a sns.rugplot.
    figsize: tuple or None
        If None, use default figsize of (7, 1*len(categories)), swapped to
        (1*len(categories), 7) for the vertical layout.
        If tuple, use that figsize. Given to plt.subplots as an argument.

    Raises
    ------
    ValueError
        If there are no categories to plot.
    """
    if category_order is None:
        categories = list(df[category].unique())
    else:
        categories = list(category_order)

    if not categories:
        raise ValueError("categorical_kde_plot needs at least one category to plot")

    # Only fall back to the default size when the caller did not pass one;
    # previously the argument was overwritten unconditionally.
    if figsize is None:
        default_size = (7, 1.0 * len(categories))
        figsize = default_size if horizontal else default_size[::-1]

    fig, axes = plt.subplots(
        nrows=len(categories) if horizontal else 1,
        ncols=1 if horizontal else len(categories),
        figsize=figsize,
        sharex=horizontal,
        sharey=not horizontal,
        # Always get an array back, so a single category still yields
        # something iterable instead of a bare Axes object.
        squeeze=False,
    )
    axes = axes.ravel()

    last_index = len(categories) - 1
    for i, (cat, ax) in enumerate(zip(categories, axes)):
        subset = df[df[category] == cat]

        sns.kdeplot(
            data=subset,
            x=variable if horizontal else None,
            y=None if horizontal else variable,
            # kde kwargs
            bw_adjust=0.5,
            clip_on=False,
            fill=True,
            alpha=1,
            linewidth=1.5,
            ax=ax,
            color="lightslategray",
        )

        # The shared variable axis is kept on exactly one subplot: the last
        # one in the horizontal layout, the first one in the vertical layout.
        keep_variable_axis = (i == last_index) if horizontal else (i == 0)

        if rug:
            sns.rugplot(
                data=subset,
                x=variable if horizontal else None,
                y=None if horizontal else variable,
                ax=ax,
                color="black",
                height=0.025 if keep_variable_axis else 0.04,
            )

        _format_axis(
            ax,
            cat,
            horizontal,
            keep_variable_axis=keep_variable_axis,
        )

    plt.tight_layout()
    plt.show()
def _format_axis(ax, category, horizontal=False, keep_variable_axis=True):
    """Strip an axes down to a single category label.

    The top and right spines are always hidden. The axis that does not carry
    the plotted variable loses its label and gets exactly one tick, placed at
    the midpoint of its current limits and labelled with `category`. When
    `keep_variable_axis` is False, the variable axis and its spine are hidden
    as well.

    Parameters
    ----------
    ax: matplotlib axes
        The axes to format (modified in place).
    category:
        The value shown as the single tick label.
    horizontal: bool
        If True, the category label goes on the y axis and the variable axis
        is the x axis; otherwise the roles are swapped.
    keep_variable_axis: bool
        If False, hide the variable axis and the spine belonging to it.
    """
    # Remove the axis lines
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)

    if not horizontal:
        # Category label on the x axis, variable on the y axis.
        ax.set_xlabel(None)
        low, high = ax.get_xlim()
        ax.set_xticks([(low + high) / 2])
        ax.set_xticklabels([category])
        if keep_variable_axis:
            return
        ax.get_yaxis().set_visible(False)
        ax.spines["left"].set_visible(False)
        return

    # Category label on the y axis, variable on the x axis.
    ax.set_ylabel(None)
    low, high = ax.get_ylim()
    ax.set_yticks([(low + high) / 2])
    ax.set_yticklabels([category])
    if keep_variable_axis:
        return
    ax.get_xaxis().set_visible(False)
    ax.spines["bottom"].set_visible(False)
if __name__ == "__main__":
    # Demo: densities of the "tip" column of seaborn's "tips" dataset,
    # one horizontally drawn panel per day.
    df = sns.load_dataset("tips")
    categorical_kde_plot(
        df,
        variable="tip",
        category="day",
        category_order=["Thur", "Fri", "Sat", "Sun"],
        horizontal=True,
    )
如果您同时使用了 hue,但数据不是成对的(因此在一些完整的小提琴之间会出现半个小提琴),请考虑把 hue 的类别名称(而不是布尔值)传给 hue_order。
当 hue 以布尔值形式给出时,hue_order 不起作用。
# Split each violin by "sex", naming the hue categories explicitly.
ax = sns.violinplot(
    data=df,
    x="day",
    y="total_bill",
    hue="sex",  # when you have hue
    split=True,
    hue_order=["Male", "Female"],  # set hue_order with cat instead of bool
    inner='quartile',
)
生成上图的完整代码:
import seaborn as sns
import matplotlib.pyplot as plt
if __name__ == "__main__":
    df = sns.load_dataset("tips")

    # Manually drop every row for male customers on Friday, so that this
    # day has data for only one of the two hue levels.
    df = df.drop(index=df[(df["sex"] == "Male") & (df["day"] == "Fri")].index)
    print(df)

    ax = sns.violinplot(
        data=df,
        x="day",
        y="total_bill",
        hue="sex",
        hue_order=["Male", "Female"],
        split=True,
        dodge=True,
        scale_hue=False,
        saturation=0.75,
        inner='quartile',
    )
    plt.show(block=True)