如何向脊线图添加额外的图形信息

问题描述 投票:0回答:1

我的数据由多个嵌套类别组成,对于每个类别,我都可以生成一个堆叠密度图,如此处所示。

由于我有好几个这样的密度图,且每个密度图的数据都落在 x 轴 0 到 100 的范围内,我希望把每个堆叠密度图画成脊线图中的一条脊线。最终结果将是一张脊线图,其中每一行都是一个堆叠密度图。这可能吗?

由于脊线图的特点是每一行都会遮挡前一行,我认为观察者可能会误判堆叠密度图曲线下的面积,因为曲线的某些部分可能被下一条脊线遮住。因此,我想放弃在每条脊线中绘制堆叠密度图的想法,改为把每个变量绘制为一条脊线,并加上平均值线和标准差线,同时将两条标准差线之间曲线下方的区域涂上阴影。

应 JohanC 的要求,以下是我想寻求帮助的代码。不知为何,我无法去掉 y 轴上的 “Density”(密度)标签。

# seaborn ridge plots with penguins dataset
import errno
import logging
import os  # required by mkdir_p() and the output-path handling below

import matplotlib.pyplot as plt
import numpy as np
import pandas
import pandas as pd
# !pip install seaborn
import seaborn as sns

LOG_FORMAT = (
    "%(levelname) -5s time:%(asctime)s [%(funcName) "
    "-5s %(lineno) -5d]: %(message)s"
)
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
LOGGER = logging.getLogger(__name__)
logger_obj: logging.Logger = LOGGER

my_df = sns.load_dataset("penguins")
# Axes background given as RGBA; the fourth value is the alpha channel.
sns.set_theme(style="white", rc={"axes.facecolor": (1, 1, 1, 1)})


def mkdir_p(path):
    """Create directory *path* (and missing parents); no-op if it exists.

    Args:
        path: Directory to create.

    Raises:
        OSError: If the directory cannot be created, e.g. because *path*
            exists but is not a directory.
    """
    try:
        os.makedirs(path, exist_ok=True)
    except OSError as exc:
        # Only an "already exists" error on a real directory is tolerated.
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            return
        raise


def generate_plot(
    logger_obj: logging.Logger,
    my_df: pandas.DataFrame,
    sample_size: int,
    axs2,
):
    """Draw a ridge plot of ``flipper_length_mm`` per species and save it as PNG.

    Every species contributes the same number of randomly sampled rows:
    ``sample_size`` capped at the row count of the smallest species.

    Args:
        logger_obj: Logger that receives the per-species statistics and the
            output file name.
        my_df: Data with at least the columns ``species`` and
            ``flipper_length_mm``; it is copied, not modified.
        sample_size: Requested number of rows to sample per species.
        axs2: Unused; kept so existing callers keep working.
    """
    data = my_df.copy(deep=True)
    species_list: list = list(data["species"].unique())

    # Pass 1: log the raw per-species statistics and cap the sample size at
    # the smallest species so that every row of the plot uses equally many
    # observations.
    rows_per_species: int = sample_size
    for species in species_list:
        subset = data[data["species"] == species]
        species_record_count = len(subset)
        flipper_length_mm_sum = subset["flipper_length_mm"].sum()
        logger_obj.info("species is :'{0}', count is:{1}, flipper_length_mm_sum is:{2}".format(species, species_record_count, flipper_length_mm_sum))
        rows_per_species = min(rows_per_species, species_record_count)

    # Pass 2: draw the balanced sample for each species and log what was drawn.
    samples = []
    for species in species_list:
        sample = data[data["species"] == species].sample(rows_per_species)
        species_record_count = len(sample)
        flipper_length_mm_sum = sample["flipper_length_mm"].sum()
        logger_obj.info("species is :'{0}', count is:{1}, flipper_length_mm_sum is:{2}".format(species, species_record_count, flipper_length_mm_sum))
        samples.append(sample)
    sampled = pd.concat(samples, ignore_index=True)

    # Axes background with alpha 0; the rows are overlapped further down via
    # a negative hspace.
    sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0), "axes.linewidth": 2})
    palette = sns.color_palette("Set2", 12)
    g = sns.FacetGrid(data=sampled, palette=palette, row="species", hue="species", aspect=9, height=1.2)
    # NOTE(review): second theme call kept from the original script; it no
    # longer passes 'axes.linewidth' - confirm whether it is still needed.
    sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

    # Filled density first, then a white outline drawn over it.
    g.map_dataframe(sns.kdeplot, x="flipper_length_mm", fill=True, alpha=1)
    g.map_dataframe(sns.kdeplot, x="flipper_length_mm", color="white")

    def label_f(x, color, label):
        """Write the facet's hue label at the left edge of the current axes."""
        ax2 = plt.gca()
        ax2.text(0, .2, label, color="black", fontsize=13, ha="left", va="center", transform=ax2.transAxes)

    g.map(label_f, "species")
    g.fig.subplots_adjust(hspace=-.5)
    # No ylabel is passed here, so the "Density" label written by kdeplot
    # stays on every row (adding ylabel="" to this call would clear it).
    g.set(yticks=[], xlabel="flipper_length_mm")
    g.set_titles(col_template="", row_template="")
    g.despine(left=True)

    image_png_fn: str = "images/penguins.ridge_plot/sample_day_feature.flipper_length_mm.all_species.png"
    logger_obj.info("image_png_fn is :'{0}'".format(image_png_fn))
    # Make sure the target directory exists before saving the figure.
    mkdir_p(os.path.dirname(os.path.abspath(image_png_fn)))
    plt.savefig(image_png_fn)


# generate_plot() caps this request at the smallest species' row count.
sample_size: int = 30000
generate_plot(logger_obj, my_df, sample_size, None)

enter image description here

python matplotlib seaborn
1个回答
0
投票
下面是企鹅数据集的脊线图示例,每个子图都添加了平均值线和一个标准差范围内的阴影区域。针对您的具体情况,可能还需要做不少调整。(要删除 y 轴标签,可以使用 `g.set(..., ylabel='')`。)

import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

penguins = sns.load_dataset("penguins")

# Axes background with alpha 0; the rows are overlapped at the end via a
# negative hspace.
sns.set_theme(
    style="white",
    rc={"axes.facecolor": (0, 0, 0, 0), "axes.linewidth": 2},
)
palette = sns.color_palette("Set2", 12)
g = sns.FacetGrid(
    data=penguins,
    palette=palette,
    row="species",
    hue="species",
    aspect=9,
    height=1.2,
)

# One row (axes) per species, each paired with the next palette colour.
for (species_name, row_ax), fill_color in zip(g.axes_dict.items(), palette):
    # Values shown in this row.
    in_species = penguins["species"] == species_name
    flipper_lengths = penguins.loc[in_species, "flipper_length_mm"].values

    # Draw only the KDE outline; the line's data points are reused below to
    # build the fills and to find the curve height at the mean.
    sns.kdeplot(x=flipper_lengths, color="white", fill=False, ax=row_ax)
    center = np.nanmean(flipper_lengths)
    spread = np.nanstd(flipper_lengths)
    curve_x, curve_y = row_ax.lines[0].get_data()

    # Dotted vertical line at the mean, reaching up to the density curve.
    row_ax.vlines(
        center, 0, np.interp(center, curve_x, curve_y), color="black", ls=":"
    )
    # Fill the whole area under the curve with the species colour.
    row_ax.fill_between(curve_x, 0, curve_y, facecolor=fill_color, alpha=1)

    # Overlay a faint black fill on the part of the curve that lies within
    # one standard deviation of the mean, which darkens that region.
    lower, upper = center - spread, center + spread
    within_one_sd = (curve_x >= lower) & (curve_x <= upper)
    row_ax.fill_between(
        curve_x[within_one_sd],
        0,
        curve_y[within_one_sd],
        facecolor="black",
        alpha=0.06,
    )

    # Species name at the lower-left corner of the row (axes coordinates).
    row_ax.text(
        0,
        .05,
        species_name,
        color="black",
        fontsize=13,
        ha="left",
        va="bottom",
        transform=row_ax.transAxes,
    )

g.fig.subplots_adjust(hspace=-.5, bottom=.15)
g.despine(left=True)
g.set(title="", ylabel="", yticks=[], xlabel="flipper length (mm)")
plt.show()

seaborn ridge plot with mean and sdevs

© www.soinside.com 2019 - 2024. All rights reserved.