我的数据由多个嵌套类别组成,对于每个类别,我都可以生成一个堆叠密度图,如此处所示。
我有好几个这样的密度图,每个密度图的数据都落在 x 轴 0 到 100 的范围内。我原本希望把每个堆叠密度图画成山脊图中的一行,也就是说,最终结果是一张山脊图,其中每一行都是一个堆叠密度图。这可能吗?不过,山脊图的特点是每一行都会部分遮挡前一行,曲线的某些部分可能被下一行隐藏,因此我认为观察者可能会误判堆叠密度图曲线下的面积。所以,我想放弃在山脊图的每一行中绘制堆叠密度图的想法,改为把每个变量各画成一条山脊,同时画出平均值线和标准差线,并给两条标准差线之间曲线下方的区域涂上阴影。
应 JohanC 的要求,下面是我想寻求帮助的代码。不知为何,我无法去掉 y 轴上的“Density”(密度)标签。
# seaborn ridge plots with penguins dataset
import logging;
import os;

import matplotlib.pyplot as plt;
import numpy as np;
import pandas;
import pandas as pd;
#!pip install seaborn;
import seaborn as sns;
# Log record layout: level name, timestamp, calling function, line number, message.
LOG_FORMAT = "%(levelname) -5s time:%(asctime)s [%(funcName) -5s %(lineno) -5d]: %(message)s"
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

# Module logger, also exposed under the name that generate_plot receives.
LOGGER = logging.getLogger(__name__)
logger_obj: logging.Logger = LOGGER

# seaborn's "penguins" example dataset.
my_df = sns.load_dataset("penguins")

# Initial theme: white style with an opaque white (alpha = 1) axes background.
sns.set_theme(style="white", rc={"axes.facecolor": (1, 1, 1, 1)})
import errno;
def mkdir_p(path):
    """Create directory *path* together with any missing parents (like ``mkdir -p``).

    Does nothing when *path* is already a directory.

    Args:
        path: Directory to create.

    Raises:
        OSError: If the directory cannot be created, e.g. *path* exists but is
            a regular file (``FileExistsError``) or permissions are missing.
    """
    # exist_ok=True already tolerates an existing directory, so neither a
    # prior existence check (racy) nor manual EEXIST handling is needed.
    os.makedirs(path, exist_ok=True)
def generate_plot(
    logger_obj: logging.Logger,
    my_df: pandas.DataFrame,
    sample_size: int,
    axs2,
):
    """Draw a ridge plot of ``flipper_length_mm`` per species and save it as a PNG.

    Every species contributes the same number of randomly sampled rows:
    ``sample_size``, lowered to the row count of the smallest species when
    that is smaller. One filled KDE row is drawn per species, rows overlap
    vertically, and the figure is written to
    ``images/penguins.ridge_plot/sample_day_feature.flipper_length_mm.all_species.png``
    (the directory is created if needed).

    Args:
        logger_obj: Logger that receives per-species counts/sums and the
            output file name.
        my_df: Data with at least the columns ``species`` and
            ``flipper_length_mm``. It is not modified.
        sample_size: Upper bound on the number of rows sampled per species.
        axs2: Unused; kept so existing callers keep working.

    Raises:
        ValueError: If ``my_df`` contains no species.
    """
    species_list = list(my_df["species"].unique())
    if not species_list:
        raise ValueError("my_df contains no species to plot")

    # First pass: log the raw per-species statistics and cap the sample size
    # at the smallest species so all ridges are built from equally many rows.
    per_species_n = sample_size
    for species in species_list:
        subset = my_df[my_df["species"] == species]
        logger_obj.info("species is :'{0}', count is:{1}, flipper_length_mm_sum is:{2}".format(species, len(subset), subset["flipper_length_mm"].sum()))
        per_species_n = min(per_species_n, len(subset))

    # Second pass: draw the balanced sample and log the same statistics for it.
    samples = []
    for species in species_list:
        sampled = my_df[my_df["species"] == species].sample(per_species_n)
        logger_obj.info("species is :'{0}', count is:{1}, flipper_length_mm_sum is:{2}".format(species, len(sampled), sampled["flipper_length_mm"].sum()))
        samples.append(sampled)
    plot_df = pd.concat(samples, ignore_index=True)

    # Transparent axes background so overlapping rows do not hide each other.
    sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0), 'axes.linewidth': 2})
    palette = sns.color_palette("Set2", 12)
    g = sns.FacetGrid(data=plot_df, palette=palette, row="species", hue="species", aspect=9, height=1.2)
    # Filled density first, then a white outline on top to separate the ridges.
    g.map_dataframe(sns.kdeplot, x="flipper_length_mm", fill=True, alpha=1)
    g.map_dataframe(sns.kdeplot, x="flipper_length_mm", color="white")

    def label_f(x, color, label):
        # Write the species name at the left edge of the current row.
        ax = plt.gca()
        ax.text(0, .2, label, color="black", fontsize=13, ha="left", va="center", transform=ax.transAxes)

    g.map(label_f, "species")
    # Negative spacing makes consecutive rows overlap (the ridge effect).
    g.fig.subplots_adjust(hspace=-.5)
    g.set_titles(col_template="", row_template="")
    # ylabel="" removes the "Density" label that kdeplot puts on the y axis.
    g.set(yticks=[], ylabel="", xlabel="flipper_length_mm")
    g.despine(left=True)

    image_png_fn = "images/penguins.ridge_plot/sample_day_feature.flipper_length_mm.all_species.png"
    logger_obj.info("image_png_fn is :'{0}'".format(image_png_fn))
    mkdir_p(os.path.dirname(os.path.abspath(image_png_fn)))
    g.fig.savefig(image_png_fn)
# Requested number of rows per species for the ridge plot.
sample_size: int = 30000
# The last argument (axs2) is passed as None.
generate_plot(logger_obj, my_df, sample_size, None)
要去掉 y 轴标签,可以在调用 `g.set` 时传入空的 `ylabel`(即 `g.set(..., ylabel='')`)。
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
# Ridge plot with one KDE row per penguin species. Each row marks the mean with
# a dotted line and shades the part of the curve within one standard deviation
# of the mean slightly darker.
penguins = sns.load_dataset('penguins')
sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0), 'axes.linewidth': 2})
palette = sns.color_palette("Set2", 12)
g = sns.FacetGrid(data=penguins, palette=palette, row="species", hue="species", aspect=9, height=1.2)

for (species, ax), color in zip(g.axes_dict.items(), palette):
    # Flipper lengths of the species shown in this row.
    x = penguins.loc[penguins['species'] == species, 'flipper_length_mm'].values

    # Draw only the white outline; the fill is added manually below.
    sns.kdeplot(x=x, color='white', fill=False, ax=ax)

    mean = np.nanmean(x)
    std = np.nanstd(x)

    # The KDE outline is the first line drawn on this axes.
    curve = ax.lines[0]
    xs = curve.get_xdata()
    ys = curve.get_ydata()

    # Dotted vertical line from the baseline up to the curve at the mean.
    ax.vlines(mean, 0, np.interp(mean, xs, ys), color='black', ls=':')
    # Fill the whole area under the curve with the row's palette color.
    ax.fill_between(xs, 0, ys, facecolor=color, alpha=1)

    # Darken the part of the curve within one standard deviation of the mean.
    lower, upper = mean - std, mean + std
    within_sd = (xs >= lower) & (xs <= upper)
    ax.fill_between(xs[within_sd], 0, ys[within_sd], facecolor='black', alpha=0.06)

    # Species name at the bottom-left corner of the row.
    ax.text(0, .05, species, color="black", fontsize=13, ha="left", va="bottom", transform=ax.transAxes)

# Negative spacing makes the rows overlap; extra bottom margin for the x label.
g.fig.subplots_adjust(hspace=-.5, bottom=.15)
g.despine(left=True)
g.set(title='', ylabel='', yticks=[], xlabel="flipper length (mm)")
plt.show()