Matplotlib 基于 pandas 数据框中特定列的不同颜色的箱线图

问题描述 投票:0回答:1

我正在尝试修改这张图 enter image description here

使其符合下面这种风格:

enter image description here

所需更改如下:

  1. 将各个箱体的颜色改为:氧化物(Oxide)为红色,过渡(Transition)为绿色,新鲜(Fresh)为蓝色
  2. 添加氧化物、过渡和新鲜的图例
  3. 简化 x 轴以仅包含元素

这是我的数据摘录:

Oxidation   Elements    GAI
1.OXIDE Ag_ppm  1
2.TRANS Ag_ppm  1
2.TRANS Ag_ppm  0
2.TRANS Ag_ppm  2
2.TRANS Ag_ppm  2
2.TRANS Ag_ppm  1
3.FRESH Ag_ppm  2
3.FRESH Ag_ppm  0
3.FRESH Ag_ppm  0
3.FRESH Ag_ppm  1
3.FRESH Ag_ppm  0
3.FRESH Ag_ppm  0
1.OXIDE Ag_ppm  0
1.OXIDE Ag_ppm  1
1.OXIDE Ag_ppm  0
1.OXIDE Ag_ppm  0
1.OXIDE Ag_ppm  0
1.OXIDE Ag_ppm  0
1.OXIDE Cu_ppm  1
2.TRANS Cu_ppm  1
2.TRANS Cu_ppm  1
2.TRANS Cu_ppm  1
2.TRANS Cu_ppm  2
2.TRANS Cu_ppm  1
3.FRESH Cu_ppm  2
3.FRESH Cu_ppm  1
3.FRESH Cu_ppm  2
3.FRESH Cu_ppm  2
3.FRESH Cu_ppm  2
3.FRESH Cu_ppm  1
3.FRESH Cu_ppm  2
1.OXIDE Cu_ppm  3
1.OXIDE Cu_ppm  3
1.OXIDE Cu_ppm  3
1.OXIDE Cu_ppm  4
1.OXIDE Cu_ppm  2
1.OXIDE Mg_pct  1
1.OXIDE Mg_pct  1
1.OXIDE Mg_pct  3
1.OXIDE Mg_pct  2
1.OXIDE Mg_pct  1
1.OXIDE Mg_pct  1
1.OXIDE Mg_pct  2
1.OXIDE Mg_pct  2
2.TRANS Mg_pct  2
2.TRANS Mg_pct  2
2.TRANS Mg_pct  2
2.TRANS Mg_pct  2
2.TRANS Mg_pct  2
3.FRESH Mg_pct  2
3.FRESH Mg_pct  2
3.FRESH Mg_pct  2
3.FRESH Mg_pct  2
3.FRESH Mg_pct  2

这是我当前的脚本:

# Draw the GAI box plots on a single wide axes.
# NOTE: `gai_waste`, `plt`, `np` and `FormatStrFormatter` are defined/imported
# outside this snippet.
fig, ax = plt.subplots(figsize=(20, 8))

# Styling dictionaries for the individual box-plot artists.
mean = {'marker': 'x', 'mec': 'Black', 'ms': 9}
median = {'color': "Black", 'lw': 1.2}
whisker = {'color': "Black", 'lw': 1.2}
cap = {'color': "Black", 'lw': 1.2}
flier = {'markerfacecolor': 'green', 'ms': 10}  # not passed to boxplot below

# One element label per group of three boxes, one oxidation label per box.
key_label = ['Ag', 'Cu', 'Mg']
key_ox = ['Oxide', 'Transition', 'Fresh'] * 3
col_ox = ['r', 'g', 'b'] * 3  # intended per-box colours; not used below

# Keyword arguments forwarded to DataFrame.boxplot, collected in one place.
boxplot_options = {
    'color': '#ADD8E6',
    'patch_artist': True,
    'whis': [10, 90],
    'whiskerprops': whisker,
    'showmeans': True,
    'meanprops': mean,
    'medianprops': median,
    'capprops': cap,
    'showfliers': False,
    'grid': True,
    'rot': 90,
    'fontsize': 'medium',
}
gai_waste.boxplot(ax=ax, by=['Elements', 'Oxidation'], column=["GAI"],
                  **boxplot_options)

# Axis labels and tick formatting.
plt.ylabel("GAI", fontsize='x-large', fontname='Calibri', weight='bold')
plt.xlabel(None)
plt.yticks(fontsize='large', fontname='Calibri')
plt.xticks(np.arange(1, len(key_ox) + 1), labels=key_ox, ha='center',
           fontsize='large', fontname='Calibri', rotation=0)
ax.yaxis.set_major_formatter(FormatStrFormatter('%.0f'))
plt.grid(True, which='both', color='lightgrey', ls='--')

# Clear the titles that the grouped boxplot adds.
plt.suptitle('')
plt.title('')

# Write each element name below the middle box of its group (x = 2, 5, 8).
for x, element in zip(range(2, 11, 3), key_label):
    plt.text(x=x, y=-5.5, s=str(element), horizontalalignment='center',
             fontsize='x-large', weight='bold')

预先感谢您的帮助!

python pandas matplotlib boxplot
1个回答
0
投票

正如评论中指出的那样,`matplotlib` 并不是最合适的工具,首选方案是 `seaborn`;但如果你必须使用 matplotlib,以下是我的建议:

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import Patch
from matplotlib.ticker import FormatStrFormatter
import pandas as pd

# Demo data: 36 rows of (oxidation state, element, GAI score).
# A seeded generator keeps the example figure reproducible between runs.
rng = np.random.default_rng(0)
data = {
    'Oxidation': ['OXIDE', 'TRANS', 'TRANS', 'TRANS', 'TRANS', 'TRANS', 'FRESH', 'FRESH', 'FRESH', 'FRESH', 'FRESH', 'FRESH', 'OXIDE', 'OXIDE', 'OXIDE', 'OXIDE', 'OXIDE', 'OXIDE', 'OXIDE', 'TRANS', 'TRANS', 'TRANS', 'TRANS', 'TRANS', 'FRESH', 'FRESH', 'FRESH', 'FRESH', 'FRESH', 'FRESH', 'FRESH', 'OXIDE', 'OXIDE', 'OXIDE', 'OXIDE', 'OXIDE'],
    'Elements': ['Ag_ppm', 'Ag_ppm', 'Ag_ppm', 'Ag_ppm', 'Ag_ppm', 'Ag_ppm', 'Ag_ppm', 'Ag_ppm', 'Ag_ppm', 'Ag_ppm', 'Ag_ppm', 'Ag_ppm', 'Cu_ppm', 'Cu_ppm', 'Cu_ppm', 'Cu_ppm', 'Cu_ppm', 'Cu_ppm', 'Cu_ppm', 'Cu_ppm', 'Cu_ppm', 'Cu_ppm', 'Cu_ppm', 'Cu_ppm', 'Cu_ppm', 'Cu_ppm', 'Cu_ppm', 'Cu_ppm', 'Cu_ppm', 'Cu_ppm', 'Cu_ppm', 'Mg_pct', 'Mg_pct', 'Mg_pct', 'Mg_pct', 'Mg_pct'],
    'GAI': rng.integers(0, 5, 36)
}
df = pd.DataFrame(data)

fig, ax = plt.subplots(figsize=(20, 8))

# Fixed drawing order so boxes and legend agree (groupby would sort
# alphabetically: FRESH, OXIDE, TRANS).
oxidation_order = ['OXIDE', 'TRANS', 'FRESH']
colors = {'OXIDE': 'red', 'TRANS': 'green', 'FRESH': 'blue'}

boxprops = dict(linestyle='-', linewidth=1.5)
medianprops = dict(linestyle='-', linewidth=1.5, color='k')
meanprops = dict(marker='X', markerfacecolor='black', markeredgecolor='black')
whiskerprops = dict(linestyle='-', linewidth=1.5)
capprops = dict(linestyle='-', linewidth=1.5)
flierprops = dict(marker='o', color='k', alpha=0.5)

# Each element owns one integer x slot; the oxidation boxes are spread
# symmetrically around it so they sit side by side instead of overlapping.
box_width = 0.25
centre = (len(oxidation_order) - 1) / 2
offsets = {name: (i - centre) * box_width for i, name in enumerate(oxidation_order)}

elements = sorted(df['Elements'].unique())
for element_index, element in enumerate(elements):
    element_rows = df[df['Elements'] == element]
    for oxidation in oxidation_order:
        values = element_rows.loc[element_rows['Oxidation'] == oxidation, 'GAI']
        if values.empty:
            # Not every element has samples for every oxidation state.
            continue
        ax.boxplot(values, positions=[element_index + offsets[oxidation]],
                   widths=box_width * 0.9,
                   boxprops=dict(boxprops, facecolor=colors[oxidation]),
                   medianprops=medianprops,
                   showmeans=True, meanprops=meanprops,  # meanprops is ignored unless showmeans is set
                   whiskerprops=whiskerprops, capprops=capprops,
                   flierprops=flierprops, patch_artist=True)

# Simplify the x axis: one tick per element, labelled without the unit suffix.
element_labels = [el.split('_')[0] for el in elements]
ax.set_xticks(range(len(element_labels)))
ax.set_xticklabels(element_labels, fontsize='large', fontname='Calibri', rotation=0)

# Legend built from proxy patches, one per oxidation state.
legend_elements = [Patch(facecolor=colors[name], label=name) for name in oxidation_order]
ax.legend(handles=legend_elements, loc='upper right')

plt.ylabel("GAI", fontsize='x-large', fontname='Calibri', weight='bold')
plt.yticks(fontsize='large', fontname='Calibri')
plt.grid(True, which='both', linestyle='--', linewidth=0.5, color='grey', axis='y')
plt.title('Distribution of GAI by Element and Oxidation Phase', fontsize=14)

plt.show()

这给出了

enter image description here

© www.soinside.com 2019 - 2024. All rights reserved.