想要在人口金字塔中添加一些百分比注释,但目前百分比是重复的,不能,我一生都想不出如何解决它。
#creating a count of each male in each age class
male_age_count = cleandf[cleandf['Gender'] == 'Male'].groupby('Age Class')['Age Class'].count()
#print(male_age_count)
#creating count of each female in each age class
female_age_count = cleandf[cleandf['Gender']=='Female'].groupby('Age Class')['Age Class'].count()
#print(f'The female age count of each age class:{female_age_count}')
#creating the negative male count to create correct format for age pyramid
negative_male_age_count = -male_age_count.abs()
#print(f'The negative male count of each age class:{negative_male_age_count}')
#to sort age classes in logical order on graph
ageclass = cleandf['Age Class'].unique()
ageclass = sorted(ageclass, key=lambda x: 200 if x == '100 +' else
int(x.split('-')[0]), reverse=True)
#ageclass = sorted(ageclass,key=lambda x:int(x.split('-')[0]),reverse=True)
#creating variable for graph
age_p = pd.DataFrame({'Age': ageclass,
'Male':-male_age_count.reindex(ageclass),
'Female':female_age_count.reindex(ageclass)})
plt.figure(figsize=(10,6))
#creating bar graph for male side
male_barplot = sns.barplot(x='Male', y= 'Age', data=age_p, color=('mediumblue'), label='Male')
#creating bar graph for female side
female_barplot = sns.barplot(x='Female',y= 'Age', data=age_p, color=('darkorange'), label='Female')
#obtaining entire population
population_total = cleandf.shape[0]
#adding percentage markers to male side of graph
for p in male_barplot.patches:
width = p.get_width()
if width != 0:
percentage = '{:.1f}%'.format(100* width/population_total)
x = p.get_x() + width - 10
y= p.get_y() + p.get_height() / 2
male_barplot.annotate(percentage, (x,y), ha='center',va='center')
#adding percentage markers to female side of graph
for p in female_barplot.patches:
width = p.get_width()
if width != 0:
percentage = '{:.1f}%'.format(100* abs(width)/population_total)
x = p.get_x() + width +10
y= p.get_y() + p.get_height() / 2
female_barplot.annotate(percentage, (x,y), ha='center',va='center')
plt.text(-400,5, 'Male', fontsize=15,fontweight='bold')
plt.text(300,5, 'Female', fontsize=15,fontweight='bold')
plt.legend(loc='best')
plt.xticks(range(-600,700,100))
plt.title('Population/Age Pyramid',fontsize=20,fontweight='bold')
plt.xlabel('Population',fontsize=15,fontweight='bold')
plt.ylabel('Age Range',fontsize=15,fontweight='bold')
plt.show()
还尝试了添加百分比的 iterrows 方法,但这破坏了一切。 上述代码的当前输出
由于sns.barplot命令中的重复数据而发生错误
#creating a count of each male in each age class
male_age_count = cleandf[cleandf['Gender'] == 'Male'].groupby('Age Class')['Age Class'].count()
#print(male_age_count)
#creating count of each female in each age class
female_age_count = cleandf[cleandf['Gender']=='Female'].groupby('Age Class')['Age Class'].count()
#print(f'The female age count of each age class:{female_age_count}')
#creating the negative male count to create correct format for age pyramid
negative_male_age_count = -male_age_count.abs()
#print(f'The negative male count of each age class:{negative_male_age_count}')
#to sort age classes in logical order on graph
ageclass = cleandf['Age Class'].unique()
ageclass = sorted(ageclass, key=lambda x: 200 if x == '100 +' else
int(x.split('-')[0]), reverse=True)
#ageclass = sorted(ageclass,key=lambda x:int(x.split('-')[0]),reverse=True)
#creating variable for graph
age_p = pd.DataFrame({'Age': ageclass,
'Male':-male_age_count.reindex(ageclass),
'Female':female_age_count.reindex(ageclass)})
plt.figure(figsize=(10,6))
#creating bar graph for male side
male_barplot = sns.barplot(x='Male', y= 'Age', data=age_p, color=('mediumblue'), label='Male')
#creating bar graph for female side
female_barplot = sns.barplot(x='Female',y= 'Age', data=age_p, color=('darkorange'), label='Female')
#obtaining entire population
population_total = cleandf.shape[0]
label_anotate_step = 10
# adding percentage markers to male side of graph
for p in male_barplot.patches[:len(male_barplot.patches)//2]:
width = p.get_width()
if width != 0:
percentage = '{:.1f}%'.format(100 * width / population_total)
x = p.get_x() + width - label_anotate_step
y = p.get_y() + p.get_height() / 2
male_barplot.annotate(percentage, (x, y), ha='center', va='center')
# adding percentage markers to female side of graph
for p in female_barplot.patches[:len(female_barplot.patches)//2]:
width = p.get_width()
if width != 0:
percentage = '{:.1f}%'.format(100 * abs(width) / population_total)
x = p.get_x() + width + label_anotate_step
y = p.get_y() + p.get_height() / 2
female_barplot.annotate(percentage, (x, y), ha='center', va='center')
plt.text(-400,5, 'Male', fontsize=15,fontweight='bold')
plt.text(300,5, 'Female', fontsize=15,fontweight='bold')
plt.legend(loc='best')
plt.xticks(range(-600,700,100))
plt.title('Population/Age Pyramid',fontsize=20,fontweight='bold')
plt.xlabel('Population',fontsize=15,fontweight='bold')
plt.ylabel('Age Range',fontsize=15,fontweight='bold')
plt.show()