我试图在python中编写R等效代码,但没有得到相同的结果。 R代码如下:
# Set parameters
max.people = 50
max.trials = 500
plot.step = 1
# load libraries
library(tidyverse)
#Set up an initial data frame
df<-data.frame("trial"=NA,"people"=NA, "val"=NA)
# Set up a common theme for plots
ztheme<-function(){
theme_classic()+
theme(panel.background=element_rect(fill="#F0F0F0", color="#F0F0F0"))+
theme(plot.background=element_rect(fill="#F0F0F0", color="#F0F0F0"))}
#Run main loop
for(trial in 1:max.trials){
# set up a buffer. Makes the program run a lot faster.
buff<-data.frame("trial"=NA,"people"=NA, "val"=NA)
for(people in 1:max.people){
buff<-rbind(buff,data.frame("trial"=trial,"people"=people, "val"=NA))
samp<-sample(1:365, people, replace=T)
if(length(unique(samp))==length(samp)){
buff$val[nrow(buff)]<-0
}else{
buff$val[nrow(buff)]<-1
}; rm(samp)}
df<-rbind(df, buff); rm(buff)
print(paste(round(trial/(max.trials)*100, 2), "% Complete", sep=""))
}
df<-subset(df, !is.na(df$trial))
rm(max.people); rm(people); rm(trial)
# Generate multiple plots of result
for(n in seq(plot.step,max.trials,plot.step)){
print(
ggplot(summarise(group_by(subset(df, trial<=n), people), prob=mean(val)), aes(people, prob))+
geom_bar(stat="identity", fill="steelblue1")+
geom_smooth(se=F, color="black", method="loess")+
scale_y_continuous(labels=scales::percent, limits=c(0,1))+
labs(title="Birthday Paradox",
subtitle=paste("Based on",n,"simulations."),
x="Number of People in Room",
y="One or More Matching Birthdays (True/False Ratio)",
caption="created by /u/zonination")+
ztheme())
ggsave(paste("bday_", formatC(n,width=5,flag = "0"), ".png", sep=""), height=4.5, width=7, dpi=120, type="cairo-png")
}; rm(n)
我在python中编写了相同的代码,如下所示:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random
plt.style.use('ggplot')
maxTrials = 500
maxPeople = 50
plotStep = 1
df = pd.DataFrame(columns=['trial','people','val'])
for trial in range(plotStep, maxTrials+1):
buff = pd.DataFrame()
for people in range(plotStep,maxPeople+1):
buff = buff.append(pd.DataFrame({'trial':[trial],'people':[people],'val':[np.nan]}), ignore_index=True)
samp = [random.randint(1,366) for x in range(people)]
if len(set(samp)) == len(samp):
buff.at[len(buff.index)-1,'val'] = 0
else:
buff.at[len(buff.index)-1,'val'] = 1
del(samp)
df = df.append(buff, ignore_index=True)
del(buff)
print(str(round(trial/(maxTrials)*100, 2)) + "% Complete")
df = df.dropna(axis=0, how='any')
del(maxPeople)
del(people)
del(trial)
for n in range(plotStep,5):
dfCopy = df.loc[df.trial<=n]
dfCopy = dfCopy.groupby(['people'])['val'].mean().to_frame(name='prob').reset_index()
print(dfCopy)
plt.bar(dfCopy['people'],
dfCopy['prob'],
color='blue',
edgecolor='none',
width=0.5,
align='center')
plt.suptitle("Birthday Paradox\n")
plt.title("Based on "+str(n)+" simulations.")
plt.yticks([0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0])
plt.xlabel("Number of people in room")
plt.ylabel("Probability of one or more matching birthdays")
plt.savefig("bday_"+str(n)+".png", dpi=110, bbox_inches='tight')
代码很好但你没有清除你的轴,所以它会添加每次运行而不清除最后一个。
在plt.cla()
之后添加plt.savefig(...)
会使它看起来很像R输出