R For 循环和模拟

问题描述 投票:0回答:1

set.seed(2024)
#conditions

b1<-c(-0.3,0.3)
b2<-c(-0.3,0.3)


conditions <- expand.grid(b1 = b1, b2 = b2)

# some other conditions 
sample_size <- 200
nsim<- 10 #number of simulations 

#empty holders 

xcoef<-numeric()
zcoef<-numeric()
int_pvalue<-numeric()

results <- data.frame(
  condition_id = integer(),
  xcoef = numeric(),
  zcoef = numeric(),
  t1er = numeric()
)


for (i in 1:nrow(conditions)){
  
  xcoef <- numeric(nsim) # Reinitialize storage for x coefficients
  zcoef <- numeric(nsim) # Reinitialize storage for z coefficients
  int_pvalue <- numeric(nsim) # Reinitialize storage for p-values
  
  for( j in 1:nsim){
  
  #true data generation
   sigma<-matrix(c(1,0,
                  0,1),2,2)
   XZ<-as.data.frame(MASS::mvrnorm(n=sample_size,mu=c(0,0),Sigma=sigma))
   colnames(XZ)<-c("x","z")
   XZ$err<-rnorm(n=sample_size,mean=0,sd=1)
 
   
  XZ$True_y <- 
               conditions$b1[i] * XZ$x +
               conditions$b2[i] * XZ$z +
               XZ$err
  
  lmodel<-summary(lm(True_y ~ x * z, data=XZ))
   xcoef[[j]]<-lmodel$coefficients[["x","Estimate"]]
   zcoef[[j]]<-lmodel$coefficients[["z","Estimate"]]
   
   
  # Calculate aggregated results
  results$condition_id<-i
  results$xcoef[i] <- mean(xcoef) # Mean x coefficient estimate
  results$zcoef[i] <- mean(zcoef) # Mean z coefficient estimate
 
  
  }
  


  }

弹出错误消息。

此外,我还注意到了

计算汇总结果

这部分,它不计算聚合结果,而是只放入最后一个nsim(嵌套循环)的结果。

我想汇总多次模拟的结果,以获得每个估计值和 1 类错误率的平均值。

r for-loop simulation nested-loops
1个回答
0
投票

有这样的事吗?
将循环重写为函数。

  • 内循环生成数据并拟合一个回归;
  • 外层循环使用
    nsim
    调用上述函数
    replicate
    次,并提取相关统计数据;
  • 然后
    apply
    上的
    conditions
    循环调用后一个函数。

下面的结果不包括

condition
数字1到4,但这只是
cbind(condition = 1:4, result)
的问题。

sim_one <- function(condition, n, mu, sigma) {
  # true data generation
  XZ <- MASS::mvrnorm(n = n, mu = mu, Sigma = sigma)
  colnames(XZ) <- c("x", "z")
  err <- rnorm(n = sample_size, mean = 0, sd = 1)
  True_y <- c(condition %*% t(XZ)) + err
  XZ <- cbind.data.frame(XZ, True_y)
  
  # fit one regression
  lm(True_y ~ x*z, data = XZ)
}
sim_many <- function(condition, R, n, mu, sigma) {
  # run nsim regressions
  fit_list <- replicate(R, sim_one(condition, n, mu, sigma), simplify = FALSE)
  
  # extract the coefficients and compute their mean values
  mean_xz <- sapply(fit_list, \(fit) coef(fit)[c("x", "z")]) |> rowMeans()
  mean_xz <- unname(mean_xz)
  
  # get the F statistics mean value
  ff <- sapply(fit_list, \(fit) summary(fit)[["fstatistic"]][1L]) |> mean()
  
  # the degrees of freedom are always the same, extract them
  # from any of the fits, in this case from the 1st
  df12 <- summary(fit_list[[1L]])[["fstatistic"]][2:3]
  
  # p-value of the average F stat
  t1er <- pf(ff, df1 = df12[1L], df2 = df12[2L], lower.tail = FALSE)
  
  # return a named vector
  c(xcoef = mean_xz[1L], zcoef = mean_xz[2L], t1er = t1er)
}

# conditions
b1 <- c(-0.3, 0.3)
b2 <- c(-0.3, 0.3)
conditions <- expand.grid(b1 = b1, b2 = b2) |> as.matrix()
ncond <- nrow(conditions)

# some other conditions 
sample_size <- 200
nsim <- 10 # number of simulations 

mu <- c(0, 0)
sigma <- matrix(c(1, 0, 0, 1), 2, 2)

set.seed(2024)
t(apply(conditions, 1L, sim_many, R = nsim, n = sample_size, mu = mu, sigma = sigma))
#>           xcoef      zcoef         t1er
#> [1,] -0.3316541 -0.2840518 4.240253e-08
#> [2,]  0.2629164 -0.2967959 7.393880e-07
#> [3,] -0.3222810  0.2615112 8.463187e-08
#> [4,]  0.2823582  0.3164789 2.251141e-08

创建于 2024 年 12 月 1 日,使用 reprex v2.1.1

© www.soinside.com 2019 - 2024. All rights reserved.