我是第一次发帖,但长期以来一直是 Stack Overflow 的用户。另外,我是 R 的初学者,所以请多包涵。我正在尝试用一个含 4 列的数据框(每一列都是因变量)和一个含 194 行、212 列的数据框(自变量)做简单线性回归分析。我还有另外 5 个数据框,也要作为因变量进行同样的分析。
在我当前的代码中,我已经得到了预期的结果,但我需要对其进行扩展:我试着再加一层 for 循环(用于遍历因变量的各列),但这样我还需要同时创建更多的空列表来保存结果。
我想知道该如何实现?
我当前的for循环是:
# Dependent variables: every column of Green_Class_Commercial except the first.
# NOTE(review): the first column looks like a row index -- confirm.
y <- data.frame(Green_Class_Commercial[, -1])
# Independent variables: the first 175 rows of lagvar without its first two
# columns. NOTE(review): the row count must match nrow(y) -- confirm.
x <- data.frame(lagvar[1:175, c(-1, -2)])

# Preallocate one result row per predictor instead of growing the data frame
# inside the loop.
n_vars <- length(x)
out <- data.frame(
  Variable = character(n_vars),
  Intercept = numeric(n_vars),
  Coefficient = numeric(n_vars),
  P.val = numeric(n_vars),
  R.square = numeric(n_vars)
)

# seq_along() yields an empty sequence when x has no columns, whereas
# 1:length(x) would iterate over c(1, 0) and fail.
for (i in seq_along(x)) {
  m <- summary(lm(y[, 1] ~ x[, i])) # regress the first response on predictor i
  out$Variable[i] <- names(x)[i] # predictor name
  out$Intercept[i] <- m$coefficients[1, 1] # intercept estimate
  out$Coefficient[i] <- m$coefficients[2, 1] # slope estimate
  out$P.val[i] <- m$coefficients[2, 4] # p-value of the slope
  out$R.square[i] <- m$r.squared # R-squared of the model
}
names(out) <- c("Variable", "Intercept", "Coefficient", "P-val", "R-square")
head(out)
输出如下:
> head(out)
Variable Intercept Coefficient P-val R-square
1 GDP.SC 0.2540527 -4.722220e-07 0.7032087 8.411229e-04
2 GDP.SC1 0.1148311 3.107631e-07 0.7959237 3.899366e-04
3 GDP.SC2 0.1609010 4.998762e-08 0.9673014 9.855831e-06
4 GDP.SC3 0.1353608 1.959274e-07 0.8746321 1.468544e-04
5 GDP.SC4 0.1439931 1.487237e-07 0.9064221 8.200597e-05
6 CivilianLaborForce.SC 0.2595231 -4.078450e-08 0.7716514 4.881398e-04
>
下面是我要用来做回归的变量:
# The x variable (sample): a 20-row data frame with GDP.SC and four further
# columns GDP.SC1..GDP.SC4. In this sample each GDP.SCk equals GDP.SC shifted
# down by k rows, with the first k entries set to NA.
structure(list(GDP.SC = c(154698, 154698, 154698, 154698, 154698,
154698, 154698, 154698, 154698, 154698, 160138.4, 160138.4, 160138.4,
160138.4, 160138.4, 160138.4, 160138.4, 160138.4, 160138.4, 160138.4
), GDP.SC1 = c(NA, 154698, 154698, 154698, 154698, 154698, 154698,
154698, 154698, 154698, 154698, 160138.4, 160138.4, 160138.4,
160138.4, 160138.4, 160138.4, 160138.4, 160138.4, 160138.4),
GDP.SC2 = c(NA, NA, 154698, 154698, 154698, 154698, 154698,
154698, 154698, 154698, 154698, 154698, 160138.4, 160138.4,
160138.4, 160138.4, 160138.4, 160138.4, 160138.4, 160138.4
), GDP.SC3 = c(NA, NA, NA, 154698, 154698, 154698, 154698,
154698, 154698, 154698, 154698, 154698, 154698, 160138.4,
160138.4, 160138.4, 160138.4, 160138.4, 160138.4, 160138.4
), GDP.SC4 = c(NA, NA, NA, NA, 154698, 154698, 154698, 154698,
154698, 154698, 154698, 154698, 154698, 154698, 160138.4,
160138.4, 160138.4, 160138.4, 160138.4, 160138.4)), row.names = c(NA,
20L), class = "data.frame")
# The Y variable (sample): a 20-row data frame whose first column X holds
# 1:20, followed by the four columns ComBus, ComCon, ComNoo and ComOO.
# ComCon contains NA values.
structure(list(X = 1:20, ComBus = c(0.83, 0, 0.23, 0.09, 0.1,
0.11, 0.15, 0.18, 0.37, 0.19, 0, 0.18, 0.09, 0.1, 0.03, 0.5,
0.14, 0.17, 0.11, 0.06), ComCon = c(NA, 0, 0, 0, 0, 0.5, 0, 0,
NA, 0.67, 0, 0, 0, 0, 0.5, 0, 0, NA, 1, 0), ComNoo = c(0.25,
0.14, 0.38, 0.17, 0.14, 0.33, 0.44, 0.05, 0.04, 0.1, 0.18, 0.06,
0.23, 0.14, 0.5, 0.14, 0.5, 0, 0.14, 0.23), ComOO = c(0, 0, 0,
0, 0, 0.33, 0, 0, 0, 0.18, 0.22, 0.15, 0, 0, 0.17, 0, 0, 0, 0,
0)), row.names = c(NA, 20L), class = "data.frame")
好的,下面这样对您有帮助吗?在可能的情况下,我用 apply 系列函数代替了循环。
### Some dummy dataframes
# Build one toy data frame with n rows: v1 and v2 are standard-normal draws,
# v3 and v4 are uniform draws between 1 and 1000. The columns are generated in
# the order v1, v2, v3, v4.
make_dummy_df <- function(n = 10) {
  data.frame(
    v1 = rnorm(n),
    v2 = rnorm(n),
    v3 = runif(n, min = 1, max = 1000),
    v4 = runif(n, min = 1, max = 1000)
  )
}

# Two predictor sets and two response sets, created in the order x, x2, y, y2.
x <- make_dummy_df()
x2 <- make_dummy_df()
y <- make_dummy_df()
y2 <- make_dummy_df()
###
# I tend to prefer the apply family of functions to replace loops where
# possible.
#
# Fit one simple linear regression per predictor column.
#
# Args:
#   y_df:  data frame of dependent variables.
#   x_df:  data frame of independent variables.
#   y_col: index or name of the y_df column used as the response. Defaults to
#          1, i.e. the first column.
#
# Returns:
#   A list with one fitted lm object per column of x_df, named after those
#   columns.
lm_func <- function(y_df, x_df, y_col = 1) {
  response <- as.data.frame(y_df)[[y_col]]
  # lapply() walks the columns one by one and keeps each column's own type.
  # apply() would first coerce the whole data frame to a matrix, so a single
  # non-numeric column would turn every predictor into character.
  lapply(as.data.frame(x_df), function(x) {
    lm(response ~ x)
  })
}
# data.table supplies rbindlist(); broom supplies glance().
library(data.table)
library(broom)

# One list element per lm. Keeping the full lm objects lets you go back to
# them later if needed.
results_list <- lm_func(y, x)

# One glance() summary per model, kept as a list if you wish ...
results_list_glance <- lapply(results_list, glance)
# ... or stacked into a single table, with the list names stored in the
# "var_name" column.
results_glance <- rbindlist(lapply(results_list, glance), idcol = "var_name")
# To run the function for several y data frames against one fixed x data
# frame, use mapply: y_df varies over the list, x_df is held constant.
results_list_m <- mapply(
  lm_func,
  y_df = list(y, y2),
  MoreArgs = list( # other arguments you want to keep fixed
    x_df = x
  ),
  SIMPLIFY = FALSE # spelled out: F is an ordinary variable and can be reassigned
)
# The output of mapply over lm_func is a little fiendish because it is a list
# of lists. Folding rbindlist and glance into the function makes the output
# simpler.
#
# Fit one simple linear regression per predictor column and return the model
# summaries as a single table.
#
# Args:
#   y_df:  data frame of dependent variables.
#   x_df:  data frame of independent variables.
#   y_col: index or name of the y_df column used as the response. Defaults to
#          1, i.e. the first column.
#
# Returns:
#   The result of rbindlist() over one glance() summary per column of x_df,
#   with the predictor name in the "var_name" column.
lm_func_bind <- function(y_df, x_df, y_col = 1) {
  response <- as.data.frame(y_df)[[y_col]]
  # lapply() keeps each column's own type. apply() would coerce the whole data
  # frame to a matrix first, turning every predictor into character as soon as
  # one column is non-numeric.
  fits <- lapply(as.data.frame(x_df), function(x) {
    lm(response ~ x)
  })
  rbindlist(lapply(fits, glance), idcol = "var_name")
}
# Summary table for a single pair of data frames.
results_glance_df <- lm_func_bind(y, x)

# One summary table per y data frame, all against the same fixed x.
results_list_dfs <- mapply(
  lm_func_bind,
  y_df = list(y, y2),
  MoreArgs = list( # other arguments you want to keep fixed
    x_df = x
  ),
  SIMPLIFY = FALSE # spelled out: F is an ordinary variable and can be reassigned
)
如果有可以改进的地方,请告诉我。如果您不熟悉 apply 和 rbindlist 之类的函数,值得查阅一下它们的文档。祝好!
P.S. 反复拟合大量线性模型通常并不理想,因为其中一些模型可能纯粹出于偶然而显得显著。不过,这更多是统计问题,而不是编码问题!