我想使用 ictreg 使用项目计数技术对调查数据进行多元回归分析,但没有成功,因为我不断看到以下错误消息“xtfrm.data.frame(x) 中的错误:无法 xtfrm 数据帧”。我对 R 很陌生,不知道如何解决这个问题。
请在下面找到我运行代码的示例数据(只有 20 个观察值,我无权访问剩余的数据集)以及代码。
structure(list(sex = structure(c(1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 1), label = "1=female)", format.stata = "%9.0g", labels = c(`0. Male` = 0,
`1. Female` = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), age = structure(c(2.40000009536743, 2.40000009536743, 2.40000009536743,
2.40000009536743, 2.40000009536743, 2.40000009536743, 2.40000009536743,
2.40000009536743, 2.40000009536743, 2.40000009536743, 2.40000009536743,
2.40000009536743, 2.40000009536743, 2.40000009536743, 2.40000009536743,
2.40000009536743, 2.40000009536743, 2.40000009536743, 2.40000009536743,
2.40000009536743), label = "Age (year)", format.stata = "%2.0f"),
P217_OCCUPATION = structure(c(9, 5, 7, 5, 5, 9, 5, 5, 5,
5, 8, 9, 9, 9, 5, 5, 8, 9, 5, 5), label = "P217 Occupation", format.stata = "%52.0f", labels = c(`1. LEGISLATORS,SENIOR OFFICIALS AND MANAGERS` = 1,
`2. PROFESSIONALS` = 2, `3. TECHNICIANS AND ASSOCIATE PROFESSIONALS` = 3,
`4. CLERKS` = 4, `5. SERVICE WORKERS AND SHOP AND MARKET SALES WORKERS` = 5,
`6. SKILLED AGRICULTURAL AND FISHERY WORKERS` = 6, `7. CRAFTS AND RELATED TRADES WORKERS` = 7,
`8. PLANT AND MACHINE OPERATORS ASSEMBLERS` = 8, `9. ELEMENTARY OCCUPATION` = 9,
`10. NOT STATED` = 10), class = c("haven_labelled", "vctrs_vctr",
"double")), industryGroup = structure(c(7, 5, 7, 5, 5, 7,
5, 7, 7, 7, 6, 6, 7, 8, 5, 7, 7, 7, 5, 5), label = "P3102 Industry Group", format.stata = "%45.0f", labels = c(`1. Agriculture, Hunting,.Forestry and Fishing` = 1,
`2. Mining and Quarrying` = 2, `3. Manufacturing` = 3, `4. Construction` = 4,
`5. Trade, Hotels & Restaurants` = 5, `6. Transport` = 6,
`7. Community & Personal Services` = 7, `8. Not Stated` = 8
), class = c("haven_labelled", "vctrs_vctr", "double")),
ownership = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1), label = "What is the form of ownership of this activity/enterprise?", format.stata = "%46.0f", labels = c(`1. Sole ownership or members of the household` = 1,
`2. Partnership` = 2, `3. Other /specify/` = 3, `9. Not stated` = 9
), class = c("haven_labelled", "vctrs_vctr", "double")),
bookAcct = structure(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Have book of account", format.stata = "%34.0f", labels = c(`1. Yes some accounts, but not full` = 1,
`2. No, no accounts kept` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), easyAvoid = structure(c(1, 1, NA,
1, 0, 1, 1, 1, 1, 1, 1, 1, 0, NA, 0, 0, 0, 0, 1, 1), label = "Difficult evade (y/n)", format.stata = "%29.0f", labels = c(`1. Very easy` = 1,
`2. Easy` = 2, `3. Neither easy nor difficult` = 3, `4. Difficult` = 4,
`5. Very difficult` = 5, `98. .Don?t know` = 98, `99. Refuse to answer` = 99
), class = c("haven_labelled", "vctrs_vctr", "double")),
easyComply = structure(c(0, 0, 0, 0, 1, 0, NA, 0, 0, 0, 1,
0, 0, NA, 0, 1, 0, 0, 0, 0), label = "Easy comply tax obl (y/n)", format.stata = "%29.0f", labels = c(`1. Very easy` = 1,
`2. Easy` = 2, `3. Neither easy nor difficult` = 3, `4. Difficult` = 4,
`5. Very difficult` = 5, `98. .Don?t know` = 98, `99. Refuse to answer` = 99
), class = c("haven_labelled", "vctrs_vctr", "double")),
trustGVT = structure(c(0, 1, NA, 0, 0, 1, 1, 1, 1, 1, 0,
1, 1, 1, 1, 1, 0, 0, NA, 0), label = "Trust government (y/n)", format.stata = "%24.0f", labels = c(`1. A great deal of trust` = 1,
`2. Trust somewhat` = 2, `3. Trust just a little` = 3, `4. No trust at all` = 4,
`5. Have never heard of` = 5, `98. Don?t know` = 98, `99. Refuse to answer` = 99
), class = c("haven_labelled", "vctrs_vctr", "double")),
moral0 = structure(c(1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1), label = "Should always pay taxes (y/n)", format.stata = "%46.0f", labels = c(`1. Agree much more with the FIRST statement` = 1,
`2. Agree a bit more with the FIRST statement` = 2, `3. Agree a bit more with the SECOND statement` = 3,
`4. Agree much more with the SECOND statement` = 4, `5. Agree with neither statement` = 5,
`888. Refuse to answer` = 888, `999. Don?t know` = 999), class = c("haven_labelled",
"vctrs_vctr", "double")), opinion1 = structure(c(1, 1, 0,
1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0), label = "Wrong not paying tax (y/n)", format.stata = "%27.0f", labels = c(`1. Not wrong at all` = 1,
`2. Wrong but understandable` = 2, `3. Wrong and punishable` = 3,
`98. Don?t know` = 98, `99. Refuse to answer` = 99), class = c("haven_labelled",
"vctrs_vctr", "double")), opinion2 = structure(c(1, 1, 1,
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, NA), label = "Tax auth right to make pay (y/n)", format.stata = "%29.0f", labels = c(`1. Completely agree` = 1,
`2. Somewhat agree` = 2, `3. Neither agree nor disagree` = 3,
`4. Somewhat disagree` = 4, `5. Completely disagree` = 5,
`98. Don?t know` = 98, `99. Refuse to answer` = 99), class = c("haven_labelled",
"vctrs_vctr", "double")), moral1 = structure(c(0, 1, 1, 0,
1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, NA), label = "Refuse to pay until better service (y/n)", format.stata = "%29.0f", labels = c(`1. Completely agree` = 1,
`2. Somewhat agree` = 2, `3. Neither agree nor disagree` = 3,
`4. Somewhat disagree` = 4, `5. Completely disagree` = 5,
`98. Don?t know` = 98, `99. Refuse to answer` = 99), class = c("haven_labelled",
"vctrs_vctr", "double")), direct_reporting = structure(c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, NA, 0, 0), label = "Do you always pay all taxes you owe to the government?", format.stata = "%19.0f", labels = c(No = 0,
Yes = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), head = structure(c(0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,
0, 1, 1, 1, 1, 1, 1, 1), label = "Owner is HH head", format.stata = "%9.0g"),
norm = structure(c(0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1,
NA, 1, 1, 0, 0, 0, 0), label = "Social costs (y/n)", format.stata = "%27.0g", labels = c(`1. Very likely` = 1,
`2. Likely` = 2, `3. May or may not be likely` = 3, `4. Unlikely` = 4,
`5. Very unlikely` = 5, `98. Don?t know` = 98, `99. Refuse to answer` = 99
), class = c("haven_labelled", "vctrs_vctr", "double")),
treat = structure(c(0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0,
0, 1, 1, 0, 1, 0, 1), label = "RECODE of expgroup", format.stata = "%12.0g", labels = c(Control = 0,
Treatment = 1), class = c("haven_labelled", "vctrs_vctr",
"double")), married = structure(c(1, 1, 0, 0, 1, 1, 0, 0,
0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0), label = "Married", format.stata = "%9.0g"),
secondary = structure(c(0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
1, 1, 0, 0, 0, 0, 1, 0, 0), format.stata = "%9.0g"), log_hhsizeTot = structure(c(1.0986123085022,
1.0986123085022, 1.7917594909668, 0.6931471824646, 0.6931471824646,
1.0986123085022, 1.3862943649292, 0, 0, 1.3862943649292,
0.6931471824646, 1.3862943649292, 1.3862943649292, 1.3862943649292,
0.6931471824646, 1.3862943649292, 1.0986123085022, 0, 0,
1.3862943649292), format.stata = "%9.0g"), log_GROSS_VALUE = structure(c(11.1462001800537,
10.1105012893677, 9.07107830047607, 9.61580562591553, 9.50300979614258,
8.4990291595459, 11.0020999908447, 9.80972576141357, 9.9776668548584,
10.2691717147827, 11.0974102020264, 10.4744672775269, 9.95227813720703,
8.64822101593018, 10.1464338302612, 11.1844215393066, 10.5713167190552,
11.0974102020264, 8.69951438903809, 11.0974102020264), format.stata = "%9.0g"),
log_REVENUFROMSALE = structure(c(11.1418619155884, 10.1105012893677,
9.07107830047607, 9.61580562591553, 9.57498359680176, 8.4763708114624,
11.0020999908447, 9.80145454406738, 9.95418071746826, 10.2691717147827,
11.0974102020264, 10.4744672775269, 9.95227813720703, 8.64822101593018,
10.1345996856689, 11.1844215393066, 10.0858087539673, 10.8967390060425,
8.69951438903809, 11.0974102020264), format.stata = "%9.0g"),
y = structure(c(3, 2, 3, 2, 2, 2, 3, 2, 2, 2, 3, 3, 2, 3,
2, 1, 2, 2, 2, 3), format.stata = "%10.0g")), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
### Load the list experiment package (including required sandwich package)
library(sandwich)
library(list)
# Load package allowing to upload .dta
## First foreign to pull .dta
library(foreign)
## Then install haven package to upload .dta from >=Stata 13 versions
install.packages("haven")
library(haven)
## ####
## Table 1 - "Observed Data from the List Experiment"
rm(list=ls())
require(list)
## Load .dta data (change directory)
mydata <- read_dta("D:/XXXXX.dta")
## Drop observations with missing data from key variables
MISSING <- is.na(mydata$easyAvoid) |
is.na(mydata$easyComply) |
is.na(mydata$trustGVT) |
is.na(mydata$norm) |
is.na(mydata$sex) |
is.na(mydata$age) |
is.na(mydata$head) |
is.na(mydata$ownership) |
is.na(mydata$bookAcct) |
is.na(mydata$married) |
is.na(mydata$secondary) |
is.na(mydata$P217_OCCUPATION) |
is.na(mydata$log_hhsizeTot) |
is.na(mydata$log_GROSS_VALUE) |
is.na(mydata$log_REVENUFROMSALE)
# Count the number of rows flagged for deletion
sum(MISSING)
# Use ! to include those that are NOT missing
mydata_no_NA <- subset(mydata,
subset = !MISSING)
# Count the number of rows kept
nrow(mydata_no_NA)
table(mydata_no_NA$y, mydata_no_NA$treat)
## ####
## Table 2 - "Estimated Coefficients from the Logistic Regression Models"
### Fit standard design ML model
# Setting the sample
# chose P217_OCCUPATION, but can also try industryGroup. Need to make it into a categorical variable
mydata_no_NA$P217_OCCUPATION.f <- factor(mydata_no_NA$P217_OCCUPATION)
is.factor(mydata_no_NA$P217_OCCUPATION.f)
noboundary.results <- ictreg(y ~ easyAvoid + easyComply + trustGVT + norm + sex + age + head + ownership + bookAcct + married + secondary + P217_OCCUPATION.f + log_hhsizeTot + log_GROSS_VALUE + log_REVENUFROMSALE,
treat = "treat", J = 4, data = mydata_no_NA, method = "ml",
overdispersed = FALSE)
## Enforcement + Trust
noboundary.results1 <- ictreg(y ~ easyAvoid + trustGVT, treat = "treat",
J = 4, data = mydata_no_NA, method = "ml",
overdispersed = FALSE, subset = obs(noboundary.results))
## Enforcement + Facilitation + Trust
noboundary.results2 <- ictreg(y ~ easyAvoid + easyComply + trustGVT, treat = "treat",
J = 4, data = mydata_no_NA, method = "ml",
overdispersed = FALSE, subset = obs(noboundary.results))
## Enforcement + Trust + Social norm
noboundary.results3 <- ictreg(y ~ easyAvoid + trustGVT + norm, treat = "treat",
J = 4, data = mydata_no_NA, method = "ml",
overdispersed = FALSE, subset = obs(noboundary.results))
## Enforcement + Facilitation + Trust + Social norm
noboundary.results4 <- ictreg(y ~ easyAvoid + easyComply + trustGVT + norm, treat = "treat",
J = 4, data = mydata_no_NA, method = "ml",
overdispersed = FALSE, subset = obs(noboundary.results))
## Enforcement + Facilitation + Trust + Covariates
noboundary.results5 <- ictreg(y ~ easyAvoid + easyComply + trustGVT + sex + age + head + ownership + bookAcct + married + secondary + P217_OCCUPATION.f + log_hhsizeTot + log_GROSS_VALUE + log_REVENUFROMSALE,
treat = "treat", J = 4, data = mydata_no_NA, method = "ml",
overdispersed = FALSE, subset = obs(noboundary.results))
## Enforcement + Trust + Social norm + Covariates
noboundary.results6 <- ictreg(y ~ easyAvoid + trustGVT + norm + sex + age + head + ownership + bookAcct + married + secondary + P217_OCCUPATION.f + log_hhsizeTot + log_GROSS_VALUE + log_REVENUFROMSALE,
treat = "treat", J = 4, data = mydata_no_NA, method = "ml",
overdispersed = FALSE, subset = obs(noboundary.results))
## Enforcement + Facilitation + Trust + Social norm + Covariates
noboundary.results7 <- ictreg(y ~ easyAvoid + easyComply + trustGVT + norm + sex + age + head + ownership + bookAcct + married + secondary + P217_OCCUPATION.f + log_hhsizeTot + log_GROSS_VALUE + log_REVENUFROMSALE,
treat = "treat",J = 4, data = mydata_no_NA, method = "ml",
overdispersed = FALSE, subset = obs(noboundary.results))
## Outputting results
etable(noboundary.results1, noboundary.results2, noboundary.results3, noboundary.results4, noboundary.results5, noboundary.results6, noboundary.results7, fitstat = ~ n)
尝试了您的方法,但无法重现该错误。也许是由于使用 haven 包会产生一些奇怪的标签格式,处理起来很繁琐,建议使用
readstata13::read.dta13("D:/XXXXX.dta")
代替。
至少我可以向您展示一种更好的方法来过滤缺失以及如何避免过多的重复代码。
为了过滤掉缺失的内容,我们可以使用
complete.cases()
。
## define key vars
key_vars <- c("easyAvoid", "easyComply", "trustGVT", "norm", "sex", "age",
"head", "ownership", "bookAcct", "married", "secondary",
"P217_OCCUPATION", "log_hhsizeTot", "log_GROSS_VALUE",
"log_REVENUFROMSALE")
complete.cases(dat[key_vars])
现在给出一个布尔向量,允许对行进行子集化。
dat <- dat[complete.cases(dat[key_vars]), ] ## subset rows for complete cases in key_vars
## make factor
dat <- transform(dat, P217_OCCUPATION=as.factor(P217_OCCUPATION))
由于除了模型公式之外,对
list::ictreg()
的调用是相同的,因此我们可以 list()
这些,
## put formulae in named list
frml <- list(
## ...
noboundary0=y ~ easyAvoid + easyComply + trustGVT + norm + sex + age +
head + ownership + bookAcct + married + secondary + P217_OCCUPATION +
log_hhsizeTot + log_GROSS_VALUE + log_REVENUFROMSALE,
## Enforcement + Trust
noboundary1=y ~ easyAvoid + trustGVT,
## Enforcement + Facilitation + Trust
noboundary2=y ~ easyAvoid + easyComply + trustGVT,
## Enforcement + Trust + Social norm
noboundary3=y ~ easyAvoid + trustGVT + norm,
## Enforcement + Facilitation + Trust + Social norm
noboundary4=y ~ easyAvoid + easyComply + trustGVT + norm,
## ...
noboundary5=y ~ ~ easyAvoid + easyComply + trustGVT + sex + age + head +
ownership + bookAcct + married + secondary + P217_OCCUPATION +
log_hhsizeTot + log_GROSS_VALUE + log_REVENUFROMSALE,
## ...
noboundary6=y ~ easyAvoid + trustGVT + norm + sex + age + head + ownership +
bookAcct + married + secondary + P217_OCCUPATION + log_hhsizeTot +
log_GROSS_VALUE + log_REVENUFROMSALE,
## ...
noboundary7=y ~ easyAvoid + easyComply + trustGVT + norm + sex + age + head +
ownership + bookAcct + married + secondary + P217_OCCUPATION +
log_hhsizeTot + log_GROSS_VALUE + log_REVENUFROMSALE
)
和
lapply()
在上面。对于 ictreg
在迭代中失败的情况(这将停止循环),我们可以使用 NA
提供 tryCatch
作为替代品。
## run regressions
library(list)
fits <- lapply(frml, \(x) {
tryCatch(ictreg(x, treat="treat", J=4, data=dat, method="lm",
overdispersed=FALSE),
error=\(e) NA)
})
fits
现在是包含回归结果的列表,
names(fits)
# [1] "noboundary0" "noboundary1" "noboundary2" "noboundary3" "noboundary4"
# [6] "noboundary5" "noboundary6" "noboundary7"
可以使用例如轻松访问
fits$noboundary1
# Item Count Technique Regression
#
# Call: ictreg(formula = x, data = dat, treat = "treat", J = 4, method = "lm",
# overdispersed = FALSE)
#
# Coefficient estimates
#
# sensitive.(Intercept) sensitive.easyAvoid sensitive.trustGVT
# -0.23076923 -0.03846154 -0.03846154
# control.(Intercept) control.easyAvoid control.trustGVT
# 2.23076923 0.53846154 -0.46153846
#
# Number of control items J set to 4. Treatment groups were indicated by '' and '' and the control group by ''.
summary(fits$noboundary1)
# Item Count Technique Regression
#
# Call: ictreg(formula = x, data = dat, treat = "treat", J = 4, method = "lm",
# overdispersed = FALSE)
#
# Sensitive item
# Est. S.E.
# (Intercept) -0.23077 0.70346
# easyAvoid -0.03846 0.76846
# trustGVT -0.03846 0.76846
#
# Control items
# Est. S.E.
# (Intercept) 2.23077 0.61947
# easyAvoid 0.53846 0.64029
# trustGVT -0.46154 0.64029
请注意,我使用了
method='lm'
,因为'ml'
失败了,可能是由于示例中的数据不足。所以,不确定你的错误到底发生在哪里,也许在 etable(fits$noboundary1)
但找不到该函数。