我在循环中使用 R 的 classInt 库划分分类区间时遇到了问题:有些子集不满足分类条件,因而无法分配类别。如果数据框子集的行数小于为分类区间定义的类别数(n),如何加入一个条件来自动分配类别?在我的例子中,n=3。下面是一个示例,其中包含可以在 R 中创建的示例数据框:
library(classInt)
library(rlist)
library(dplyr)
## Build the example data frame
# Australia, Italy, Peru and China are observed in every area (A-D);
# Nigeria is observed once, in area D.
Country <- c(
  rep(c("Australia", "Italy", "Peru", "China"), times = 3),
  "Nigeria", "Australia", "Italy", "Peru", "China"
)
Time <- c(
  21, 18, 17, 10,
  10, 15, 27, 0,
  2, 4, 5, 7,
  4, 8, 9, 10, 5
)
Area <- rep(c("A", "B", "C", "D"), times = c(4, 4, 4, 5))
DF <- data.frame(Country = Country, Time = Time, Area = Area)
这将生成如下数据框:
Country Time Area
1 Australia 21 A
2 Italy 18 A
3 Peru 17 A
4 China 10 A
5 Australia 10 B
6 Italy 15 B
7 Peru 27 B
8 China 0 B
9 Australia 2 C
10 Italy 4 C
11 Peru 5 C
12 China 7 C
13 Nigeria 4 D
14 Australia 8 D
15 Italy 9 D
16 Peru 10 D
17 China 5 D
## Split by Country
NewXL <- split(DF, DF$Country)
## Generate the ranges and category/classes for each country
NewXL2 <- list()
for (i in seq_along(NewXL)) {
  AB <- NewXL[[i]]
  # Probe whether class intervals can be computed for this subset. A subset
  # for which classIntervals() raises an error is skipped, which leaves a NULL
  # entry at its position in NewXL2 (this is what happens to single-row
  # subsets such as Nigeria).
  can_classify <- tryCatch(
    {
      classIntervals(
        AB$Time,
        n = 5,
        cutlabels = FALSE,
        style = "fisher",
        factor = FALSE,
        warnSmallN = FALSE,
        warnLargeN = FALSE
      )
      TRUE
    },
    error = function(e) FALSE
  )
  if (!can_classify) {
    next
  }
  ## Classify
  # Range_Abs holds the interval label and Class_Abs the class number
  # assigned to each Time value (3 classes, Fisher style).
  AB$Range_Abs <- classify_intervals(AB$Time, 3, "fisher", factor = TRUE)
  AB$Class_Abs <- classify_intervals(AB$Time, 3, "fisher", factor = FALSE)
  NewXL2[[i]] <- AB
}
结果是一个包含 5 个国家/地区的列表,其中尼日利亚对应的元素为空,因为它只有一行(至少有 3 行可能才适合创建区间)。有没有办法编写循环代码,以便为循环中只有一行的任何数据框定义要添加的类别和范围最小值?在本例中,尼日利亚只有一行,因此我希望自动将第 3 类(最大值)分配给这一行,范围为 [0,4)。下面是循环目前的输出:
NewXL2
[[1]]
Country Time Area Range_Abs Class_Abs
1 Australia 21 A [15.5,21] 3
5 Australia 10 B [5,15.5) 2
9 Australia 2 C [2,5) 1
14 Australia 8 D [5,15.5) 2
[[2]]
Country Time Area Range_Abs Class_Abs
4 China 10 A [8.5,10] 3
8 China 0 B [0,2.5) 1
12 China 7 C [2.5,8.5) 2
17 China 5 D [2.5,8.5) 2
[[3]]
Country Time Area Range_Abs Class_Abs
2 Italy 18 A [12,18] 3
6 Italy 15 B [12,18] 3
10 Italy 4 C [4,6.5) 1
15 Italy 9 D [6.5,12) 2
[[4]]
NULL
[[5]]
Country Time Area Range_Abs Class_Abs
3 Peru 17 A [13.5,22) 2
7 Peru 27 B [22,27] 3
11 Peru 5 C [5,13.5) 1
16 Peru 10 D [5,13.5) 1
尼日利亚的数据框在循环之后应该是这样的:
[[4]]
Country Time Area Range_Abs Class_Abs
13 Nigeria 4 D [0,4) 3
# Merge the per-country list into one long data frame with class intervals.
# NOTE(review): list.rbind() comes from rlist; countries skipped by the loop
# are NULL entries in NewXL2 - confirm they are dropped as intended.
NewXL2b <- list.rbind(NewXL2)
您可以在循环中使用 if/else:
library(classInt)
# Example data: Australia, Italy, Peru and China are recorded in every area
# (A-D); Nigeria is recorded once, in area D.
data <- data.frame(
  country = c(
    rep(c("Australia", "Italy", "Peru", "China"), times = 3),
    "Nigeria", "Australia", "Italy", "Peru", "China"
  ),
  time = c(
    21, 18, 17, 10,
    10, 15, 27, 0,
    2, 4, 5, 7,
    4, 8, 9, 10, 5
  ),
  area = rep(c("A", "B", "C", "D"), times = c(4, 4, 4, 5))
)
# Split the data into one data frame per country.
split_data <- split(data, data$country)
result <- list()
for (i in seq_along(split_data)) {
  AB <- split_data[[i]]
  if (nrow(AB) == 1) {
    # A single observation is not passed to classInt: label it with a range
    # made of its own value and assign it class 1.
    AB$range_abs <- factor(sprintf("[%s]", AB$time))
    AB$class_abs <- 1L
  } else {
    # Probe whether class intervals can be computed for this subset; if
    # classIntervals() raises an error the subset is skipped, leaving a NULL
    # entry at its position in `result`.
    can_classify <- tryCatch(
      {
        classIntervals(
          AB$time,
          n = 5,
          cutlabels = FALSE,
          style = "fisher",
          factor = FALSE,
          warnSmallN = FALSE,
          warnLargeN = FALSE
        )
        TRUE
      },
      error = function(e) FALSE
    )
    if (!can_classify) {
      next
    }
    # range_abs holds the interval label and class_abs the class number
    # assigned to each time value (3 classes, Fisher style).
    AB$range_abs <- classify_intervals(AB$time, 3, "fisher", factor = TRUE)
    AB$class_abs <- classify_intervals(AB$time, 3, "fisher", factor = FALSE)
  }
  result[[i]] <- AB
}
result
#> [[1]]
#> country time area range_abs class_abs
#> 1 Australia 21 A [15.5,21] 3
#> 5 Australia 10 B [5,15.5) 2
#> 9 Australia 2 C [2,5) 1
#> 14 Australia 8 D [5,15.5) 2
#>
#> [[2]]
#> country time area range_abs class_abs
#> 4 China 10 A [8.5,10] 3
#> 8 China 0 B [0,2.5) 1
#> 12 China 7 C [2.5,8.5) 2
#> 17 China 5 D [2.5,8.5) 2
#>
#> [[3]]
#> country time area range_abs class_abs
#> 2 Italy 18 A [12,18] 3
#> 6 Italy 15 B [12,18] 3
#> 10 Italy 4 C [4,6.5) 1
#> 15 Italy 9 D [6.5,12) 2
#>
#> [[4]]
#> country time area range_abs class_abs
#> 13 Nigeria 4 D [4] 1
#>
#> [[5]]
#> country time area range_abs class_abs
#> 3 Peru 17 A [13.5,22) 2
#> 7 Peru 27 B [22,27] 3
#> 11 Peru 5 C [5,13.5) 1
#> 16 Peru 10 D [5,13.5) 1
创建于 2024-07-03,使用 reprex v2.1.0.9000