在循环中添加条件,以便使用 R 的 classInt 为行数少于 n 个类别的分组生成分类区间

问题描述 投票:0回答:1

我在循环中使用 R 的 classInt 库时遇到问题:对于不满足循环中条件的分组,无法为其区间分配类别。如果数据框子集的行数少于为分类区间设定的类别数(n),该如何加入一个自动分配类别的条件?在我的例子中,n=3。下面是一个可以在 R 中创建的示例数据框:

library(classInt)
library(rlist)
library(dplyr)

## Build the example data frame: four countries observed in areas A-D,
## plus a single Nigeria row (row 13) in area D.
Area <- rep(c("A", "B", "C", "D"), times = c(4L, 4L, 4L, 5L))
Time <- c(21, 18, 17, 10, 10, 15, 27, 0, 2, 4, 5, 7, 4, 8, 9, 10, 5)
Country <- c(
  rep(c("Australia", "Italy", "Peru", "China"), times = 3L),
  "Nigeria",
  "Australia", "Italy", "Peru", "China"
)
DF <- data.frame(Country = Country, Time = Time, Area = Area)

This should produce this dataframe:


      Country Time Area
 1  Australia   21    A
 2      Italy   18    A
 3       Peru   17    A
 4      China   10    A
 5  Australia   10    B
 6      Italy   15    B
 7       Peru   27    B
 8      China    0    B
 9  Australia    2    C
 10     Italy    4    C
 11      Peru    5    C
 12     China    7    C
 13   Nigeria    4    D
 14 Australia    8    D
 15     Italy    9    D
 16      Peru   10    D
 17     China    5    D


## Split the data into one data frame per country
NewXL <- split(DF, DF$Country)

## Add an interval label (Range_Abs) and a class index (Class_Abs) to each
## country's data frame; results are stored by position in NewXL2.
NewXL2 <- list()
for (i in seq_along(NewXL)) {
  AB <- NewXL[[i]]

  # Probe: the flag is TRUE when classIntervals() raises an error for this
  # country's Time values. The handler's return value is used as the flag,
  # so no `<<-` assignment is needed.
  # NOTE(review): the probe requests n = 5 while the classification below
  # uses 3 classes -- confirm this difference is intended.
  skip_to_next <- tryCatch(
    {
      classIntervals(
        AB$Time,
        n = 5,
        cutlabels = FALSE,
        style = "fisher",
        factor = FALSE,
        warnSmallN = FALSE,
        warnLargeN = FALSE
      )
      FALSE
    },
    error = function(e) TRUE
  )

  # A skipped country gets no entry; its slot stays NULL once a later
  # country is stored at a higher index.
  if (skip_to_next) {
    next
  }

  # Interval label (as a factor) and class index for each Time value
  AB$Range_Abs <- classify_intervals(AB$Time, 3, "fisher", factor = TRUE)
  AB$Class_Abs <- classify_intervals(AB$Time, 3, "fisher", factor = FALSE)

  NewXL2[[i]] <- AB
}

该结果是一个包含 5 个国家/地区的列表,其中尼日利亚对应的元素为空,因为它只有一行数据(要创建区间,理想情况下至少需要 3 行)。有没有办法改写循环,使得循环中遇到只有一行的数据框时,也能为其指定类别和区间范围?在本例中,尼日利亚只有一行,因此我希望自动把第 3 类(最高类别)分配给这一行,区间为 [0,4)。循环目前的输出如下。

  NewXL2
  [[1]]
       Country Time Area Range_Abs Class_Abs
  1  Australia   21    A [15.5,21]         3
  5  Australia   10    B  [5,15.5)         2
  9  Australia    2    C     [2,5)         1
  14 Australia    8    D  [5,15.5)         2

  [[2]]
     Country Time Area Range_Abs Class_Abs
  4    China   10    A  [8.5,10]         3
  8    China    0    B   [0,2.5)         1
  12   China    7    C [2.5,8.5)         2
  17   China    5    D [2.5,8.5)         2

  [[3]]
     Country Time Area Range_Abs Class_Abs
  2    Italy   18    A   [12,18]         3
  6    Italy   15    B   [12,18]         3
  10   Italy    4    C   [4,6.5)         1
  15   Italy    9    D  [6.5,12)         2

  [[4]]
  NULL

  [[5]]
     Country Time Area Range_Abs Class_Abs
  3     Peru   17    A [13.5,22)         2
  7     Peru   27    B   [22,27]         3
  11    Peru    5    C  [5,13.5)         1
  16    Peru   10    D  [5,13.5)         1

循环结束后,尼日利亚的数据框应该如下所示:

[[4]]
    Country Time Area Range_Abs Class_Abs
 13 Nigeria    4    D     [0,4)         3

 # Stack the per-country data frames row-wise into one long data frame
 NewXL2b <- do.call(rbind, NewXL2)
r loops for-loop grouping intervals
1个回答
0
投票

您可以在循环中使用 if/else:

library(classInt)

# Example data: four countries observed in areas A-D, plus a single
# Nigeria row (row 13) in area D.
data <- data.frame(
  country = c(
    rep(c("Australia", "Italy", "Peru", "China"), times = 3L),
    "Nigeria",
    "Australia", "Italy", "Peru", "China"
  ),
  time = c(21, 18, 17, 10, 10, 15, 27, 0, 2, 4, 5, 7, 4, 8, 9, 10, 5),
  area = rep(c("A", "B", "C", "D"), times = c(4L, 4L, 4L, 5L))
)

# One data frame per country
split_data <- split(data, f = data[["country"]])

# Per-country results, stored by position in split_data.
result <- list()

for (i in seq_along(split_data)) {
  AB <- split_data[[i]]

  if (length(unique(AB$time)) == 1L) {
    # A group with a single distinct time value (e.g. a one-row country) is
    # labelled directly instead of being passed to classify_intervals():
    # the range is the value itself and the class is 1.
    AB$range_abs <- factor(sprintf("[%s]", AB$time))
    AB$class_abs <- 1L
  } else {
    # Run the actual 3-class classification inside tryCatch() so that the
    # error check covers the same call whose result is stored.
    classified <- tryCatch(
      list(
        range_abs = classify_intervals(AB$time, 3, "fisher", factor = TRUE),
        class_abs = classify_intervals(AB$time, 3, "fisher", factor = FALSE)
      ),
      error = function(e) {
        # Report the failure rather than dropping the group silently.
        warning(
          "Skipping group ", names(split_data)[i], ": ", conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )

    # A skipped group gets no entry; its slot stays NULL once a later group
    # is stored at a higher index.
    if (is.null(classified)) {
      next
    }

    AB$range_abs <- classified[["range_abs"]]
    AB$class_abs <- classified[["class_abs"]]
  }

  result[[i]] <- AB
}

result
#> [[1]]
#>      country time area range_abs class_abs
#> 1  Australia   21    A [15.5,21]         3
#> 5  Australia   10    B  [5,15.5)         2
#> 9  Australia    2    C     [2,5)         1
#> 14 Australia    8    D  [5,15.5)         2
#> 
#> [[2]]
#>    country time area range_abs class_abs
#> 4    China   10    A  [8.5,10]         3
#> 8    China    0    B   [0,2.5)         1
#> 12   China    7    C [2.5,8.5)         2
#> 17   China    5    D [2.5,8.5)         2
#> 
#> [[3]]
#>    country time area range_abs class_abs
#> 2    Italy   18    A   [12,18]         3
#> 6    Italy   15    B   [12,18]         3
#> 10   Italy    4    C   [4,6.5)         1
#> 15   Italy    9    D  [6.5,12)         2
#> 
#> [[4]]
#>    country time area range_abs class_abs
#> 13 Nigeria    4    D       [4]         1
#> 
#> [[5]]
#>    country time area range_abs class_abs
#> 3     Peru   17    A [13.5,22)         2
#> 7     Peru   27    B   [22,27]         3
#> 11    Peru    5    C  [5,13.5)         1
#> 16    Peru   10    D  [5,13.5)         1

创建于 2024-07-03,使用 reprex v2.1.0.9000

© www.soinside.com 2019 - 2024. All rights reserved.