R:比较两个数据框并统计匹配数时出现“level sets of factors are different”(因子的水平集不同)错误

问题描述 投票:0回答:1

我想逐行比较两个数据框,但 R 报错“level sets of factors are different”(因子的水平集不同)。

这是数据帧A.

structure(list(V1 = c(101.21, 101.29, 101.3, 101.9, 102.21, 102.29, 
102.31, 102.39, 102.9, 103.1, 103.91, 103.92, 104.1, 104.2, 105.11, 
105.12, 105.13, 105.14, 105.15, 105.94, 105.99, 106.11, 106.12, 
106.13, 106.14, 106.19, 106.2, 106.31, 106.32, 106.33, 106.39, 
106.41, 106.49, 106.9, 201.1, 201.2, 201.3, 202.1, 202.2, 202.3, 
203.11, 203.12, 203.19, 203.21, 203.22, 203.29, 204.1, 204.21, 
204.22, 204.23), V2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("CC", 
"CC or CTH+RVC<=50", "CC or CTSH+RVC<=50", "CC or RVC<=50", "CC+ECTC", 
"CC+ECTC or CTH+ECTC+RVC<=50", "CC+ECTC+TECH", "CC+RVC<=50", 
"CTh", "CTH", "CTH or CTH+RVC<=50", "CTH or CTSH+RVC<=50", "CTH or RVC<=50", 
"CTH+ECTC", "CTH+ECTC or CTH+RVC<=50", "CTH+ECTC or CTSH+RVC<=50", 
"CTH+ECTC or RVC<=50", "CTH+ECTC+RVC<=50", "CTH+RVC<=50", "CTSH", 
"CTSH or RVC<=50", "CTSH+ECTC", "CTSH+ECTC+RVC<=50", "CTSH+RVC<=50", 
"RVC<=50", "RVC>50"), class = "factor")), .Names = c("V1", "V2"
), row.names = c(NA, 50L), class = "data.frame")

这是数据帧B.

structure(list(V1 = c(101.21, 101.29, 101.3, 101.9, 102.21, 102.29, 
102.31, 102.39, 102.9, 103.1, 103.91, 103.92, 104.1, 104.2, 105.11, 
105.12, 105.13, 105.14, 105.15, 105.94, 105.99, 106.11, 106.12, 
106.13, 106.14, 106.19, 106.2, 106.31, 106.32, 106.33, 106.39, 
106.41, 106.49, 106.9, 201.1, 201.2, 201.3, 202.1, 202.2, 202.3, 
203.11, 203.12, 203.19, 203.21, 203.22, 203.29, 204.1, 204.21, 
204.22, 204.23), V2 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("", "CC", 
"CC ", "CC or CTH+RVC>=50", "CC+ECTC", "CC+ECTC or CTH+RVC>=50", 
"CC+ECTC+TECH", "CC+RVC(45)", "CC+TECH", "CTH", "CTH ", "CTH or CTSH+ECTC+RVC>=50", 
"CTH or CTSH+RVC<50", "CTH or CTSH+RVC<50 ", "CTH or CTSH+RVC>=50", 
"CTH or CTSH+RVC>=50 ", "CTH or RVC>=50", "CTH+ECTC", "CTH+ECTC ", 
"CTH+ECTC or CTH+RVC<50", "CTH+ECTC or CTH+RVC>=50", "CTH+ECTC or CTSH+RVC<50", 
"CTH+ECTC or CTSH+RVC>=50", "CTH+ECTC or RVC>=50", "CTH+RVC(45)", 
"CTH+RVC<50", "CTH+RVC>=50", "CTH+RVC>=50 ", "CTH+RVC>50", "cTSH", 
"CTSH", "CTSH or RVC(45)", "CTSH or RVC<50", "CTSH or RVC>=50", 
"CTSH+ECTC", "CTSH+RVC<50", "CTSH+RVC>=50", "RVC>=50 ", "RVC>50"
), class = "factor")), .Names = c("V1", "V2"), row.names = c(NA, 
50L), class = "data.frame")

这是计数的代码。

  # Count the rows (2 through 50) whose second-column values match in `a` and `b`.
  # NOTE(review): `count` is not initialised anywhere in this snippet; it must
  # already exist (e.g. count <- 0) before the loop runs.
  for(j in 2:50){
      # Column 2 holds factors; applying `==` to two factors whose level sets
      # differ is what raises the Ops.factor error quoted below.
      if(a[j,2] == b[j,2] ){
        count = count+1
      }
  }

我收到此错误消息

Error in Ops.factor(a[j, 2], b[j, 2]) : 
  level sets of factors are different
r database compare
1个回答
1
投票

正如 @akrun 指出的,A 和 B 中的 V2 列都是因子(factor),而两者的水平集不同,所以不能直接用 == 比较;必须先把它们转换为字符型,然后可以使用 Map 逐个元素进行比较:

您可以使用 str 函数(str(A)、str(B))检查数据结构,它会告诉您每一列是因子、字符还是数值类型。您也可以使用 dplyr::glimpse 完成同样的检查。

> A$V2 <- as.character(A$V2)
> B$V2 <- as.character(B$V2)
> Map(`==`, A, B)

如果您想统计每一列中匹配的元素个数,可以在外面再套一层 Map,如下所示。

# Inner Map compares A and B column by column; outer Map sums each logical
# vector, giving the number of matching elements per column.
# Assumes V2 has already been converted to character in both data frames.
Map(`sum`,Map(`==`, A, B))

如果您想从某一行开始比较,可以修改传给 Map 的数据框,如下所示:

# Same column-by-column comparison, restricted to rows 2 through the last row
# of each data frame (row 1 is skipped).
Map(`==`, A[2:nrow(A),], B[2:nrow(B),])

以上写法适用于需要从第 2 行开始比较的情况。

© www.soinside.com 2019 - 2024. All rights reserved.