我用 ChatGPT 写了一些代码(说来惭愧),其中有下面这段代码我看不懂:
# Build one row per distinct value of the column whose name is stored in
# `testvariable`, holding the four cells of a 2x2 table:
#   ai = hydrate "Present" and modelpresence > 0
#   bi = hydrate "Present" and modelpresence == 0
#   ci = hydrate "Absent"  and modelpresence > 0
#   di = hydrate "Absent"  and modelpresence == 0
# `testvariable` and `hydratelevel` are strings containing column names, so
# the columns are looked up with the `.data[[name]]` pronoun.
escalc.df <- allmetadata %>%
  group_by(.data[[testvariable]]) %>%
  summarise(
    # Copy of the grouping value under a fixed column name.
    group = first(.data[[testvariable]]),
    # sum() over a logical vector counts its TRUE elements; na.rm = TRUE
    # stops a single missing value from turning the whole count into NA.
    ai = sum(
      .data[[hydratelevel]] == "Present" & modelpresence > 0,
      na.rm = TRUE
    ),
    bi = sum(
      .data[[hydratelevel]] == "Present" & modelpresence == 0,
      na.rm = TRUE
    ),
    ci = sum(
      .data[[hydratelevel]] == "Absent" & modelpresence > 0,
      na.rm = TRUE
    ),
    di = sum(
      .data[[hydratelevel]] == "Absent" & modelpresence == 0,
      na.rm = TRUE
    ),
    # Return an ungrouped result explicitly.
    .groups = "drop"
  )
我不知道该从哪里入手,因为我并不完全理解这段代码。有人可以向我解释一下这段代码,或者分享一个更简短、更简化的版本吗?
我期望数据框 `allmetadata` 按 `testvariable` 分组(`testvariable` 事先已设置为 `allmetadata` 中某一列的列名),然后针对 `hydratelevel` 所指列(即 `allmetadata` 中的水合物状态列)的每个取值,分别统计 `allmetadata$modelpresence` 大于 0 和等于 0 的行数。这些计数应输出到一个新的数据框 `escalc.df` 中,其中包含四列:`$ai`、`$bi`、`$ci` 和 `$di`。例如,`testvariable` 可以是 `$surfacelith`,`hydratelevel` 可以是 `$AreaKnownHydrate`。
> dput(head(allmetadata))
structure(list(feature.id = c("AB094456", "AB094457", "AB094458",
"AB094459", "AB094460", "AB094461"), seq = c("cct", "cct", "cct",
"cct", "cct", "cct"), author = c("Inagaki", "Inagaki", "Inagaki",
"Inagaki", "Inagaki", "Inagaki"), yearPub = c(2003L, 2003L, 2003L,
2003L, 2003L, 2003L), yearCollected = c(2001L, 2001L, 2001L,
2001L, 2001L, 2001L), ocean = c("Pacific", "Pacific", "Pacific",
"Pacific", "Pacific", "Pacific"), region = c("SeaOkhotsk", "SeaOkhotsk",
"SeaOkhotsk", "SeaOkhotsk", "SeaOkhotsk", "SeaOkhotsk"), location = c("ShiretokoPeninsula",
"ShiretokoPeninsula", "ShiretokoPeninsula", "ShiretokoPeninsula",
"ShiretokoPeninsula", "ShiretokoPeninsula"), waterType = c("marine",
"marine", "marine", "marine", "marine", "marine"), methaneForm = c("HYD",
"HYD", "HYD", "HYD", "HYD", "HYD"), waterDepth = c(1225, 1225,
1225, 1225, 1225, 1225), sedDepth = c("UNK", "UNK", "UNK", "UNK",
"UNK", "UNK"), latitude = c(44.5275, 44.5275, 44.5275, 44.5275,
44.5275, 44.5275), longitude = c(145.0041, 145.0041, 145.0041,
145.0041, 145.0041, 145.0041), sedProfile = c("UNK", "UNK", "UNK",
"UNK", "UNK", "UNK"), sampleType = c("sediment", "sediment",
"sediment", "sediment", "sediment", "sediment"), porosity = c(81.5954,
81.5954, 81.5954, 81.5954, 81.5954, 81.5954), surfaceTOC = c(2.1923,
2.1923, 2.1923, 2.1923, 2.1923, 2.1923), surfacelith = c("clay",
"clay", "clay", "clay", "clay", "clay"), locationUSGSdatabase = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_
), LatUSGSdatabase = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), LongUSGSdatabase = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), AreaKnownHydrate = c("Absent",
"Absent", "Absent", "Absent", "Absent", "Absent"), ExactHydratePresent = c("UNK",
"UNK", "UNK", "UNK", "UNK", "UNK"), hydInfoSource = c("UNK",
"UNK", "UNK", "UNK", "UNK", "UNK"), modelpresence = c(0, 0, 0,
0, 0, 0)), row.names = c(NA, 6L), class = "data.frame")
#' Count the cells of a 2x2 table (ai, bi, ci, di) for each group
#'
#' Groups `data` by the column named in `testvariable` and, within each
#' group, counts rows by hydrate status ("Present" / "Absent") crossed with
#' model detection (`modelpresence > 0` versus `modelpresence == 0`).
#'
#' @param testvariable A single string: name of the column to group by.
#' @param hydratelevel A single string: name of the column compared against
#'   "Present" and "Absent".
#' @param data Data frame to summarise; it must contain a `modelpresence`
#'   column. Defaults to the `allmetadata` object visible from where this
#'   function was defined, which is what the function always used before.
#' @return A tibble with one row per group and the columns: the grouping
#'   column, `group` (a copy of the grouping value), `ai`, `bi`, `ci`, `di`.
myFun <- function(testvariable, hydratelevel, data = allmetadata) {
  stopifnot(
    is.character(testvariable), length(testvariable) == 1L,
    is.character(hydratelevel), length(hydratelevel) == 1L
  )

  # The pipeline is the last expression (not an assignment), so the result is
  # returned visibly and prints when the function is called on its own.
  data %>%
    group_by(.data[[testvariable]]) %>%
    summarise(
      group = first(.data[[testvariable]]),
      # sum() over a logical vector counts its TRUE elements; na.rm = TRUE
      # stops a single missing value from turning the whole count into NA.
      ai = sum(
        .data[[hydratelevel]] == "Present" & modelpresence > 0,
        na.rm = TRUE
      ),
      bi = sum(
        .data[[hydratelevel]] == "Present" & modelpresence == 0,
        na.rm = TRUE
      ),
      ci = sum(
        .data[[hydratelevel]] == "Absent" & modelpresence > 0,
        na.rm = TRUE
      ),
      di = sum(
        .data[[hydratelevel]] == "Absent" & modelpresence == 0,
        na.rm = TRUE
      ),
      .groups = "drop"
    )
}
# Example call: group by the `surfacelith` column and use `AreaKnownHydrate`
# as the hydrate-status column.
returned.df <- myFun("surfacelith", "AreaKnownHydrate")
# A bare object name at top level auto-prints the result.
returned.df