在开始之前先声明一下,我对 R 还很陌生;因此,如果我使用了错误的术语,请随时纠正我,如果需要更多信息来解决此问题,请告诉我。
我目前正在研究一个数据集,下面是生成其中一小部分的代码(不包括一些变量和大多数观察结果):
# Reproducible 30-row sample of the mosquito-collection data (one row per
# collected mosquito). It is bound to `house1_comp`, the name the later
# snippets operate on, so the example can be run end to end.
# Columns that are constant across the sample are built with rep(); columns
# that vary use rep(values, times = run_lengths) to keep the original order.
n_obs <- 30L
house1_comp <- structure(
  list(
    ED1_SEN = rep(1L, n_obs),
    Date = rep("09/12/2020", n_obs),
    Household_ID = rep(1L, n_obs),
    Trap = rep("MET", n_obs),
    Trap_type = rep("MET", n_obs),
    Location = rep("Indoor", n_obs),
    Round = rep(1L, n_obs),
    Season = rep("Dry", n_obs),
    Volunteer_initial = rep("AY", n_obs),
    # Four temperature/humidity readings, in runs of 13, 8, 5 and 4 rows.
    Temperature = rep(c(29.2, 27.6, 28.4, 28.2), times = c(13L, 8L, 5L, 4L)),
    Humidity = rep(c(75L, 83L, 78L, 79L), times = c(13L, 8L, 5L, 4L)),
    Wind_speed = rep(0, n_obs),
    Latitude = rep(-8.1473, n_obs),
    Longitude = rep(39.1771, n_obs),
    Elevation = rep(NA_real_, n_obs),
    # 21 CULEX rows followed by 9 MANSONIA rows.
    Grouped_species = rep(c("CULEX", "MANSONIA"), times = c(21L, 9L)),
    Group_malaria_transmissionC = rep("Non_malaria_vectors", n_obs)
  ),
  row.names = c(NA, n_obs),
  class = "data.frame"
)
我希望 R 将“Grouped_species”列展开为多个列,每一列代表一种蚊子物种;如果该行收集到了该物种,则取值为 1,否则取值为 0(稍后我会对数据进行分组,把不同房屋收集到的蚊子总数相加)。
为此,我使用了 pivot_wider 函数。由于数据集中的每一行代表一只被收集到的蚊子,我想先添加一个名为“Count”的新列,并全部填入“1”,以便稍后将其用于“values_from”参数。为此,我使用了这段代码:
# Tag every row (one collected mosquito) with a count of 1.
# The count is stored as an integer, not the string "1", so it can be summed
# later and combined with a numeric 0 fill when pivoting. Assigning a
# length-one value to a column recycles it to every row, so no
# cbind()/data.frame() round trip is needed.
Count <- 1L
house1_comp$Count <- Count
添加新列后,我使用此代码来获取宽格式:
# Spread Grouped_species into one 0/1 indicator column per species while
# keeping one output row per collected mosquito.
# pivot_wider() treats every column not used in names_from/values_from as an
# identifier. Many rows here share identical identifier values, so without a
# unique key the duplicates are collapsed into list-columns and rows are lost.
house1_comp_mod <- house1_comp %>%
  dplyr::mutate(
    obs_id = dplyr::row_number(), # unique key: one row per observation
    Count = as.integer(Count) # numeric, so it matches the 0 fill below
  ) %>%
  pivot_wider(
    names_from = Grouped_species,
    values_from = Count,
    values_fill = 0L # species not collected in this row -> 0
  )
结果表可以在附图中找到。
在这里,我可以看到新列“Culex”和“Mansonia”的值是一个列表,而不是一个字符。另外,我可以看到我丢失了大部分观察结果(在这个阶段我必须保留相同数量的观察结果)。
我尝试将新列的值添加为数字、字符和整数,当我使用pivot_wider时,所有值都变成了“列表”。
如果您对正在发生的事情有任何建议或帮助,我将不胜感激。
查看您的数据集,按家庭、温度和湿度进行分组是有意义的,因为只有这些列的取值实际上有所不同。然后,您可以统计每个物种的出现次数,并将结果转换为宽格式:
# One-time setup: install the required packages (skip if already installed).
# Package names must be passed as a single character vector; extra positional
# arguments would be read as the library path and the repository.
install.packages(c("tidyr", "dplyr"))
library(tidyr)
library(dplyr)

# Count the mosquitoes of each species per household / temperature / humidity
# combination, then spread the species into one count column each.
# Passing the grouping columns straight to count() returns an ungrouped
# result, so no group_by()/ungroup() pair is needed.
wider_list <- house1_comp %>%
  count(
    Household_ID, Temperature, Humidity, # columns to keep in the final table
    Grouped_species # count occurrences of each species
  ) %>%
  pivot_wider(
    names_from = Grouped_species, # new column names from Grouped_species
    values_from = n, # fill with the counts
    values_fill = 0L # combinations with no catches get 0
  )
这将生成以下输出: 输出