我有一个 tibble(引自问题 “How to create dummy variables per group of another variable in tidyverse”):
# Example input data.
# The dates are written as strings and parsed with as.Date(): a bare
# 2017-03-07 is evaluated by R as the subtraction 2017 - 3 - 7 (= 2007),
# not as a date.
input_data <- tribble(
  ~Subcat, ~Date,        ~COMM1, ~COMM2, ~UOM,    ~AUC_TYPE,
  1,       "2017-03-07", 40750,  41400,  "MT",    "English",
  1,       "2017-03-15", 40750,  40000,  "MT",    "English",
  2,       "2017-10-16", 41000,  40500,  "METER", "Yankee",
  2,       "2017-11-06", 41010,  40510,  "METER", "Yankee",
  2,       "2019-01-26", 50010,  50510,  "METER", "English",
  3,       "2017-03-07", 40750,  41400,  "MT",    "English",
  3,       "2018-05-26", 50010,  50510,  "MT",    "English",
  3,       "2019-01-21", 40750,  40200,  "MT",    "English",
  3,       "2019-01-21", 40750,  40200,  "MT",    "English",
  4,       "2017-11-08", 37500,  39000,  "LTR",   "Dynamic Sealbid",
  4,       "2017-11-08", 37500,  39000,  "LTR",   "Dynamic Sealbid"
)
# Turn the ISO-formatted strings into real Date values.
input_data$Date <- as.Date(input_data$Date)
# Desired output: one 0/1 indicator column per level of UOM and AUC_TYPE.
# The column name containing a space must be wrapped in backticks to be a
# valid formula term, and the dates are quoted so they are not evaluated as
# subtraction (2017 - 3 - 7).
output_data <- tribble(
  ~Subcat, ~Date, ~COMM1, ~COMM2,
  ~UOM_MT, ~UOM_METER, ~UOM_LTR,
  ~AUC_TYPE_English, ~`AUC_TYPE_Dynamic Sealbid`, ~AUC_TYPE_Yankee,
  1, "2017-03-07", 40750, 41400, 1, 0, 0, 1, 0, 0,
  1, "2017-03-15", 40750, 40000, 1, 0, 0, 1, 0, 0,
  2, "2017-10-16", 41000, 40500, 0, 1, 0, 0, 0, 1,
  2, "2017-11-06", 41010, 40510, 0, 1, 0, 0, 0, 1,
  2, "2019-01-26", 50010, 50510, 0, 1, 0, 1, 0, 0,
  3, "2017-03-07", 40750, 41400, 1, 0, 0, 1, 0, 0,
  3, "2018-05-26", 50010, 50510, 1, 0, 0, 1, 0, 0,
  3, "2019-01-21", 40750, 40200, 1, 0, 0, 1, 0, 0,
  3, "2019-01-21", 40750, 40200, 1, 0, 0, 1, 0, 0,
  4, "2017-11-08", 37500, 39000, 0, 0, 1, 0, 1, 0,
  4, "2017-11-08", 37500, 39000, 0, 0, 1, 0, 1, 0
)
# Turn the ISO-formatted strings into real Date values.
output_data$Date <- as.Date(output_data$Date)
您可以这样做:
library(dplyr)
library(tidyr)

# Build one 0/1 indicator column per distinct UOM / AUC_TYPE value.
input_data %>%
  # Tag every row with an id so duplicated rows stay separate when reshaping
  mutate(row_id = row_number()) %>%
  # Stack the two categorical columns into name/value pairs
  pivot_longer(
    cols = c(UOM, AUC_TYPE),
    names_to = "name",
    values_to = "value"
  ) %>%
  # Glue them into the future column label, e.g. "UOM_MT"
  unite(indicator, name, value) %>%
  # Each name/value pair that exists for a row is marked with 1 ...
  mutate(present = 1) %>%
  # ... and every combination that does not occur is filled with 0
  pivot_wider(
    names_from = indicator,
    values_from = present,
    values_fill = list(present = 0)
  ) %>%
  # The helper id is no longer needed
  select(-row_id)
# A tibble: 11 x 10
# Subcat Date COMM1 COMM2 UOM_MT AUC_TYPE_English UOM_METER AUC_TYPE_Yankee UOM_LTR `AUC_TYPE_Dynamic Sealbid`
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 2007 40750 41400 1 1 0 0 0 0
# 2 1 1999 40750 40000 1 1 0 0 0 0
# 3 2 1991 41000 40500 0 0 1 1 0 0
# 4 2 2000 41010 40510 0 0 1 1 0 0
# 5 2 1992 50010 50510 0 1 1 0 0 0
# 6 3 2007 40750 41400 1 1 0 0 0 0
# 7 3 1987 50010 50510 1 1 0 0 0 0
# 8 3 1997 40750 40200 1 1 0 0 0 0
# 9 3 1997 40750 40200 1 1 0 0 0 0
#10 4 1998 37500 39000 0 0 0 0 1 1
#11 4 1998 37500 39000 0 0 0 0 1 1
library(dplyr)

# Alternative: let model.matrix() generate the indicator columns.
# NOTE(review): the original snippet piped from `df`, which is never defined
# here (in base R `df` is the F-distribution density function). The printed
# result matches `input_data`, so that is used as the source.
input_data %>%
  # Suffix the categorical columns with "_" so that model.matrix() names the
  # dummies "<column>_<level>", e.g. UOM_MT.
  rename_with(~ paste0(.x, "_"), .cols = all_of(c("UOM", "AUC_TYPE"))) %>%
  mutate(across(ends_with("_"), as.factor)) %>%
  # Attach full (non-reduced) contrasts so that every level gets its own
  # column instead of the first level being absorbed as the reference.
  mutate(across(
    ends_with("_"),
    ~ C(.x, contrasts(.x, contrasts = FALSE), n_distinct(.x))
  )) %>%
  # model.matrix() returns a numeric matrix, so all columns come back as
  # doubles.
  model.matrix(~ ., data = .) %>%
  as_tibble() %>%
  # Drop the intercept column that model.matrix() puts first.
  select(-1)
# # A tibble: 11 x 10
# Subcat Date COMM1 COMM2 UOM_LTR UOM_METER UOM_MT `AUC_TYPE_Dynamic Sealbid` AUC_TYPE_English AUC_TYPE_Yankee
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 2007 40750 41400 0 0 1 0 1 0
# 2 1 1999 40750 40000 0 0 1 0 1 0
# 3 2 1991 41000 40500 0 1 0 0 0 1
# 4 2 2000 41010 40510 0 1 0 0 0 1
# 5 2 1992 50010 50510 0 1 0 0 1 0
# 6 3 2007 40750 41400 0 0 1 0 1 0
# 7 3 1987 50010 50510 0 0 1 0 1 0
# 8 3 1997 40750 40200 0 0 1 0 1 0
# 9 3 1997 40750 40200 0 0 1 0 1 0
# 10 4 1998 37500 39000 1 0 0 1 0 0
# 11 4 1998 37500 39000 1 0 0 1 0 0