我有一个数据框:
# Example data: 96 rows = 4 dates (2018-02-01 to 2018-02-04) x 24 hours.
# Columns: date (Date), hour (character, "00".."23"), offered and answered
# (integer values, one per date/hour).
# The object carries the grouped_df/tbl_df classes and a `groups` attribute
# keyed on date (four groups of 24 rows each), so it prints as a grouped
# tibble.
df <- structure(list(date = structure(c(17563, 17563, 17563, 17563,
17563, 17563, 17563, 17563, 17563, 17563, 17563, 17563, 17563,
17563, 17563, 17563, 17563, 17563, 17563, 17563, 17563, 17563,
17563, 17563, 17564, 17564, 17564, 17564, 17564, 17564, 17564,
17564, 17564, 17564, 17564, 17564, 17564, 17564, 17564, 17564,
17564, 17564, 17564, 17564, 17564, 17564, 17564, 17564, 17565,
17565, 17565, 17565, 17565, 17565, 17565, 17565, 17565, 17565,
17565, 17565, 17565, 17565, 17565, 17565, 17565, 17565, 17565,
17565, 17565, 17565, 17565, 17565, 17566, 17566, 17566, 17566,
17566, 17566, 17566, 17566, 17566, 17566, 17566, 17566, 17566,
17566, 17566, 17566, 17566, 17566, 17566, 17566, 17566, 17566,
17566, 17566), class = "Date"), hour = c("00", "01", "02", "03",
"04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14",
"15", "16", "17", "18", "19", "20", "21", "22", "23", "00", "01",
"02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12",
"13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23",
"00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10",
"11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21",
"22", "23", "00", "01", "02", "03", "04", "05", "06", "07", "08",
"09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
"20", "21", "22", "23"), offered = c(30L, 28L, 15L, 21L, 11L,
14L, 18L, 35L, 42L, 36L, 37L, 38L, 54L, 45L, 37L, 52L, 40L, 66L,
84L, 69L, 75L, 51L, 39L, 38L, 25L, 21L, 18L, 20L, 7L, 14L, 14L,
28L, 37L, 50L, 46L, 31L, 45L, 45L, 39L, 31L, 48L, 69L, 91L, 117L,
74L, 66L, 60L, 37L, 20L, 31L, 15L, 26L, 18L, 12L, 21L, 42L, 107L,
118L, 138L, 137L, 93L, 109L, 102L, 91L, 102L, 76L, 76L, 70L,
68L, 74L, 55L, 54L, 28L, 19L, 23L, 12L, 16L, 12L, 18L, 39L, 96L,
119L, 111L, 95L, 65L, 81L, 67L, 76L, 64L, 64L, 68L, 71L, 54L,
65L, 51L, 41L), answered = c(30L, 28L, 15L, 21L, 11L, 14L, 18L,
35L, 42L, 36L, 37L, 38L, 54L, 45L, 37L, 51L, 40L, 66L, 83L, 68L,
74L, 51L, 39L, 38L, 25L, 21L, 18L, 20L, 7L, 14L, 14L, 28L, 37L,
49L, 46L, 31L, 43L, 45L, 39L, 31L, 47L, 65L, 81L, 83L, 61L, 65L,
58L, 37L, 20L, 31L, 15L, 25L, 17L, 12L, 21L, 42L, 106L, 115L,
134L, 127L, 93L, 107L, 97L, 88L, 94L, 74L, 74L, 66L, 65L, 69L,
52L, 51L, 28L, 19L, 23L, 12L, 16L, 12L, 17L, 39L, 91L, 115L,
104L, 95L, 65L, 79L, 67L, 73L, 64L, 64L, 68L, 70L, 53L, 64L,
48L, 38L)), row.names = c(NA, -96L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), groups = structure(list(date = structure(c(17563,
17564, 17565, 17566), class = "Date"), .rows = list(1:24, 25:48,
49:72, 73:96)), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
看起来像这样:
> head(df)
# A tibble: 6 x 4
# Groups: date [1]
date hour offered answered
<date> <chr> <int> <int>
1 2018-02-01 00 30 30
2 2018-02-01 01 28 28
3 2018-02-01 02 15 15
4 2018-02-01 03 21 21
5 2018-02-01 04 11 11
6 2018-02-01 05 14 14
我如何将 `hour` 列展开为多列(每个小时一列),同时保留每个日期的 `offered` 和 `answered` 值?
我尝试过使用 `tidyr::spread()`,但结果要么报错,要么出现大量 `NA` 值。此外,我注意到它只接受单个 `value` 列,这让我感到困惑。
我怎样才能做到这一点?
更新:
经过进一步思考,我现在意识到把日期值展开为列会更容易,并且只需保留 `actual` 或 `offered` 其中之一。
我不会修改原问题,因为已有一些有用的解决方案可以帮助其他人,但我会把这一点补充进来。
最终,这样的事情是理想的:
下面是使用 `tidyr` 中的 `gather` 和 `spread` 的一种方法:
# Reshape df so that every hour becomes its own column, with one row per
# date/measure combination.
# tidyr provides gather()/spread() and re-exports the %>% pipe, so attaching
# it makes this snippet runnable on its own.
library(tidyr)

df %>%
  # Stack offered and answered into long form: `variable` holds the source
  # column name and `value` holds its number; date and hour are kept as ids.
  gather(key = variable, value = value, -date, -hour) %>%
  # Widen again, producing one column per distinct hour value.
  spread(hour, value)
# NOTE: gather()/spread() are superseded in tidyr >= 1.0.0; pivot_longer() and
# pivot_wider() are the current equivalents.
# A tibble: 8 x 26
date variable `00` `01` `02` `03` `04` `05` `06` `07` `08` `09` `10` `11` `12` `13` `14` `15` `16`
<date> <chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 2018-02-01 answered 30 28 15 21 11 14 18 35 42 36 37 38 54 45 37 51 40
2 2018-02-01 offered 30 28 15 21 11 14 18 35 42 36 37 38 54 45 37 52 40
3 2018-02-02 answered 25 21 18 20 7 14 14 28 37 49 46 31 43 45 39 31 47
4 2018-02-02 offered 25 21 18 20 7 14 14 28 37 50 46 31 45 45 39 31 48
5 2018-02-03 answered 20 31 15 25 17 12 21 42 106 115 134 127 93 107 97 88 94
6 2018-02-03 offered 20 31 15 26 18 12 21 42 107 118 138 137 93 109 102 91 102
7 2018-02-04 answered 28 19 23 12 16 12 17 39 91 115 104 95 65 79 67 73 64
8 2018-02-04 offered 28 19 23 12 16 12 18 39 96 119 111 95 65 81 67 76 64
# ... with 7 more variables: `17` <int>, `18` <int>, `19` <int>, `20` <int>, `21` <int>, `22` <int>, `23` <int>
我建议在展开(spread)之前,先把小时值改成带字母前缀的字符串(例如 `h00`),这样生成的列名就不是纯数字。
我不确定你想要的输出有多宽,下面是使用 `data.table` 的 `melt` 和 `dcast` 的两种方案:
# data.table approach: melt to long form, then cast back out by hour.
library(data.table)

# Convert df to a data.table by reference (no copy is made).
setDT(df)

# melt() keeps date and hour as id columns and stacks the remaining columns
# (offered, answered) into variable/value pairs; dcast() then yields one row
# per date/variable and one "hour_XX" column per hour.
dcast(
  melt(df, id.vars = c("date", "hour")),
  date + variable ~ paste0("hour_", hour)
)
#
# date variable hour_00 hour_01 hour_02 hour_03 hour_04 hour_05 hour_06 hour_07
# 1: 2018-02-01 offered 30 28 15 21 11 14 18 35
# 2: 2018-02-01 answered 30 28 15 21 11 14 18 35
# 3: 2018-02-02 offered 25 21 18 20 7 14 14 28
# 4: 2018-02-02 answered 25 21 18 20 7 14 14 28
# 5: 2018-02-03 offered 20 31 15 26 18 12 21 42
# 6: 2018-02-03 answered 20 31 15 25 17 12 21 42
# 7: 2018-02-04 offered 28 19 23 12 16 12 18 39
# 8: 2018-02-04 answered 28 19 23 12 16 12 17 39
# hour_08 hour_09 hour_10 hour_11 hour_12 hour_13 hour_14 hour_15 hour_16 hour_17
# 1: 42 36 37 38 54 45 37 52 40 66
# 2: 42 36 37 38 54 45 37 51 40 66
# 3: 37 50 46 31 45 45 39 31 48 69
# 4: 37 49 46 31 43 45 39 31 47 65
# 5: 107 118 138 137 93 109 102 91 102 76
# 6: 106 115 134 127 93 107 97 88 94 74
# 7: 96 119 111 95 65 81 67 76 64 64
# 8: 91 115 104 95 65 79 67 73 64 64
# hour_18 hour_19 hour_20 hour_21 hour_22 hour_23
# 1: 84 69 75 51 39 38
# 2: 83 68 74 51 39 38
# 3: 91 117 74 66 60 37
# 4: 81 83 61 65 58 37
# 5: 76 70 68 74 55 54
# 6: 74 66 65 69 52 51
# 7: 68 71 54 65 51 41
# 8: 68 70 53 64 48 38
或者,如果您希望 `offered` 和 `answered` 各自成为单独的列:
# Alternative wide layout: one row per date, with a separate column for every
# measure/hour pair (offered_00 ... answered_23).
# Relies on df having already been converted with setDT(df).
dcast(
  df,
  date ~ hour,
  value.var = c("offered", "answered")
)
# date offered_00 offered_01 offered_02 offered_03 offered_04 offered_05
# 1: 2018-02-01 30 28 15 21 11 14
# 2: 2018-02-02 25 21 18 20 7 14
# 3: 2018-02-03 20 31 15 26 18 12
# 4: 2018-02-04 28 19 23 12 16 12
# offered_06 offered_07 offered_08 offered_09 offered_10 offered_11 offered_12
# 1: 18 35 42 36 37 38 54
# 2: 14 28 37 50 46 31 45
# 3: 21 42 107 118 138 137 93
# 4: 18 39 96 119 111 95 65
# offered_13 offered_14 offered_15 offered_16 offered_17 offered_18 offered_19
# 1: 45 37 52 40 66 84 69
# 2: 45 39 31 48 69 91 117
# 3: 109 102 91 102 76 76 70
# 4: 81 67 76 64 64 68 71
# offered_20 offered_21 offered_22 offered_23 answered_00 answered_01 answered_02
# 1: 75 51 39 38 30 28 15
# 2: 74 66 60 37 25 21 18
# 3: 68 74 55 54 20 31 15
# 4: 54 65 51 41 28 19 23
# answered_03 answered_04 answered_05 answered_06 answered_07 answered_08 answered_09
# 1: 21 11 14 18 35 42 36
# 2: 20 7 14 14 28 37 49
# 3: 25 17 12 21 42 106 115
# 4: 12 16 12 17 39 91 115
# answered_10 answered_11 answered_12 answered_13 answered_14 answered_15 answered_16
# 1: 37 38 54 45 37 51 40
# 2: 46 31 43 45 39 31 47
# 3: 134 127 93 107 97 88 94
# 4: 104 95 65 79 67 73 64
# answered_17 answered_18 answered_19 answered_20 answered_21 answered_22 answered_23
# 1: 66 83 68 74 51 39 38
# 2: 65 81 83 61 65 58 37
# 3: 74 74 66 65 69 52 51
# 4: 64 68 70 53 64 48 38