我有这个抛硬币数据:
library(ggplot2)
library(dplyr)
library(knitr)
library(kableExtra)
set.seed(123)
n_flips <- 100
flips <- sample(c("H", "T"), n_flips, replace = TRUE)
我手动计算所有条件概率并将结果总结在表格中:
get_conditional_prob <- function(sequence, data) {
n <- nchar(sequence)
windows <- character(length(data) - n)
next_outcomes <- character(length(data) - n)
for(i in 1:(length(data) - n)) {
windows[i] <- paste(data[i:(i+n-1)], collapse="")
next_outcomes[i] <- data[i+n]
}
matches <- windows == sequence
count <- sum(matches)
if(count > 0) {
next_after_matches <- next_outcomes[matches]
prob_h <- mean(next_after_matches == "H")
prob_t <- mean(next_after_matches == "T")
} else {
prob_h <- NA
prob_t <- NA
}
return(c(prob_h, prob_t, count))
}
sequences_1 <- c("H", "T")
sequences_2 <- c("HH", "HT", "TH", "TT")
sequences_3 <- c("HHH", "HHT", "HTH", "HTT", "THH", "THT", "TTH", "TTT")
sequences_4 <- c("HHHH", "HHHT", "HHTH", "HHTT", "HTHH", "HTHT", "HTTH", "HTTT",
"THHH", "THHT", "THTH", "THTT", "TTHH", "TTHT", "TTTH", "TTTT")
sequences_5 <- c("HHHHH", "HHHHT", "HHHTH", "HHHTT", "HHTHH", "HHTHT", "HHTTH", "HHTTT",
"HTHHH", "HTHHT", "HTHTH", "HTHTT", "HTTHH", "HTTHT", "HTTTH", "HTTTT",
"THHHH", "THHHT", "THHTH", "THHTT", "THTHH", "THTHT", "THTTH", "THTTT",
"TTHHH", "TTHHT", "TTHTH", "TTHTT", "TTTHH", "TTTHT", "TTTTH", "TTTTT")
all_sequences <- c(sequences_1, sequences_2, sequences_3, sequences_4, sequences_5)
results <- data.frame(
Sequence = character(),
Next_H = numeric(),
Next_T = numeric(),
Count = numeric(),
stringsAsFactors = FALSE
)
for(seq in all_sequences) {
probs <- get_conditional_prob(seq, flips)
results <- rbind(results,
data.frame(
Sequence = seq,
Next_H = probs[1],
Next_T = probs[2],
Count = probs[3]
))
}
results_formatted <- results %>%
mutate(
Length = nchar(Sequence),
Next_H = round(Next_H, 3),
Next_T = round(Next_T, 3)
) %>%
arrange(Length, Sequence) %>%
select(
'Pattern' = Sequence,
'Length' = Length,
'P(H|Pattern)' = Next_H,
'P(T|Pattern)' = Next_T,
'Occurrences' = Count
)
kable(results_formatted,
format = "html",
caption = "Conditional Probabilities in Coin Flip Sequence (up to length 5)",
align = c('l', 'c', 'c', 'c', 'c')) %>%
kable_styling(
bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE,
position = "left",
font_size = 12
) %>%
add_header_above(c(" " = 2, "Conditional Probabilities" = 2, " " = 1)) %>%
row_spec(0, bold = TRUE) %>%
pack_rows("Single Flip", 1, 2) %>%
pack_rows("Two Flips", 3, 6) %>%
pack_rows("Three Flips", 7, 14) %>%
pack_rows("Four Flips", 15, 30) %>%
pack_rows("Five Flips", 31, 62)
我可以做些什么来自动计算所有组合,而无需手动枚举它们吗?手动枚举所有组合然后将它们输入代码将变得相当冗长。有更简单的方法吗?
make_seq <- function(n_flips) {
R.utils::intToBin(0:(2^n_flips - 1)) |>
gsub("0", "H", x = _) |>
gsub("1", "T", x = _)
}
lapply(1:5, make_seq) |> unlist()
这将创建示例中的所有序列。
[1] "H" "T" "HH" "HT" "TH" "TT" "HHH" "HHT" "HTH" "HTT" "THH" "THT"
[13] "TTH" "TTT" "HHHH" "HHHT" "HHTH" "HHTT" "HTHH" "HTHT" "HTTH" "HTTT" "THHH" "THHT"
[25] "THTH" "THTT" "TTHH" "TTHT" "TTTH" "TTTT" "HHHHH" "HHHHT" "HHHTH" "HHHTT" "HHTHH" "HHTHT"
[37] "HHTTH" "HHTTT" "HTHHH" "HTHHT" "HTHTH" "HTHTT" "HTTHH" "HTTHT" "HTTTH" "HTTTT" "THHHH" "THHHT"
[49] "THHTH" "THHTT" "THTHH" "THTHT" "THTTH" "THTTT" "TTHHH" "TTHHT" "TTHTH" "TTHTT" "TTTHH" "TTTHT"
[61] "TTTTH" "TTTTT"