根据真实数据手动计算所有硬币翻转概率

问题描述 投票:0回答:1

我有以下模拟的抛硬币数据:

library(ggplot2)
library(dplyr)
library(knitr)
library(kableExtra)

# Fix the RNG state so the simulated flips are reproducible across runs.
set.seed(123)

# Draw n_flips outcomes, "H" or "T" with equal probability, with replacement.
n_flips <- 100
flips <- sample(x = c("H", "T"), size = n_flips, replace = TRUE)

我手动计算所有条件概率并将结果总结在表格中:

#' Estimate the outcome that follows a pattern in a series of coin flips
#'
#' Slides a window of `nchar(sequence)` flips over `data`, finds every window
#' equal to `sequence` that still has a flip after it, and reports how often
#' that following flip is "H" or "T".
#'
#' @param sequence A single string of outcomes, e.g. `"HTH"`.
#' @param data A character vector of single-flip outcomes ("H" / "T").
#' @return A numeric vector of length 3: the share of matches followed by "H",
#'   the share followed by "T", and the number of matches. Both shares are
#'   `NA` when the pattern never occurs with a following flip.
get_conditional_prob <- function(sequence, data) {
  n <- nchar(sequence)

  # Only windows that leave at least one flip after them can be scored.
  n_windows <- length(data) - n

  # With data no longer than the pattern there is nothing to score. Return
  # zero matches explicitly: `1:(length(data) - n)` would otherwise iterate
  # over c(1, 0) or fail on a negative allocation length.
  if (n_windows < 1) {
    return(c(NA_real_, NA_real_, 0))
  }

  starts <- seq_len(n_windows)
  offsets <- seq_len(n) - 1L

  # Collapse each run of n consecutive flips into one string for comparison.
  windows <- vapply(
    starts,
    function(i) paste(data[i + offsets], collapse = ""),
    character(1)
  )
  next_outcomes <- data[starts + n]

  matches <- windows == sequence
  count <- sum(matches)

  if (count == 0) {
    return(c(NA_real_, NA_real_, 0))
  }

  next_after_matches <- next_outcomes[matches]
  c(
    mean(next_after_matches == "H"),
    mean(next_after_matches == "T"),
    count
  )
}

# Build every H/T pattern of length `len`, in lexicographic order
# ("H" before "T"), instead of typing the 2^len patterns out by hand.
enumerate_patterns <- function(len) {
  grid <- expand.grid(rep(list(c("H", "T")), len), stringsAsFactors = FALSE)
  # expand.grid() varies its first column fastest; reversing the columns makes
  # the first flip vary slowest, which yields the lexicographic ordering.
  do.call(paste0, unname(rev(as.list(grid))))
}

sequences_1 <- enumerate_patterns(1)
sequences_2 <- enumerate_patterns(2)
sequences_3 <- enumerate_patterns(3)
sequences_4 <- enumerate_patterns(4)
sequences_5 <- enumerate_patterns(5)

# All patterns of length 1 to 5, shortest first.
all_sequences <- c(sequences_1, sequences_2, sequences_3, sequences_4, sequences_5)

# Score every pattern against the observed flips in one pass. Each column of
# prob_matrix holds (P(H | pattern), P(T | pattern), occurrence count) for one
# pattern. This avoids growing a data frame with rbind() inside a loop, and
# avoids a loop variable named `seq` that would mask base::seq.
prob_matrix <- vapply(
  all_sequences,
  function(pattern) as.numeric(get_conditional_prob(pattern, flips)),
  numeric(3),
  USE.NAMES = FALSE
)

# One row per pattern, in the same order as all_sequences.
results <- data.frame(
  Sequence = all_sequences,
  Next_H = prob_matrix[1, ],
  Next_T = prob_matrix[2, ],
  Count = prob_matrix[3, ],
  stringsAsFactors = FALSE
)

# Prepare the display table: order patterns by length and then by pattern
# text, round both probabilities to three decimals, and give the columns
# presentation-friendly names.
results_formatted <- results %>%
  arrange(nchar(Sequence), Sequence) %>%
  mutate(
    Length = nchar(Sequence),
    Next_H = round(Next_H, 3),
    Next_T = round(Next_T, 3)
  ) %>%
  select(
    Pattern = Sequence,
    Length,
    `P(H|Pattern)` = Next_H,
    `P(T|Pattern)` = Next_T,
    Occurrences = Count
  )

# Render the summary as a styled HTML table. Rows are grouped by pattern
# length; the group sizes (2, 4, 8, 16, 32) are the number of H/T patterns of
# length 1 through 5 and must add up to the 62 rows of results_formatted.
kable(
  results_formatted,
  format = "html",
  caption = "Conditional Probabilities in Coin Flip Sequence (up to length 5)",
  align = c("l", "c", "c", "c", "c")
) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    position = "left",
    font_size = 12
  ) %>%
  add_header_above(c(" " = 2, "Conditional Probabilities" = 2, " " = 1)) %>%
  row_spec(0, bold = TRUE) %>%
  pack_rows(
    index = c(
      "Single Flip" = 2,
      "Two Flips" = 4,
      "Three Flips" = 8,
      "Four Flips" = 16,
      "Five Flips" = 32
    )
  )

enter image description here

有没有办法自动生成所有组合,而不必手动逐一枚举?手动枚举所有组合再把它们写进代码会非常冗长。有更简单的方法吗?

r
1个回答
0
投票
#' Enumerate every possible outcome of a run of coin flips
#'
#' Uses base R only, so it needs neither the R.utils package nor the `_` pipe
#' placeholder.
#'
#' @param n_flips Number of flips in each sequence (a single non-negative
#'   whole number).
#' @return A character vector of all 2^n_flips sequences of "H" and "T", in
#'   lexicographic order with "H" before "T". For `n_flips = 0` the result is
#'   the single empty sequence `""`.
make_seq <- function(n_flips) {
  stopifnot(is.numeric(n_flips), length(n_flips) == 1, n_flips >= 0)

  # Zero flips has exactly one outcome: the empty sequence.
  if (n_flips == 0) {
    return("")
  }

  grid <- expand.grid(rep(list(c("H", "T")), n_flips), stringsAsFactors = FALSE)
  # expand.grid() varies its first column fastest; reversing the columns makes
  # the first flip vary slowest, matching binary counting order (H = 0, T = 1).
  do.call(paste0, unname(rev(as.list(grid))))
}

# Concatenate the sequences of length 1 through 5 into one character vector.
unlist(lapply(seq_len(5), make_seq))

这将创建示例中的所有序列。

 [1] "H"     "T"     "HH"    "HT"    "TH"    "TT"    "HHH"   "HHT"   "HTH"   "HTT"   "THH"   "THT"  
[13] "TTH"   "TTT"   "HHHH"  "HHHT"  "HHTH"  "HHTT"  "HTHH"  "HTHT"  "HTTH"  "HTTT"  "THHH"  "THHT" 
[25] "THTH"  "THTT"  "TTHH"  "TTHT"  "TTTH"  "TTTT"  "HHHHH" "HHHHT" "HHHTH" "HHHTT" "HHTHH" "HHTHT"
[37] "HHTTH" "HHTTT" "HTHHH" "HTHHT" "HTHTH" "HTHTT" "HTTHH" "HTTHT" "HTTTH" "HTTTT" "THHHH" "THHHT"
[49] "THHTH" "THHTT" "THTHH" "THTHT" "THTTH" "THTTT" "TTHHH" "TTHHT" "TTHTH" "TTHTT" "TTTHH" "TTTHT"
[61] "TTTTH" "TTTTT"
© www.soinside.com 2019 - 2024. All rights reserved.