我在 R 中有这个 4 状态马尔可夫链:
我编写了以下(100 次迭代)模拟,该模拟模拟 100 名医院患者(所有患者均以状态 1 开始),并跟踪他们在模拟的每次迭代期间所处的状态。在每次迭代开始时,我们查看每个患者自上次迭代以来所处的状态……然后根据转移矩阵概率移动患者(对每个患者执行此操作)。例如
然后我将整个过程重复了 1000 次并绘制了结果:
library(reshape2)
library(ggplot2)
library(dplyr)
library(tidyr)
# transition matrix
P <- matrix(c(1/3, 1/3, 1/3, 0,
1/2, 1/2, 0, 0,
0, 0, 1/2, 1/2,
0, 0, 0, 1),
nrow = 4, ncol = 4, byrow = TRUE)
n_iterations <- 100
n_people <- 100
n_simulations <- 100
run_simulation <- function(sim_number) {
state_matrix <- matrix(0, nrow = n_iterations, ncol = n_people)
state_matrix[1, ] <- 1 # All start in state 1
for (i in 2:n_iterations) {
for (person in 1:n_people) {
current_state <- state_matrix[i-1, person]
state_matrix[i, person] <- sample(1:4, 1, prob = P[current_state, ])
}
}
state_counts <- apply(state_matrix, 1, function(x) table(factor(x, levels = 1:4)))
percentages <- as.data.frame(t(state_counts) / n_people * 100) # Convert to percentages (0-100)
# Create patient summary
patient_summary <- data.frame(
patient = 1:n_people,
simulation = sim_number,
state1 = rowSums(state_matrix == 1),
state2 = rowSums(state_matrix == 2),
state3 = rowSums(state_matrix == 3),
state4 = rowSums(state_matrix == 4)
)
list(percentages = percentages, patient_summary = patient_summary)
}
# Run simulations
set.seed(123) # For reproducibility
all_simulations <- vector("list", n_simulations)
all_patient_summaries <- vector("list", n_simulations)
for (sim in 1:n_simulations) {
simulation_result <- run_simulation(sim)
all_simulations[[sim]] <- simulation_result$percentages
all_patient_summaries[[sim]] <- simulation_result$patient_summary
if (sim %% 10 == 0) { # Print progress every 10 simulations
cat(sprintf("Completed simulation %d of %d\n", sim, n_simulations))
}
}
# Combine all patient summaries into one data frame
final_patient_summary <- do.call(rbind, all_patient_summaries)
# Reorder columns to put patient and simulation first
final_patient_summary <- final_patient_summary[, c("patient", "simulation", "state1", "state2", "state3", "state4")]
# calculate the average percentages across all simulations
average_percentages <- Reduce(`+`, all_simulations) / n_simulations
average_percentages$iteration <- 1:n_iterations
# function to calculate confidence intervals
calculate_ci <- function(x) {
quantile(x, probs = c(0.05, 0.95))
}
# confidence intervals
ci_data <- lapply(1:n_iterations, function(i) {
data.frame(
iteration = i,
state = as.character(1:4),
lower = sapply(1:4, function(j) calculate_ci(sapply(all_simulations, function(sim) sim[i, j]))[1]),
upper = sapply(1:4, function(j) calculate_ci(sapply(all_simulations, function(sim) sim[i, j]))[2])
)
})
ci_data <- do.call(rbind, ci_data)
# reshape
average_percentages_long <- average_percentages %>%
pivot_longer(cols = as.character(1:4),
names_to = "state",
values_to = "percentage")
plot_data <- merge(average_percentages_long, ci_data, by = c("iteration", "state"))
ggplot(plot_data, aes(x = iteration, y = percentage, color = state, fill = state)) +
geom_line() +
geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2) +
scale_color_manual(values = c("1" = "blue", "2" = "red", "3" = "green", "4" = "purple"),
labels = paste("State", 1:4)) +
scale_fill_manual(values = c("1" = "blue", "2" = "red", "3" = "green", "4" = "purple"),
labels = paste("State", 1:4)) +
labs(x = "Iteration", y = "Average Percentage of People", color = "State", fill = "State") +
ggtitle("Average Percentage of People in Each State Over Time (1000 Simulations)") +
theme_minimal() +
scale_y_continuous(limits = c(0, 100))
print(head(final_patient_summary))
目标:我正在尝试修改此模拟以在每次迭代时记录:有多少患者首先进入状态 4,有多少患者已经处于状态 4。例如:
simulation iteration patient state
1 1 87 ...
1 1 88 ...
1 1 89 ...
.. ... ... ...
1 2 87 ...
1 2 88 ...
1 2 89 ...
.. ... ... ...
1 3 87 ...
1 3 88 ...
1 3 89 ...
... ... ... ...
2 1 87 ...
2 1 88 ...
2 1 89 ...
... ... ... ...
2 2 87 ...
2 2 88 ...
2 2 89 ...
... ... ... ...
这里,状态列将具有值:state1、state2、state3、state_4_new、state_4_existing
用于可视化马尔可夫链的 R 代码
grViz("
digraph {
rankdir=LR;
node [shape = circle]
1 [label = '1', style = filled, fillcolor = green]
2 [label = '2']
3 [label = '3']
4 [label = '4', style = filled, fillcolor = red]
1 -> 1 [label = '1/3']
1 -> 2 [label = '1/3']
1 -> 3 [label = '1/3']
2 -> 1 [label = '1/2']
2 -> 2 [label = '1/2']
3 -> 3 [label = '1/2']
3 -> 4 [label = '1/2']
4 -> 4 [label = '1']
}
")
只需添加一个新状态(例如 5)。任何处于状态 4 的患者将以 1 的概率“出院”。因此,他们将在一次迭代中处于状态 4,但在下一次迭代中,他们将处于状态 5。您的转换矩阵将类似于:
# transition matrix
P <- matrix(c(1/3, 1/3, 1/3, 0 , 0,
1/2, 1/2, 0, 0 , 0,
0, 0, 1/2, 1/2, 0,
0, 0, 0, 0 , 1,
0, 0, 0, 0 , 1),
nrow = 5, ncol = 5, byrow = TRUE)
这是一个可视化的图表:
DiagrammeR::grViz("
digraph {
rankdir=LR;
node [shape = circle]
0 [label = 'New', style = filled, fillcolor = grey]
1 [label = '1', style = filled, fillcolor = green]
2 [label = '2']
3 [label = '3']
4 [label = '4', style = filled, fillcolor = yellow]
5 [label = '5', style = filled, fillcolor = red]
0 -> 1 [label = 'Admitted']
1 -> 1 [label = '1/3']
1 -> 2 [label = '1/3']
1 -> 3 [label = '1/3']
2 -> 1 [label = '1/2']
2 -> 2 [label = '1/2']
3 -> 3 [label = '1/2']
3 -> 4 [label = '1/2']
4 -> 5 [label = '1 (discharged)']
5 -> 5 [label = '1']
}
")