嗨,我正在尝试找出如何自动创建这样的矩阵或数据框。在此示例中,每一行都是 1:3 的一个排列,并且每一列中各个值出现的次数必须相同。例如,在此示例中,col1 有 2 个 1、2 个 2 和 2 个 3,其余各列也是如此。矩阵中没有重复的行。
1 2 3
1 3 2
2 3 1
2 1 3
3 1 2
3 2 1
注意这个矩阵也满足条件
1 2 3
2 3 1
3 1 2
我希望将其扩展到更大的矩阵,并得到一个满足上述条件的 data.frame,即使它不包含所有可能的排列(如第二个示例)也可以。大家有什么想法吗?
这应该可以做到:
#' Generate distinct permutations of 1:n whose columns are balanced
#'
#' Every row of the result is a permutation of `1:n`, no row is repeated, and
#' in every column each value of `1:n` occurs exactly `nrow(result) / n`
#' times.
#'
#' The candidate rows come from filtering all `n^n` combinations produced by
#' `expand.grid()`, so memory and time grow very quickly with `n`.
#'
#' @param n A single positive whole number: the number of columns and the
#'   largest value in each row.
#' @param num_rows Number of rows wanted. Must be a positive multiple of `n`
#'   (otherwise the columns cannot be balanced); values above `n!` are
#'   reduced to `n!`. If `NULL`, the size of the shortest balanced leading
#'   run of the enumerated permutations is used (at most `n!`).
#' @return A data frame with columns `V1`, ..., `Vn`.
generate_balanced_matrix <- function(n, num_rows = NULL) {
  stopifnot(
    is.numeric(n), length(n) == 1L, !is.na(n), n >= 1, n == round(n)
  )
  n <- as.integer(n)
  values <- seq_len(n)

  # Enumerate every combination, then keep the rows that use each value once.
  # All entries are already in 1:n, so "no duplicates" means "permutation".
  all_perms <- as.matrix(expand.grid(rep(list(values), n)))
  is_perm <- apply(all_perms, 1, function(x) anyDuplicated(x) == 0L)
  # drop = FALSE keeps a matrix even when only one row survives (n = 1)
  valid_perms <- all_perms[is_perm, , drop = FALSE]
  total <- nrow(valid_perms)

  # TRUE when every column holds each of 1:n exactly nrow(mat) / n times.
  # tabulate() also counts absent values as 0, so a column that is missing a
  # value is rejected; a row count that is not a multiple of n never passes.
  is_balanced <- function(mat) {
    per_value <- nrow(mat) / n
    all(apply(mat, 2, function(x) all(tabulate(x, nbins = n) == per_value)))
  }

  # Deterministic-by-construction fallback: take desired_rows / n distinct
  # permutations that start with 1 and add every cyclic shift 0..n-1 to their
  # values. Each such group of n rows has every value once per column, and
  # groups from different starting rows never share a row.
  build_balanced_subset <- function(mat, desired_rows) {
    leaders <- mat[mat[, 1] == 1L, , drop = FALSE]
    picked <- leaders[
      sample.int(nrow(leaders), desired_rows %/% n), , drop = FALSE
    ]
    shifted <- lapply(values - 1L, function(s) (picked - 1L + s) %% n + 1L)
    out <- do.call(rbind, shifted)
    # Shuffle so the cyclic structure is not visible in the row order
    out[sample.int(nrow(out)), , drop = FALSE]
  }

  # Try random subsets first; if none is balanced, construct one instead of
  # failing (random hits become vanishingly rare as n grows).
  get_balanced_subset <- function(mat, desired_rows) {
    max_attempts <- 1000
    for (attempt in seq_len(max_attempts)) {
      candidate <- mat[sample.int(nrow(mat), desired_rows), , drop = FALSE]
      if (is_balanced(candidate)) {
        return(candidate)
      }
    }
    build_balanced_subset(mat, desired_rows)
  }

  if (is.null(num_rows)) {
    # Grow a leading run of the enumeration until it is balanced. Only
    # multiples of n can be balanced, and the bound is tested before
    # indexing; the full set of n! rows is always balanced.
    num_rows <- n
    while (num_rows < total &&
           !is_balanced(valid_perms[seq_len(num_rows), , drop = FALSE])) {
      num_rows <- num_rows + n
    }
  } else {
    stopifnot(
      is.numeric(num_rows), length(num_rows) == 1L, !is.na(num_rows),
      num_rows >= 1, num_rows == round(num_rows)
    )
    # There are only n! distinct permutations to choose from
    num_rows <- as.integer(min(num_rows, total))
    if (num_rows %% n != 0L) {
      stop(
        "num_rows must be a multiple of n (", n, ") for balanced columns",
        call. = FALSE
      )
    }
  }

  result <- as.data.frame(get_balanced_subset(valid_perms, num_rows))
  names(result) <- paste0("V", values)
  result
}
# Fix the RNG state so the randomly selected rows are the same on every run
set.seed(seed = 123)
# Request 4 rows for n = 4 and show the resulting data frame
result1 <- generate_balanced_matrix(n = 4, num_rows = 4)
print(result1)
> print(result1)
V1 V2 V3 V4
1 4 1 3 2
2 3 2 4 1
3 2 4 1 3
4 1 3 2 4