约束矩阵

问题描述 投票:0回答:2

我需要创建所有可能的维度为 5 (5x5) 的矩阵,其中所有元素都是从 0 到 100 的整数,其总和为 100。

我不知道该怎么做,或者如何开始......有什么建议吗?

尽管我用 R 编程,但我正在寻找如何做到这一点的想法。伪代码没问题。

我的第一种方法是获取 100 个元素的所有排列 25 次(矩阵中的每个元素一个),然后只取那些总和为 100 的排列。但这就是 100^25 种排列……没有办法通过这种方式做到这一点这种方法。

我会感谢任何想法和/或帮助!

r constraints permutation
2个回答
5
投票

OP 正在寻找最大长度为 25 的数字 100 的所有整数分区。包

partitions
配备了一个专门用于此目的的函数,称为
restrictedparts
。例如:

library(partitions)

## Keep the output tidy
options(digits = 4)
options(width = 90)

## all integer partitions of 10 of maximal length = 4
restrictedparts(10, 4)
#>                                                    
#> [1,] 10 9 8 7 6 5 8 7 6 5 6 5 4 4 7 6 5 4 5 4 3 4 3
#> [2,]  0 1 2 3 4 5 1 2 3 4 2 3 4 3 1 2 3 4 2 3 3 2 3
#> [3,]  0 0 0 0 0 0 1 1 1 1 2 2 2 3 1 1 1 1 2 2 3 2 2
#> [4,]  0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2

一旦生成了所有这些,只需为每个组合创建一个 5x5 矩阵(

restrictedparts
不区分
0 0 3
0 3 0
)。唯一的问题是,有太多可能的组合 (
partitions::R(25, 100, TRUE) = 139620591
),当您调用
restrictedparts(100, 25)
时,函数会抛出错误。

test <- restrictedparts(100, 25)
#> Warning in restrictedparts(100, 25): NAs introduced by coercion to integer range
#> Error in restrictedparts(100, 25): NAs in foreign function call (arg 3)

由于我们无法通过

restrictedparts
全部生成它们,因此我们可以使用
firstrestrictedpart
nextrestrictedpart
单独生成它们,如下所示:

funPartition <- function(p, n) {
    mat <- matrix(nrow = 25, ncol = n)
    mat[, 1] <- p

    for (i in 2:n) {
        p <- nextrestrictedpart(p)
        mat[, i] <- p
    }

    mat
}

head(funPartition(firstrestrictedpart(100, 25), 5))
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]  100   99   98   97   96
#> [2,]    0    1    2    3    4
#> [3,]    0    0    0    0    0
#> [4,]    0    0    0    0    0
#> [5,]    0    0    0    0    0
#> [6,]    0    0    0    0    0

这里唯一的问题是迭代器由于不断复制而效率不高。

输入RcppAlgos

有一种更快的方法使用该包

RcppAlgos
(我是作者)。与
partitions
包类似,有一个函数
partitionsGeneral
,用于生成所有分区。

library(RcppAlgos)
## Target is implicitly set to 100 below. For different targets, explicitly
## set the target parameter. E.g.:
##
##     partitionsGeneral(0:100, 25, TRUE, target = 200, upper = 10^5)
##
## Will generate the first 10^5 partitions of 200 using the vector 0:100

matrixParts <- apply(
    partitionsGeneral(0:100, 25, repetition = TRUE, upper = 10^5),
    1, \(x) matrix(x, ncol = 5), simplify = FALSE
)

all(sapply(matrixParts, sum) == 100)
#> [1] TRUE

matrixParts[c(1, 90, 10^5)]
#> [[1]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0    0
#> [3,]    0    0    0    0    0
#> [4,]    0    0    0    0    0
#> [5,]    0    0    0    0  100
#> 
#> [[2]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0    0
#> [3,]    0    0    0    0    1
#> [4,]    0    0    0    0   39
#> [5,]    0    0    0    0   60
#> 
#> [[3]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    5
#> [2,]    0    0    0    0   13
#> [3,]    0    0    0    0   17
#> [4,]    0    0    0    0   27
#> [5,]    0    0    0    2   36

更好的方法:迭代器

还有内存高效的迭代器可用于组合数学中的许多主题,包括整数分区(例如

partitionsIter
)。

使用迭代器,我们可以创建一个辅助函数,可以将每个结果转换为我们想要的矩阵。

matFromIter <- function(it, ncol = 5L) {
    matrix(it@nextIter(), ncol = ncol)
}

## Initialize partitions iterator
it <- partitionsIter(0:100, 25, repetition = TRUE)
## Get the first 3 results
replicate(3, matFromIter(it))
#> , , 1
#> 
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0    0
#> [3,]    0    0    0    0    0
#> [4,]    0    0    0    0    0
#> [5,]    0    0    0    0  100
#> 
#> , , 2
#> 
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0    0
#> [3,]    0    0    0    0    0
#> [4,]    0    0    0    0    1
#> [5,]    0    0    0    0   99
#> 
#> , , 3
#> 
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0    0
#> [3,]    0    0    0    0    0
#> [4,]    0    0    0    0    2
#> [5,]    0    0    0    0   98
## Get 2 more picking up where we left off above
replicate(2, matFromIter(it))
#> , , 1
#> 
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0    0
#> [3,]    0    0    0    0    0
#> [4,]    0    0    0    0    3
#> [5,]    0    0    0    0   97
#> 
#> , , 2
#> 
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0    0
#> [3,]    0    0    0    0    0
#> [4,]    0    0    0    0    4
#> [5,]    0    0    0    0   96
## Reset iterator
it@startOver()
## Get random lexicographical result using the method: `[[`
matrix(it[[1e6]], ncol = 5)
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    7
#> [2,]    0    0    0    0   10
#> [3,]    0    0    0    2   11
#> [4,]    0    0    0    2   22
#> [5,]    0    0    0    2   44
## Get the last one
matrix(it@back(), ncol = 5)
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    4    4    4    4    4
#> [2,]    4    4    4    4    4
#> [3,]    4    4    4    4    4
#> [4,]    4    4    4    4    4
#> [5,]    4    4    4    4    4

需要排列吗?

如果你真的想要排列,没问题,只需拨打

compositionsGeneral
:

matrixComps <- apply(
    compositionsGeneral(0:100, 25, repetition = TRUE, upper = 10^5),
    1, \(x) matrix(x, ncol = 5), simplify = FALSE
)

all(sapply(matrixComps, sum) == 100)
#> [1] TRUE

## Compare to the output of matrixCombs
matrixComps[c(1, 90, 10^5)]
#> [[1]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0    0
#> [3,]    0    0    0    0    0
#> [4,]    0    0    0    0    0
#> [5,]    0    0    0    0  100
#> 
#> [[2]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0    0
#> [3,]    0    0    0    0    0
#> [4,]    0    0    0    0   89
#> [5,]    0    0    0    0   11
#> 
#> [[3]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0   27
#> [3,]    0    0    0    0    6
#> [4,]    0    0    0    0   51
#> [5,]    0    0    0    0   16

随机抽样

由于结果数量如此庞大,抽样可能是我们最好的选择。考虑一下我们要处理的总结果有多少:

partitionsCount(0:100, 25, TRUE)
#> [1] 139620591

compositionsCount(0:100, 25, TRUE)
#> Big Integer ('bigz') :
#> [1] 87676181447775191489836

我们可以使用

partitionsSample
compositionsSample
来快速生成可以转换为所需矩阵输出的候选值。

## Optional, use seed parameter for reproducibility
apply(partitionsSample(0:100, 25, TRUE, n = 3, seed = 42), 1, \(x) {
    matrix(x, ncol = 5)
}, simplify = FALSE)
#> [[1]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    4    7    7
#> [2,]    0    0    4    7    7
#> [3,]    0    1    4    7    8
#> [4,]    0    1    5    7    8
#> [5,]    0    1    5    7   10
#> 
#> [[2]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    1    1    4    4    5
#> [2,]    1    1    4    5    5
#> [3,]    1    2    4    5    5
#> [4,]    1    2    4    5   11
#> [5,]    1    3    4    5   16
#> 
#> [[3]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    1    1    1    8
#> [2,]    0    1    1    1   11
#> [3,]    0    1    1    2   16
#> [4,]    0    1    1    6   17
#> [5,]    0    1    1    8   20

apply(compositionsSample(0:100, 25, TRUE, n = 3, seed = 28), 1, \(x) {
    matrix(x, ncol = 5)
}, simplify = FALSE)
#> [[1]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    2    6    1    2
#> [2,]    0    2    1    6    2
#> [3,]   12    2    3    1    1
#> [4,]    3    2    3   24    1
#> [5,]    7    4    4    5    6
#> 
#> [[2]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    1    9    4    5
#> [2,]    6    2    1    4    7
#> [3,]    1    4   24    4    2
#> [4,]    3    2    2    1    6
#> [5,]    1    7    2    1    1
#> 
#> [[3]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    1    1    3    9    3
#> [2,]    3    2    8    1    3
#> [3,]    8    5    6    2    6
#> [4,]    3    3   11    1    2
#> [5,]    1    3    4    5    6

效率

所有函数都用

C++
编写,以实现最终效率。考虑迭代超过 10,000 个分区。

library(microbenchmark)
pkg_partitions <- function(n, k, total) {
    a <- firstrestrictedpart(n, k)
    for (i in 1:(total - 1)) a <- nextrestrictedpart(a)
}

pkg_RcppAlgos <- function(n, k, total) {
    a <- partitionsIter(0:n, k, repetition = TRUE)
    for (i in 1:total) a@nextIter()
}

microbenchmark(cbRcppAlgos  = pkg_RcppAlgos(100, 25, 10^4),
               cbPartitions = pkg_partitions(100, 25, 10^4),
               times = 25, unit = "relative")
#> Warning in microbenchmark(cbRcppAlgos = pkg_RcppAlgos(100, 25, 10^4), cbPartitions =
#> pkg_partitions(100, : less accurate nanosecond times to avoid potential integer overflows
#> Unit: relative
#>          expr   min    lq  mean median    uq   max neval
#>   cbRcppAlgos  1.00  1.00  1.00   1.00  1.00  1.00    25
#>  cbPartitions 23.94 23.45 23.17  23.31 22.22 32.84    25

生成 10^5 随机样本不需要时间,特别是在使用多线程时:

system.time(partitionsSample(0:100, 25, TRUE, nThreads = 6,
                             n = 1e5, seed = 42))
#>    user  system elapsed 
#>   1.973   0.004   0.348

system.time(compositionsSample(0:100, 25, TRUE, nThreads = 6,
                               n = 1e5, seed = 28))
#>    user  system elapsed 
#>   0.300   0.001   0.062

2
投票

这是一个生成单个目标矩阵的函数 - 可能不是最有效的方法,如果运行大量次,您只会获得“所有”可能的组合。您可以如下所示在 lapply() 上使用

rep(5, num)
来生成其中的
num

norm100 <- function(n=5){ # generate some random values vec <- sample(0:100, size=n^2) # put them in a matrix, normalizing to 100 and rounding mat <- matrix(round((vec / sum(vec)) * 100), nrow=n) # find out how much the rounding makes us deviate from 100 off_by <- sum(mat) - 100 # get a random matrix element index modify_idx <- sample(length(mat), 1) # if adjusting by `off_by` would put us out of the target interval, try again while ((mat[modify_idx] - off_by) < 0 | (mat[modify_idx] - off_by) > 100){ modify_idx <- sample(length(mat), 1) } # once we have one (usually on the first shot), adjust so that mat sums to 100 mat[modify_idx] <- mat[modify_idx] - off_by return(mat) } runs <- 1000 matrices <- lapply(rep(5, runs), norm100)

即使运行了几次 100,000 次,我也没有得到任何重复的东西,但如果你这样做了,你总是可以扔掉重复的东西。 

© www.soinside.com 2019 - 2024. All rights reserved.