我需要快速实现设定的产品基数,例如使用性能技巧来减少分支和内存访问。
在 C++ 中,签名类似于:
enum cardinality { ZERO, ONE, AT_MOST_ONE, ANY, AT_LEAST_ONE}
cardinality product(cardinality a, cardinality b) {
}
此表列举了该函数的预期结果:
0 1 ? * +
0 0 0 0 0 0
1 0 1 ? * +
? 0 ? ? * *
* 0 * * * *
+ 0 + * * +
想法
我可以想到两种实现方式:
cardinality
值,因此将它们移位/或其中两个一起生成用于查找的 6 位值。#include <cstdint>
#include <array>
#include <iostream>
enum cardinality : uint8_t { ZERO, ONE, AT_MOST_ONE, ANY, AT_LEAST_ONE };
cardinality table[25] = {
ZERO, ZERO, ZERO, ZERO, ZERO,
ZERO, ONE, AT_MOST_ONE, ANY, AT_LEAST_ONE,
ZERO, AT_MOST_ONE, AT_MOST_ONE, ANY, ANY,
ZERO, ANY, ANY, ANY, ANY,
ZERO, AT_LEAST_ONE, ANY, ANY, AT_LEAST_ONE
};
// Table for all combinations of 6 bits
cardinality table2[1 << 6];
// Use simple 2d table as lookup
cardinality product(cardinality a, cardinality b)
{
return table[uint8_t(a) + (uint8_t(b) * uint8_t(5))];
}
// Use generated 6-bit table. Not all values in table are populated
cardinality product2(cardinality a, cardinality b)
{
return table2[uint8_t(a) | (uint8_t(b) << 3)];
}
int main(int, char*[])
{
// Populate the 6 bit table. Could also just be hard-coded. Not all table entries are populated
// since we only need 25 values but 6 bits stores 63 values.
for (uint8_t a = 0; a < 5; a++)
{
for (uint8_t b = 0; b < 5; b++)
{
table2[a | (b << 3)] = product(cardinality(a), cardinality(b));
}
}
uint8_t a, b;
std::cin >> a;
std::cin >> b;
uint8_t p1 = product(cardinality(a), cardinality(b));
uint8_t p2 = product2(cardinality(a), cardinality(b));
return table[uint8_t(a) + (uint8_t(b) * uint8_t(5))];
}
我不知道这些是否是最快的,但生成的程序集看起来相当小。编译器资源管理器链接:https://godbolt.org/z/7q4zf441b.
生成程序集(
-O3
,Clang 15)的区别在于版本 1 使用 lea
,而版本 2 使用 shl
:
版本1:
product(cardinality, cardinality):
mov eax, edi
mov ecx, esi
lea rcx, [rcx + 4*rcx]
add rcx, rax
lea rax, [rip + table]
movzx eax, byte ptr [rax + rcx]
ret
版本2:
product2(cardinality, cardinality):
mov eax, edi
mov ecx, esi
shl rcx, 3
or rcx, rax
lea rax, [rip + table2]
movzx eax, byte ptr [rcx + rax]
ret