我想在 C 和定点中实现乘法,具有可扩展的分数位宽(即从 1 位到 30 位),我知道最简单的方法是这样的:
fixedpt fixedpt_mul(fixedpt A, fixedpt B)
{
return (((fixedptd)A * (fixedptd)B) >> FIXEDPT_FBITS);
}
但是这种方法在32bit OS系统中小数位宽较大时容易溢出。 我发现一个 existing repo 在乘法之前将整数部分和小数部分分开,这种方法缓解了上述问题,但它只实现了 16 小数位宽,所以我修改它以适应更一般的情况:
#include <stdint.h>
#include <stdio.h>
typedef int32_t fix_t;
// w means fraction bitwidth
#define fix_ONE(w) ((fix_t)(0x00000001 << w))
#define MASK_M(w) (((fix_t)1 << w) - 1) // mask of mantissa, all LSB set, all MSB clear
#define fix_rconst(x, w) ((fix_t)((x) * fix_ONE(w) + ((x) >= 0 ? 0.5 : -0.5))) // convert a const to rounded fix_t
static const fix_t fix_overflow = 0x80000000; // 1000 0000 0000 0000 0000 0000 0000 0000
static inline float fix_to_float(fix_t a, int w) { return ((float)a / fix_ONE(w)); }
fix_t fix_mul(fix_t inArg0, fix_t inArg1, int w) {
// w is fractional bitwidth
// separate interger part and fraciton part
int32_t A = (inArg0 >> w), C = (inArg1 >> w);
uint32_t B = (inArg0 & MASK_M(w)), D = (inArg1 & MASK_M(w));
int32_t AC = A*C;
int32_t AD_CB = A*D + C*B;
uint32_t BD = B*D;
int32_t product_hi = AC + (AD_CB >> w); // product_hi stands for the interger part of final result
uint32_t ad_cb_temp = AD_CB << (32-w); // get the fraction part of AD_CB
uint32_t product_lo = BD + ad_cb_temp; //product_lo stands for the fraction part of final result
if (product_lo < BD || product_lo < ad_cb_temp) { // check if product_lo overflow
product_hi++;
}
// The upper part bits should all be the same (including the sign).
if (product_hi >> 31 != product_hi >> (31-w)) {
printf("Overflow in fix_mul(), please use other bitwidth \n");
return fix_overflow;
}
// combine interger part and fraction part
return (product_hi << (w)) | (product_lo >> (32-w));
}
int test_mul(void)
{
// test cases
float a = 0.267f; //0.50f;//1.267f; //-1.267f;//-1.267f;
float b = 0.849f; //0.25f;//1.849f; //1.849f; //-1.849f;
for (int w = 1; w < 28; w++) {
fix_t aa = fix_rconst(a, w);
fix_t bb = fix_rconst(b, w);
fix_t result = fix_mul(aa, bb, w);
float fresult = fix_to_float(result, w);
printf("fix_rconst(%f, %i) = %i, fix_rconst(%f, %i) = %i, result = %i, fresult=%f \n", a, w, aa, b, w, bb, result, fresult);
}
return 0;
}
int main()
{
test_mul();
// system("pause");
return 0;
}
您可以在这里在线获取代码。 但是测试结果除了16位宽外都不正确:
fix_rconst(0.267000, 1) = 1, fix_rconst(0.849000, 1) = 2, result = 1, fresult=0.500000
fix_rconst(0.267000, 2) = 1, fix_rconst(0.849000, 2) = 3, result = 0, fresult=0.000000
fix_rconst(0.267000, 3) = 2, fix_rconst(0.849000, 3) = 7, result = 0, fresult=0.000000
fix_rconst(0.267000, 4) = 4, fix_rconst(0.849000, 4) = 14, result = 0, fresult=0.000000
fix_rconst(0.267000, 5) = 9, fix_rconst(0.849000, 5) = 27, result = 0, fresult=0.000000
fix_rconst(0.267000, 6) = 17, fix_rconst(0.849000, 6) = 54, result = 0, fresult=0.000000
fix_rconst(0.267000, 7) = 34, fix_rconst(0.849000, 7) = 109, result = 0, fresult=0.000000
fix_rconst(0.267000, 8) = 68, fix_rconst(0.849000, 8) = 217, result = 0, fresult=0.000000
fix_rconst(0.267000, 9) = 137, fix_rconst(0.849000, 9) = 435, result = 0, fresult=0.000000
fix_rconst(0.267000, 10) = 273, fix_rconst(0.849000, 10) = 869, result = 0, fresult=0.000000
fix_rconst(0.267000, 11) = 547, fix_rconst(0.849000, 11) = 1739, result = 0, fresult=0.000000
fix_rconst(0.267000, 12) = 1094, fix_rconst(0.849000, 12) = 3478, result = 3, fresult=0.000732
fix_rconst(0.267000, 13) = 2187, fix_rconst(0.849000, 13) = 6955, result = 29, fresult=0.003540
fix_rconst(0.267000, 14) = 4375, fix_rconst(0.849000, 14) = 13910, result = 232, fresult=0.014160
fix_rconst(0.267000, 15) = 8749, fix_rconst(0.849000, 15) = 27820, result = 1856, fresult=0.056641
fix_rconst(0.267000, 16) = 17498, fix_rconst(0.849000, 16) = 55640, result = 14855, fresult=0.226669
fix_rconst(0.267000, 17) = 34996, fix_rconst(0.849000, 17) = 111280, result = 118846, fresult=0.906723
fix_rconst(0.267000, 18) = 69992, fix_rconst(0.849000, 18) = 222560, result = 164338, fresult=0.626900
fix_rconst(0.267000, 19) = 139985, fix_rconst(0.849000, 19) = 445121, result = 266201, fresult=0.507738
fix_rconst(0.267000, 20) = 279970, fix_rconst(0.849000, 20) = 890241, result = 32390, fresult=0.030890
fix_rconst(0.267000, 21) = 559940, fix_rconst(0.849000, 21) = 1780482, result = 259120, fresult=0.123558
fix_rconst(0.267000, 22) = 1119879, fix_rconst(0.849000, 22) = 3560964, result = 2069485, fresult=0.493404
fix_rconst(0.267000, 23) = 2239758, fix_rconst(0.849000, 23) = 7121928, result = 8167272, fresult=0.973615
fix_rconst(0.267000, 24) = 4479517, fix_rconst(0.849000, 24) = 14243856, result = 15062169, fresult=0.897775
fix_rconst(0.267000, 25) = 8959033, fix_rconst(0.849000, 25) = 28487712, result = 19611502, fresult=0.584468
fix_rconst(0.267000, 26) = 17918066, fix_rconst(0.849000, 26) = 56975424, result = 22674290, fresult=0.337873
fix_rconst(0.267000, 27) = 35836132, fix_rconst(0.849000, 27) = 113950848, result = 47176592, fresult=0.351493
如果类型
fixedpt
和 fixedptd
分别是 int32_t
和 int64_t
,则简单的实现适用于所有小数位宽,除了舍入方法。您可能想尝试这种变体:
fixedpt fixedpt_mul(fixedpt A, fixedpt B)
{
return (((fixedptd)A * (fixedptd)B + (1 << (FIXEDPT_FBITS - 1))) >> FIXEDPT_FBITS);
}
C 标准要求类型
long long
至少有 63 个值位,因此即使目标符合标准的旧版本,也应支持上述内容。
如果你不想使用 64 位乘法,你可以使用发布代码中的 16x16 -> 32 乘法来模拟它,然后将最终结果四舍五入并右移
FIXEDPT_FBITS
位,这将需要一些调整,具体取决于关于FIXEDPT_FBITS >= 16
与否。
// w is fractional bitwidth
// separate interger part and fraciton part
int32_t A = (inArg0 >> w), C = (inArg1 >> w);
uint32_t B = (inArg0 & MASK_M(w)), D = (inArg1 & MASK_M(w));
这段代码的重点是将32位值分成两部分。 巧合的是,在 libfixmath 中小数位数也是 16。即使您使用不同的定点位置,实际乘法也可以保持相同。
这是我早期一些作品的改编图像。移位量
w
只影响最终结果使用,不影响A/B/C/D值。请注意,当 w
不是 16 时,结果将不会与单词边界整齐对齐,因此必须进行移位。