我正在尝试在 CUDA 内核中实现椭圆曲线点操作。为此,我使用 CGBN 库:https://github.com/NVlabs/CGBN/tree/master
该用例适用于非常具体的椭圆曲线设置(SageMath 中的定义):
p = 0x62CE5177412ACA899CF5
r = 0x1CE4AF36EED8DE22B99D
a = 0x39C95E6DDDB1BC45733C
b = 0x1F16D880E89D5A1C0ED1
n = 0x62CE5177407B7258DC31
P_x = 0x315D4B201C208475057D
P_y = 0x035F3DF5AB370252450A
Q_x = 0x0679834CEFB7215DC365
Q_y = 0x4084BC50388C4E6FDFAB
F = GF(p)
E = EllipticCurve(F, [a, b])
P = E(P_x, P_y)
Q = E(Q_x, Q_y)
到目前为止,我所做的实现对于大多数点都有效,但我意外地发现了一些特定的点对,而我得到了错误的结果。我在代码中查看了几次,但找不到任何可能导致此行为的原因。下面是实现函数:
__device__ dev_EC_point add_points(env192_t bn_env, const dev_EC_point &P1, const dev_EC_point &P2, const dev_Parameters ¶ms)
{
if (cgbn_equals(bn_env, P1.x, P2.x) && cgbn_equals(bn_env, P1.y, P2.y))
{
return double_point(bn_env, P1, params);
}
env192_t::cgbn_t t2;
if (cgbn_sub(bn_env, t2, P1.x, P2.x)) // x1 - x2 mod Pmod
{
printf("BREAKPOINT 1\n");
cgbn_sub(bn_env, t2, P2.x, P1.x);
cgbn_sub(bn_env, t2, params.Pmod, t2);
}
cgbn_modular_inverse(bn_env, t2, t2, params.Pmod); // 1/(x1-x2) mod Pmod
// Montgomery space
env192_t::cgbn_t x1, y1, x2, y2;
uint32_t np0;
np0 = cgbn_bn2mont(bn_env, x1, P1.x, params.Pmod);
cgbn_bn2mont(bn_env, y1, P1.y, params.Pmod);
cgbn_bn2mont(bn_env, x2, P2.x, params.Pmod);
cgbn_bn2mont(bn_env, y2, P2.y, params.Pmod);
cgbn_bn2mont(bn_env, t2, t2, params.Pmod);
env192_t::cgbn_t t1;
if (cgbn_sub(bn_env, t1, y1, y2)) // y0 - y1 mod Pmod
{
printf("BREAKPOINT 2\n");
cgbn_sub(bn_env, t1, y2, y1);
cgbn_sub(bn_env, t1, params.Pmod, t1);
}
env192_t::cgbn_t s, s_sq, x3, y3, t3;
cgbn_mont_mul(bn_env, s, t1, t2, params.Pmod, np0); // s = (y1-y2)/(x1-x2) mod Pmod // tested
cgbn_mont_sqr(bn_env, s_sq, s, params.Pmod, np0); // s^2 mod Pmod // tested
cgbn_add(bn_env, t3, x1, x2); // x1 + x2
if (cgbn_sub(bn_env, x3, s_sq, t3)) // x3 = s^2 - x1 - x2 // mod Pmod
{
printf("BREAKPOINT 4\n");
cgbn_sub(bn_env, x3, t3, s_sq);
cgbn_sub(bn_env, x3, params.Pmod, x3);
}
if (cgbn_sub(bn_env, t3, x1, x3)) // t3 = x1 - x3 // mod Pmod
{
printf("BREAKPOINT 5\n");
cgbn_sub(bn_env, t3, x3, x1);
cgbn_sub(bn_env, t3, params.Pmod, t3);
}
cgbn_mont_mul(bn_env, t3, t3, s, params.Pmod, np0);
if (cgbn_sub(bn_env, y3, t3, y1))
{
printf("BREAKPOINT 6\n");
cgbn_sub(bn_env, y3, y1, t3);
cgbn_sub(bn_env, y3, params.Pmod, y3);
}
cgbn_mont2bn(bn_env, x3, x3, params.Pmod, np0);
cgbn_mont2bn(bn_env, y3, y3, params.Pmod, np0);
// cgbn_sub(bn_env, x3, s_sq, t1);
return dev_EC_point{x3, y3};
}
以及用于测试的内核:
__global__ void ker_add_points(cgbn_error_report_t *report, EC_point *points, EC_parameters *parameters, int32_t instances)
{
int32_t instance;
int32_t points_index;
instance = (blockIdx.x * blockDim.x + threadIdx.x) / TPI;
points_index = instance * 2;
if (instance >= instances)
{
return;
}
context_t bn_context(cgbn_report_monitor, report, instance); // construct a context
env192_t bn192_env(bn_context.env<env192_t>());
dev_EC_point P0, P1;
dev_Parameters params;
cgbn_load(bn192_env, P0.x, &(points[points_index].x));
cgbn_load(bn192_env, P0.y, &(points[points_index].y));
cgbn_load(bn192_env, P1.x, &(points[points_index + 1].x));
cgbn_load(bn192_env, P1.y, &(points[points_index + 1].y));
cgbn_load(bn192_env, params.Pmod, &(parameters->Pmod));
cgbn_load(bn192_env, params.a, &(parameters->a));
dev_EC_point result = add_points(bn192_env, P0, P1, params);
cgbn_store(bn192_env, &(points[points_index].x), result.x);
cgbn_store(bn192_env, &(points[points_index].y), result.y);
}
有问题的点对:
A = Q * 1001
B = Q * 20
R = A + B # got wrong answer during tests
A = P * 15
B = Q
R = A + B # got wrong answer during tests
所有测试均使用 SageMath 和 pytest 运行,并通过 ctypes 调用测试函数。 整个项目的背景:https://github.com/atlomak/thesis
通过更多测试并添加 print 语句,我发现有时
x1 + x2
步骤中的加法结果比 p
更大。
我通过添加额外的检查来解决问题是否超过 p:
cgbn_add(bn_env, t3, x1, x2); // x1 + x2
if(cgbn_compare(bn_env, t3, params.Pmod) > 0)
{
cgbn_sub(bn_env, t3, t3, params.Pmod);
}