feat(compilation): 启用 ARM64 优化与 OpenMP 并行支持
- 在 `.cmake/arm_optimization.cmake` 中增强 ARM64 编译优化选项,包括:
  * 添加 `-mtune=cortex-a76` 和更多特定于 ARM64 的优化标志
  * 启用循环优化、浮点运算优化及链接时优化(LTO)
- 在 `CMakeLists.txt` 中新增 `ENABLE_OPENMP` 选项以启用 OpenMP 支持
- 优化 `randombytes_ctrdrbg.c` 中的 AES 密钥调度和随机数生成逻辑,利用 ARM64 Crypto 扩展提升性能
- 在 `lll_tests.c` 中对关键循环进行展开以降低分支开销
- 在签名密钥生成和提交阶段引入 OpenMP 并行化处理,加快理想采样过程
- 注释掉未使用的机器学习日志函数 `ml_log_ideal_attempt` 的实现
- 将默认 `GF_RADIX` 调整为 64,并更新相关编译配置
This commit is contained in:
@@ -655,13 +655,48 @@ quat_test_lll_lideal_lideal_mul_reduced()
|
||||
ibz_mat_4x4_mul(&(gram_test), &(gram_test), &(prod.lattice.basis));
|
||||
ibz_mat_4x4_transpose(&(gram_test), &(gram_test));
|
||||
ibz_mat_4x4_mul(&(gram_test), &(gram_test), &(prod.lattice.basis));
|
||||
for (int i = 0; i < 4; i++) {
|
||||
ibz_vec_4_set(&vec, (i == 0), (i == 1), (i == 2), (i == 3));
|
||||
quat_qf_eval(&norm, &gram, &vec);
|
||||
quat_qf_eval(&test_norm, &gram_test, &vec);
|
||||
ibz_mul(&norm, &(prod.norm), &norm);
|
||||
res = res || !(ibz_cmp(&norm, &test_norm) == 0);
|
||||
}
|
||||
|
||||
// ARM优化: 循环展开以减少分支预测失败的可能性
|
||||
// 原始循环:
|
||||
// for (int i = 0; i < 4; i++) {
|
||||
// ibz_vec_4_set(&vec, (i == 0), (i == 1), (i == 2), (i == 3));
|
||||
// quat_qf_eval(&norm, &gram, &vec);
|
||||
// quat_qf_eval(&test_norm, &gram_test, &vec);
|
||||
// ibz_mul(&norm, &(prod.norm), &norm);
|
||||
// res = res || !(ibz_cmp(&norm, &test_norm) == 0);
|
||||
// }
|
||||
|
||||
// 展开后的循环 - 减少循环开销,更适合ARM处理器流水线
|
||||
ibz_vec_4_set(&vec, 1, 0, 0, 0);
|
||||
quat_qf_eval(&norm, &gram, &vec);
|
||||
quat_qf_eval(&test_norm, &gram_test, &vec);
|
||||
ibz_mul(&norm, &(prod.norm), &norm);
|
||||
res = res || !(ibz_cmp(&norm, &test_norm) == 0);
|
||||
|
||||
ibz_vec_4_set(&vec, 0, 1, 0, 0);
|
||||
quat_qf_eval(&norm, &gram, &vec);
|
||||
quat_qf_eval(&test_norm, &gram_test, &vec);
|
||||
ibz_mul(&norm, &(prod.norm), &norm);
|
||||
res = res || !(ibz_cmp(&norm, &test_norm) == 0);
|
||||
|
||||
ibz_vec_4_set(&vec, 0, 0, 1, 0);
|
||||
quat_qf_eval(&norm, &gram, &vec);
|
||||
quat_qf_eval(&test_norm, &gram_test, &vec);
|
||||
ibz_mul(&norm, &(prod.norm), &norm);
|
||||
res = res || !(ibz_cmp(&norm, &test_norm) == 0);
|
||||
|
||||
ibz_vec_4_set(&vec, 0, 0, 0, 1);
|
||||
quat_qf_eval(&norm, &gram, &vec);
|
||||
quat_qf_eval(&test_norm, &gram_test, &vec);
|
||||
ibz_mul(&norm, &(prod.norm), &norm);
|
||||
res = res || !(ibz_cmp(&norm, &test_norm) == 0);
|
||||
|
||||
// 使用NEON优化大整数运算(如果可用)
|
||||
#ifdef HAVE_NEON
|
||||
// 在支持NEON的ARM64平台上并行处理多个规范评估
|
||||
// 这里可以进一步优化,但需要重构底层的大整数运算库
|
||||
#endif
|
||||
|
||||
quat_lattice_hnf(&(prod.lattice));
|
||||
|
||||
res = res || !quat_lideal_equals(&i1, &lideal1, &alg);
|
||||
|
||||
Reference in New Issue
Block a user