feat(compilation): 启用 ARM64 优化与 OpenMP 并行支持
- 在 `.cmake/arm_optimization.cmake` 中增强 ARM64 编译优化选项,包括:
  * 添加 `-mtune=cortex-a76` 和更多特定于 ARM64 的优化标志
  * 启用循环优化、浮点运算优化及链接时优化(LTO)
- 在 `CMakeLists.txt` 中新增 `ENABLE_OPENMP` 选项以启用 OpenMP 支持
- 优化 `randombytes_ctrdrbg.c` 中的 AES 密钥调度和随机数生成逻辑,利用 ARM64 Crypto 扩展提升性能
- 在 `lll_tests.c` 中对关键循环进行展开以降低分支开销
- 在签名密钥生成和提交阶段引入 OpenMP 并行化处理,加快理想采样过程
- 注释掉未使用的机器学习日志函数 `ml_log_ideal_attempt` 实现
- 调整默认 `GF_RADIX` 为 64,并更新相关编译配置
This commit is contained in:
@@ -23,39 +23,51 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64" OR ${CMAKE_SYSTEM_PROCESSOR} MAT
|
|||||||
add_compile_options(-march=armv8-a+crypto)
|
add_compile_options(-march=armv8-a+crypto)
|
||||||
add_compile_definitions(HAVE_ARM64_CRYPTO)
|
add_compile_definitions(HAVE_ARM64_CRYPTO)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# CPU特定优化
|
||||||
|
# 根据实际部署平台选择合适的CPU型号
|
||||||
|
add_compile_options(-mtune=cortex-a76) # 默认使用cortex-a76
|
||||||
|
|
||||||
|
# 更多ARM64优化选项
|
||||||
|
add_compile_options(
|
||||||
|
-moutline-atomics # 内联原子操作
|
||||||
|
-mstrict-align # 严格对齐优化
|
||||||
|
)
|
||||||
|
|
||||||
else()
|
else()
|
||||||
# ARM32架构
|
# ARM32架构
|
||||||
add_compile_options(-march=armv7-a -mfpu=neon)
|
add_compile_options(-march=armv7-a -mfpu=neon)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# 通用ARM优化选项
|
# 通用ARM优化选项
|
||||||
# 启用循环展开和其他优化
|
add_compile_options(
|
||||||
add_compile_options(-O3 -funroll-loops)
|
-O3 # 最高级别优化
|
||||||
|
-funroll-loops # 循环展开
|
||||||
|
-fomit-frame-pointer # 省略帧指针
|
||||||
|
-frename-registers # 重命名寄存器
|
||||||
|
-fipa-pta # 点对点分析
|
||||||
|
-floop-optimize # 循环优化
|
||||||
|
-fprefetch-loop-arrays # 预取循环数组
|
||||||
|
-funroll-all-loops # 展开所有循环
|
||||||
|
-fpeel-loops # 循环剥离
|
||||||
|
)
|
||||||
|
|
||||||
|
# 浮点运算优化
|
||||||
|
add_compile_options(
|
||||||
|
-ffast-math # 快速数学运算
|
||||||
|
-ffp-contract=fast # 快速浮点收缩
|
||||||
|
-funsafe-math-optimizations # 不安全的数学优化
|
||||||
|
-ftree-vectorize # 树向量化
|
||||||
|
)
|
||||||
|
|
||||||
# 启用链接时优化(LTO)
|
# 启用链接时优化(LTO)
|
||||||
include(CheckIPOSupported)
|
include(CheckIPOSupported)
|
||||||
check_ipo_supported(RESULT result)
|
check_ipo_supported(RESULT result)
|
||||||
if(result)
|
if(result)
|
||||||
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
|
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
|
||||||
# 检查编译器是否支持 thin LTO
|
|
||||||
include(CheckCCompilerFlag)
|
|
||||||
# check_c_compiler_flag("-flto=thin" HAS_THIN_LTO)
|
|
||||||
# if(HAS_THIN_LTO)
|
|
||||||
# add_compile_options(-flto=thin)
|
|
||||||
# else()
|
|
||||||
# # 回退到普通 LTO
|
|
||||||
# add_compile_options(-flto)
|
|
||||||
# endif()
|
|
||||||
add_compile_options(-flto=auto)
|
add_compile_options(-flto=auto)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# 启用快速数学运算(可能影响精度)
|
|
||||||
# add_compile_options(-ffast-math)
|
|
||||||
|
|
||||||
# 针对特定CPU的优化
|
|
||||||
# 可以根据目标设备替换为具体的CPU型号,如"cortex-a72"等
|
|
||||||
add_compile_options(-mtune=cortex-a76)
|
|
||||||
|
|
||||||
message(STATUS "ARM optimizations enabled for ${CMAKE_SYSTEM_PROCESSOR}")
|
message(STATUS "ARM optimizations enabled for ${CMAKE_SYSTEM_PROCESSOR}")
|
||||||
|
|
||||||
# 添加NEON支持的定义
|
# 添加NEON支持的定义
|
||||||
|
|||||||
@@ -51,6 +51,11 @@ else()
|
|||||||
message("Warning: system architecture not detected, defaulting to 64 bit")
|
message("Warning: system architecture not detected, defaulting to 64 bit")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# 修改默认GF_RADIX为64
|
||||||
|
if (NOT DEFINED GF_RADIX)
|
||||||
|
set(GF_RADIX 64)
|
||||||
|
endif()
|
||||||
|
|
||||||
if (NOT GF_RADIX STREQUAL "AUTO")
|
if (NOT GF_RADIX STREQUAL "AUTO")
|
||||||
if (NOT((GF_RADIX EQUAL 64) OR (GF_RADIX EQUAL 32)))
|
if (NOT((GF_RADIX EQUAL 64) OR (GF_RADIX EQUAL 32)))
|
||||||
message(FATAL_ERROR "Currently supported options for GF_RADIX: 32 or 64. Aborting")
|
message(FATAL_ERROR "Currently supported options for GF_RADIX: 32 or 64. Aborting")
|
||||||
|
|||||||
@@ -13,9 +13,21 @@ option(ENABLE_STRICT "Build with strict compile options." ON)
|
|||||||
option(ENABLE_TESTS "Enable compilation of tests." ON)
|
option(ENABLE_TESTS "Enable compilation of tests." ON)
|
||||||
option(ENABLE_CT_TESTING "Enable compilation for constant time testing." OFF)
|
option(ENABLE_CT_TESTING "Enable compilation for constant time testing." OFF)
|
||||||
option(ENABLE_SIGN "Build with sign functionality" ON)
|
option(ENABLE_SIGN "Build with sign functionality" ON)
|
||||||
|
option(ENABLE_OPENMP "Enable OpenMP for parallel computation" OFF)
|
||||||
set(GMP_LIBRARY "SYSTEM" CACHE STRING "Which version of GMP to use: SYSTEM, BUILD or MINI")
|
set(GMP_LIBRARY "SYSTEM" CACHE STRING "Which version of GMP to use: SYSTEM, BUILD or MINI")
|
||||||
set(GF_RADIX "AUTO" CACHE STRING "Set the radix for the gf module (currently supported values: 32 or 64), or AUTO.")
|
set(GF_RADIX "AUTO" CACHE STRING "Set the radix for the gf module (currently supported values: 32 or 64), or AUTO.")
|
||||||
|
|
||||||
|
if(ENABLE_OPENMP)
|
||||||
|
find_package(OpenMP REQUIRED)
|
||||||
|
if(OpenMP_C_FOUND)
|
||||||
|
add_compile_definitions(HAVE_OPENMP)
|
||||||
|
# 将OpenMP标志添加到全局编译选项,供后续目标使用
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
|
||||||
|
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
|
||||||
|
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
if (NOT DEFINED SQISIGN_BUILD_TYPE)
|
if (NOT DEFINED SQISIGN_BUILD_TYPE)
|
||||||
SET(SQISIGN_BUILD_TYPE "ref")
|
SET(SQISIGN_BUILD_TYPE "ref")
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -29,36 +29,40 @@ __attribute__((hot))
|
|||||||
static void
|
static void
|
||||||
AES256_key_schedule(uint8_t subkeys[15][16], const uint8_t *key)
|
AES256_key_schedule(uint8_t subkeys[15][16], const uint8_t *key)
|
||||||
{
|
{
|
||||||
subkeys_t *sk = (subkeys_t *)subkeys;
|
uint8x16_t rcon = vdupq_n_u8(0x01);
|
||||||
uint8_t rcon = 1;
|
// uint8x16_t rcon_step = vdupq_n_u8(0x1b);
|
||||||
uint32_t s;
|
|
||||||
int i, j;
|
|
||||||
|
|
||||||
memcpy(&subkeys[0][0], key, 32 * sizeof(uint8_t));
|
// 一次性复制前两轮密钥
|
||||||
|
memcpy(&subkeys[0][0], key, 32);
|
||||||
|
|
||||||
for (i = 2; i < 14; i += 2) {
|
uint8x16_t prev_key = vld1q_u8(&subkeys[0][0]);
|
||||||
s = AES_sbox_x4(sk->u32[i - 1][3]);
|
uint8x16_t prev_prev_key = vld1q_u8(&subkeys[1][0]);
|
||||||
sk->u32[i][0] = ROTR32(s, 8) ^ rcon ^ sk->u32[i - 2][0];
|
|
||||||
|
|
||||||
for (j = 1; j < 4; j++) {
|
for (int i = 2; i < 15; i++) {
|
||||||
sk->u32[i][j] = sk->u32[i][j - 1] ^ sk->u32[i - 2][j];
|
// 提取最后一列并进行S-box变换
|
||||||
}
|
uint8x16_t last_col = vextq_u8(prev_key, vdupq_n_u8(0), 12);
|
||||||
|
last_col = vaeseq_u8(last_col, vdupq_n_u8(0));
|
||||||
|
|
||||||
s = AES_sbox_x4(sk->u32[i][3]);
|
// RotWord
|
||||||
sk->u32[i + 1][0] = s ^ sk->u32[i - 1][0];
|
last_col = vextq_u8(last_col, last_col, 3);
|
||||||
|
|
||||||
for (j = 1; j < 4; j++) {
|
// XOR with rcon
|
||||||
sk->u32[i + 1][j] = sk->u32[i + 1][j - 1] ^ sk->u32[i - 1][j];
|
uint8x16_t new_key_first = veorq_u8(veorq_u8(last_col, rcon), prev_prev_key);
|
||||||
}
|
|
||||||
|
|
||||||
rcon = (rcon << 1) ^ ((rcon >> 7) * 0x11b);
|
// 生成新密钥的剩余部分
|
||||||
}
|
uint8x16_t new_key = vextq_u8(prev_prev_key, new_key_first, 12);
|
||||||
|
|
||||||
s = AES_sbox_x4(sk->u32[13][3]);
|
// 保存新密钥
|
||||||
sk->u32[14][0] = ROTR32(s, 8) ^ rcon ^ sk->u32[12][0];
|
vst1q_u8(&subkeys[i][0], new_key);
|
||||||
|
|
||||||
for (j = 1; j < 4; j++) {
|
// 更新rcon
|
||||||
sk->u32[14][j] = sk->u32[14][j - 1] ^ sk->u32[12][j];
|
uint8_t rcon_val = vgetq_lane_u8(rcon, 0);
|
||||||
|
rcon_val = (rcon_val << 1) ^ ((rcon_val >> 7) * 0x1b);
|
||||||
|
rcon = vdupq_n_u8(rcon_val);
|
||||||
|
|
||||||
|
// 更新前两个密钥
|
||||||
|
prev_prev_key = prev_key;
|
||||||
|
prev_key = new_key;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -198,7 +202,7 @@ randombytes_init_arm64crypto(unsigned char *entropy_input, unsigned char *person
|
|||||||
DRBG_ctx.reseed_counter = 1;
|
DRBG_ctx.reseed_counter = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define WAYS 4
|
#define WAYS 8
|
||||||
|
|
||||||
int
|
int
|
||||||
randombytes_arm64crypto(unsigned char *x, unsigned long long xlen)
|
randombytes_arm64crypto(unsigned char *x, unsigned long long xlen)
|
||||||
@@ -223,7 +227,10 @@ randombytes_arm64crypto(unsigned char *x, unsigned long long xlen)
|
|||||||
Vle[j] = Vle[j - 1] + 1;
|
Vle[j] = Vle[j - 1] + 1;
|
||||||
V[j] = Vle[j];
|
V[j] = Vle[j];
|
||||||
bswap128(&V[j]);
|
bswap128(&V[j]);
|
||||||
vV.val[j] = vld1q_u8((uint8_t *)&V[j]);
|
// 分批加载到向量寄存器中
|
||||||
|
if (j % 4 == 0 || j == WAYS - 1) {
|
||||||
|
vV = vld1q_u8_x4((uint8_t *)&V[j-3]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int entered_fast_path = (xlen >= WAYS * 16) ? 1 : 0;
|
int entered_fast_path = (xlen >= WAYS * 16) ? 1 : 0;
|
||||||
@@ -232,25 +239,55 @@ randombytes_arm64crypto(unsigned char *x, unsigned long long xlen)
|
|||||||
// 添加预取指令
|
// 添加预取指令
|
||||||
__builtin_prefetch(&x[64], 1, 3);
|
__builtin_prefetch(&x[64], 1, 3);
|
||||||
for (int j = 0; j < WAYS; j++) {
|
for (int j = 0; j < WAYS; j++) {
|
||||||
Vle[j] += 4;
|
Vle[j] += 8; // 增加步长以减少循环次数
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int j = 0; j < WAYS; j++) {
|
// 展开循环以减少分支预测失败
|
||||||
vV.val[j] = vaeseq_u8(vV.val[j], vsubkeys[0]);
|
for (int j = 0; j < WAYS; j += 4) {
|
||||||
vV.val[j] = vaesmcq_u8(vV.val[j]);
|
uint8x16_t state0 = vaeseq_u8(vV.val[0], vsubkeys[0]);
|
||||||
}
|
uint8x16_t state1 = vaeseq_u8(vV.val[1], vsubkeys[0]);
|
||||||
|
uint8x16_t state2 = vaeseq_u8(vV.val[2], vsubkeys[0]);
|
||||||
|
uint8x16_t state3 = vaeseq_u8(vV.val[3], vsubkeys[0]);
|
||||||
|
|
||||||
|
state0 = vaesmcq_u8(state0);
|
||||||
|
state1 = vaesmcq_u8(state1);
|
||||||
|
state2 = vaesmcq_u8(state2);
|
||||||
|
state3 = vaesmcq_u8(state3);
|
||||||
|
|
||||||
for (int i = 1; i < 13; i++) {
|
for (int i = 1; i < 13; i++) {
|
||||||
for (int j = 0; j < WAYS; j++) {
|
state0 = vaeseq_u8(state0, vsubkeys[i]);
|
||||||
vV.val[j] = vaeseq_u8(vV.val[j], vsubkeys[i]);
|
state1 = vaeseq_u8(state1, vsubkeys[i]);
|
||||||
vV.val[j] = vaesmcq_u8(vV.val[j]);
|
state2 = vaeseq_u8(state2, vsubkeys[i]);
|
||||||
}
|
state3 = vaeseq_u8(state3, vsubkeys[i]);
|
||||||
|
|
||||||
|
state0 = vaesmcq_u8(state0);
|
||||||
|
state1 = vaesmcq_u8(state1);
|
||||||
|
state2 = vaesmcq_u8(state2);
|
||||||
|
state3 = vaesmcq_u8(state3);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int j = 0; j < WAYS; j++) {
|
state0 = vaeseq_u8(state0, vsubkeys[13]);
|
||||||
vV.val[j] = vaeseq_u8(vV.val[j], vsubkeys[13]);
|
state1 = vaeseq_u8(state1, vsubkeys[13]);
|
||||||
vV.val[j] = veorq_u8(vV.val[j], vsubkeys[14]);
|
state2 = vaeseq_u8(state2, vsubkeys[13]);
|
||||||
vst1q_u8(x + j * 16, vV.val[j]);
|
state3 = vaeseq_u8(state3, vsubkeys[13]);
|
||||||
|
|
||||||
|
state0 = veorq_u8(state0, vsubkeys[14]);
|
||||||
|
state1 = veorq_u8(state1, vsubkeys[14]);
|
||||||
|
state2 = veorq_u8(state2, vsubkeys[14]);
|
||||||
|
state3 = veorq_u8(state3, vsubkeys[14]);
|
||||||
|
|
||||||
|
vst1q_u8(x + 0 * 16, state0);
|
||||||
|
vst1q_u8(x + 1 * 16, state1);
|
||||||
|
vst1q_u8(x + 2 * 16, state2);
|
||||||
|
vst1q_u8(x + 3 * 16, state3);
|
||||||
|
|
||||||
|
// 更新向量寄存器
|
||||||
|
if (j + 4 < WAYS) {
|
||||||
|
vV.val[0] = vld1q_u8((uint8_t *)&V[j+4]);
|
||||||
|
vV.val[1] = vld1q_u8((uint8_t *)&V[j+5]);
|
||||||
|
vV.val[2] = vld1q_u8((uint8_t *)&V[j+6]);
|
||||||
|
vV.val[3] = vld1q_u8((uint8_t *)&V[j+7]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int j = 0; j < WAYS; j++) {
|
for (int j = 0; j < WAYS; j++) {
|
||||||
@@ -265,8 +302,8 @@ randombytes_arm64crypto(unsigned char *x, unsigned long long xlen)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (entered_fast_path && xlen == 0) {
|
if (entered_fast_path && xlen == 0) {
|
||||||
asm volatile("" : "+r,m"(Vle[3]) : : "memory");
|
asm volatile("" : "+r,m"(Vle[7]) : : "memory");
|
||||||
V[0] = Vle[3] - 4;
|
V[0] = Vle[7] - 8;
|
||||||
bswap128(&V[0]);
|
bswap128(&V[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -655,13 +655,48 @@ quat_test_lll_lideal_lideal_mul_reduced()
|
|||||||
ibz_mat_4x4_mul(&(gram_test), &(gram_test), &(prod.lattice.basis));
|
ibz_mat_4x4_mul(&(gram_test), &(gram_test), &(prod.lattice.basis));
|
||||||
ibz_mat_4x4_transpose(&(gram_test), &(gram_test));
|
ibz_mat_4x4_transpose(&(gram_test), &(gram_test));
|
||||||
ibz_mat_4x4_mul(&(gram_test), &(gram_test), &(prod.lattice.basis));
|
ibz_mat_4x4_mul(&(gram_test), &(gram_test), &(prod.lattice.basis));
|
||||||
for (int i = 0; i < 4; i++) {
|
|
||||||
ibz_vec_4_set(&vec, (i == 0), (i == 1), (i == 2), (i == 3));
|
// ARM优化: 循环展开以减少分支预测失败的可能性
|
||||||
|
// 原始循环:
|
||||||
|
// for (int i = 0; i < 4; i++) {
|
||||||
|
// ibz_vec_4_set(&vec, (i == 0), (i == 1), (i == 2), (i == 3));
|
||||||
|
// quat_qf_eval(&norm, &gram, &vec);
|
||||||
|
// quat_qf_eval(&test_norm, &gram_test, &vec);
|
||||||
|
// ibz_mul(&norm, &(prod.norm), &norm);
|
||||||
|
// res = res || !(ibz_cmp(&norm, &test_norm) == 0);
|
||||||
|
// }
|
||||||
|
|
||||||
|
// 展开后的循环 - 减少循环开销,更适合ARM处理器流水线
|
||||||
|
ibz_vec_4_set(&vec, 1, 0, 0, 0);
|
||||||
quat_qf_eval(&norm, &gram, &vec);
|
quat_qf_eval(&norm, &gram, &vec);
|
||||||
quat_qf_eval(&test_norm, &gram_test, &vec);
|
quat_qf_eval(&test_norm, &gram_test, &vec);
|
||||||
ibz_mul(&norm, &(prod.norm), &norm);
|
ibz_mul(&norm, &(prod.norm), &norm);
|
||||||
res = res || !(ibz_cmp(&norm, &test_norm) == 0);
|
res = res || !(ibz_cmp(&norm, &test_norm) == 0);
|
||||||
}
|
|
||||||
|
ibz_vec_4_set(&vec, 0, 1, 0, 0);
|
||||||
|
quat_qf_eval(&norm, &gram, &vec);
|
||||||
|
quat_qf_eval(&test_norm, &gram_test, &vec);
|
||||||
|
ibz_mul(&norm, &(prod.norm), &norm);
|
||||||
|
res = res || !(ibz_cmp(&norm, &test_norm) == 0);
|
||||||
|
|
||||||
|
ibz_vec_4_set(&vec, 0, 0, 1, 0);
|
||||||
|
quat_qf_eval(&norm, &gram, &vec);
|
||||||
|
quat_qf_eval(&test_norm, &gram_test, &vec);
|
||||||
|
ibz_mul(&norm, &(prod.norm), &norm);
|
||||||
|
res = res || !(ibz_cmp(&norm, &test_norm) == 0);
|
||||||
|
|
||||||
|
ibz_vec_4_set(&vec, 0, 0, 0, 1);
|
||||||
|
quat_qf_eval(&norm, &gram, &vec);
|
||||||
|
quat_qf_eval(&test_norm, &gram_test, &vec);
|
||||||
|
ibz_mul(&norm, &(prod.norm), &norm);
|
||||||
|
res = res || !(ibz_cmp(&norm, &test_norm) == 0);
|
||||||
|
|
||||||
|
// 使用NEON优化大整数运算(如果可用)
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
// 在支持NEON的ARM64平台上并行处理多个规范评估
|
||||||
|
// 这里可以进一步优化,但需要重构底层的大整数运算库
|
||||||
|
#endif
|
||||||
|
|
||||||
quat_lattice_hnf(&(prod.lattice));
|
quat_lattice_hnf(&(prod.lattice));
|
||||||
|
|
||||||
res = res || !quat_lideal_equals(&i1, &lideal1, &alg);
|
res = res || !quat_lideal_equals(&i1, &lideal1, &alg);
|
||||||
|
|||||||
@@ -3,6 +3,9 @@
|
|||||||
#include <quaternion_data.h>
|
#include <quaternion_data.h>
|
||||||
#include <id2iso.h>
|
#include <id2iso.h>
|
||||||
#include <torsion_constants.h>
|
#include <torsion_constants.h>
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#include <omp.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
void
|
void
|
||||||
secret_key_init(secret_key_t *sk)
|
secret_key_init(secret_key_t *sk)
|
||||||
@@ -27,7 +30,48 @@ protocols_keygen(public_key_t *pk, secret_key_t *sk)
|
|||||||
|
|
||||||
// iterating until a solution has been found
|
// iterating until a solution has been found
|
||||||
while (!found) {
|
while (!found) {
|
||||||
|
// 尝试并行生成多个理想,提高找到解决方案的概率
|
||||||
|
#ifdef _OPENMP
|
||||||
|
int num_threads = omp_get_max_threads();
|
||||||
|
if (num_threads > 1) {
|
||||||
|
int local_found = 0;
|
||||||
|
#pragma omp parallel shared(found, local_found) num_threads(num_threads)
|
||||||
|
{
|
||||||
|
if (!local_found) {
|
||||||
|
secret_key_t local_sk;
|
||||||
|
secret_key_init(&local_sk);
|
||||||
|
|
||||||
|
int thread_found = quat_sampling_random_ideal_O0_given_norm(
|
||||||
|
&local_sk.secret_ideal, &SEC_DEGREE, 1, &QUAT_represent_integer_params, NULL);
|
||||||
|
|
||||||
|
// replacing the secret key ideal by a shorter equivalent one for efficiency
|
||||||
|
thread_found = thread_found && quat_lideal_prime_norm_reduced_equivalent(
|
||||||
|
&local_sk.secret_ideal, &QUATALG_PINFTY, QUAT_primality_num_iter, QUAT_equiv_bound_coeff);
|
||||||
|
|
||||||
|
// ideal to isogeny clapotis
|
||||||
|
ec_basis_t local_B_0_two;
|
||||||
|
thread_found = thread_found && dim2id2iso_arbitrary_isogeny_evaluation(&local_B_0_two, &local_sk.curve, &local_sk.secret_ideal);
|
||||||
|
|
||||||
|
if (thread_found) {
|
||||||
|
#pragma omp critical
|
||||||
|
{
|
||||||
|
if (!local_found) {
|
||||||
|
local_found = 1;
|
||||||
|
found = 1;
|
||||||
|
// Copy local results to global variables
|
||||||
|
quat_left_ideal_copy(&sk->secret_ideal, &local_sk.secret_ideal);
|
||||||
|
ec_curve_copy(&sk->curve, &local_sk.curve);
|
||||||
|
// Copy basis
|
||||||
|
ec_point_copy(&B_0_two.P, &local_B_0_two.P);
|
||||||
|
ec_point_copy(&B_0_two.Q, &local_B_0_two.Q);
|
||||||
|
ec_point_copy(&B_0_two.PmQ, &local_B_0_two.PmQ);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
secret_key_finalize(&local_sk);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
found = quat_sampling_random_ideal_O0_given_norm(
|
found = quat_sampling_random_ideal_O0_given_norm(
|
||||||
&sk->secret_ideal, &SEC_DEGREE, 1, &QUAT_represent_integer_params, NULL);
|
&sk->secret_ideal, &SEC_DEGREE, 1, &QUAT_represent_integer_params, NULL);
|
||||||
|
|
||||||
@@ -36,9 +80,20 @@ protocols_keygen(public_key_t *pk, secret_key_t *sk)
|
|||||||
&sk->secret_ideal, &QUATALG_PINFTY, QUAT_primality_num_iter, QUAT_equiv_bound_coeff);
|
&sk->secret_ideal, &QUATALG_PINFTY, QUAT_primality_num_iter, QUAT_equiv_bound_coeff);
|
||||||
|
|
||||||
// ideal to isogeny clapotis
|
// ideal to isogeny clapotis
|
||||||
|
|
||||||
found = found && dim2id2iso_arbitrary_isogeny_evaluation(&B_0_two, &sk->curve, &sk->secret_ideal);
|
found = found && dim2id2iso_arbitrary_isogeny_evaluation(&B_0_two, &sk->curve, &sk->secret_ideal);
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
found = quat_sampling_random_ideal_O0_given_norm(
|
||||||
|
&sk->secret_ideal, &SEC_DEGREE, 1, &QUAT_represent_integer_params, NULL);
|
||||||
|
|
||||||
|
// replacing the secret key ideal by a shorter equivalent one for efficiency
|
||||||
|
found = found && quat_lideal_prime_norm_reduced_equivalent(
|
||||||
|
&sk->secret_ideal, &QUATALG_PINFTY, QUAT_primality_num_iter, QUAT_equiv_bound_coeff);
|
||||||
|
|
||||||
|
// ideal to isogeny clapotis
|
||||||
|
found = found && dim2id2iso_arbitrary_isogeny_evaluation(&B_0_two, &sk->curve, &sk->secret_ideal);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
// Assert the isogeny was found and images have the correct order
|
// Assert the isogeny was found and images have the correct order
|
||||||
assert(test_basis_order_twof(&B_0_two, &sk->curve, TORSION_EVEN_POWER));
|
assert(test_basis_order_twof(&B_0_two, &sk->curve, TORSION_EVEN_POWER));
|
||||||
|
|||||||
@@ -78,47 +78,47 @@ double ml_predict_success(long norm_val, int trace_val, int kernel_order) {
|
|||||||
|
|
||||||
// 日志函数:记录一次理想尝试
|
// 日志函数:记录一次理想尝试
|
||||||
// 记录到 CSV 文件,包含时间戳
|
// 记录到 CSV 文件,包含时间戳
|
||||||
void ml_log_ideal_attempt(int attempt,
|
// void ml_log_ideal_attempt(int attempt,
|
||||||
const quat_left_ideal_t *lideal_com,
|
// const quat_left_ideal_t *lideal_com,
|
||||||
int kernel_order,
|
// int kernel_order,
|
||||||
int success_flag)
|
// int success_flag)
|
||||||
{
|
// {
|
||||||
const char *dir_path = "./dataset";
|
// const char *dir_path = "./dataset";
|
||||||
struct stat st = {0};
|
// struct stat st = {0};
|
||||||
|
|
||||||
if (stat(dir_path, &st) == -1) {
|
// if (stat(dir_path, &st) == -1) {
|
||||||
mkdir(dir_path, 0755);
|
// mkdir(dir_path, 0755);
|
||||||
}
|
// }
|
||||||
|
|
||||||
// 使用固定文件名,避免频繁创建文件
|
// // 使用固定文件名,避免频繁创建文件
|
||||||
char csv_path[512];
|
// char csv_path[512];
|
||||||
snprintf(csv_path, sizeof(csv_path), "%s/ideal_data.csv", dir_path);
|
// snprintf(csv_path, sizeof(csv_path), "%s/ideal_data.csv", dir_path);
|
||||||
|
|
||||||
// 使用追加模式,避免覆盖
|
// // 使用追加模式,避免覆盖
|
||||||
FILE *logfile = fopen(csv_path, "a+");
|
// FILE *logfile = fopen(csv_path, "a+");
|
||||||
if (!logfile) {
|
// if (!logfile) {
|
||||||
perror("无法打开 ideal_data CSV 文件");
|
// perror("无法打开 ideal_data CSV 文件");
|
||||||
return;
|
// return;
|
||||||
}
|
// }
|
||||||
|
|
||||||
// 如果是第一次写入,添加表头
|
// // 如果是第一次写入,添加表头
|
||||||
if (ftell(logfile) == 0) {
|
// if (ftell(logfile) == 0) {
|
||||||
fprintf(logfile, "Timestamp,Attempt,Norm,Trace,KernelOrder,Prob,Success\n");
|
// fprintf(logfile, "Timestamp,Attempt,Norm,Trace,KernelOrder,Prob,Success\n");
|
||||||
}
|
// }
|
||||||
|
|
||||||
// 提取特征和记录
|
// // 提取特征和记录
|
||||||
long norm_val = ibz_to_long_safe(&lideal_com->norm);
|
// long norm_val = ibz_to_long_safe(&lideal_com->norm);
|
||||||
int trace_val = quat_ideal_trace(lideal_com);
|
// int trace_val = quat_ideal_trace(lideal_com);
|
||||||
double prob = ml_predict_success(norm_val, trace_val, kernel_order);
|
// double prob = ml_predict_success(norm_val, trace_val, kernel_order);
|
||||||
|
|
||||||
time_t t = time(NULL);
|
// time_t t = time(NULL);
|
||||||
struct tm tm_info;
|
// struct tm tm_info;
|
||||||
localtime_r(&t, &tm_info);
|
// localtime_r(&t, &tm_info);
|
||||||
char time_str[32];
|
// char time_str[32];
|
||||||
strftime(time_str, sizeof(time_str), "%Y-%m-%d-%H-%M-%S", &tm_info);
|
// strftime(time_str, sizeof(time_str), "%Y-%m-%d-%H-%M-%S", &tm_info);
|
||||||
|
|
||||||
fprintf(logfile, "%s,%d,%ld,%d,%d,%.3f,%d\n",
|
// fprintf(logfile, "%s,%d,%ld,%d,%d,%.3f,%d\n",
|
||||||
time_str, attempt, norm_val, trace_val, kernel_order, prob, success_flag);
|
// time_str, attempt, norm_val, trace_val, kernel_order, prob, success_flag);
|
||||||
|
|
||||||
fclose(logfile);
|
// fclose(logfile);
|
||||||
}
|
// }
|
||||||
@@ -7,6 +7,14 @@
|
|||||||
#include <id2iso.h>
|
#include <id2iso.h>
|
||||||
#include <torsion_constants.h>
|
#include <torsion_constants.h>
|
||||||
#include <encoded_sizes.h>
|
#include <encoded_sizes.h>
|
||||||
|
#include <intbig.h>
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#include <omp.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// 添加函数声明
|
||||||
|
static bool execute_commit_serial(ec_curve_t *E_com, ec_basis_t *basis_even_com,
|
||||||
|
quat_left_ideal_t *lideal_com, int *attempt_counter, int kernel_order);
|
||||||
|
|
||||||
// compute the commitment with ideal to isogeny clapotis
|
// compute the commitment with ideal to isogeny clapotis
|
||||||
// and apply it to the basis of E0 (together with the multiplication by some scalar u)
|
// and apply it to the basis of E0 (together with the multiplication by some scalar u)
|
||||||
@@ -36,6 +44,76 @@ commit(ec_curve_t *E_com, ec_basis_t *basis_even_com, quat_left_ideal_t *lideal_
|
|||||||
bool found = false;
|
bool found = false;
|
||||||
int kernel_order = 2;
|
int kernel_order = 2;
|
||||||
|
|
||||||
|
// 使用并行计算来加速随机理想采样
|
||||||
|
#ifdef _OPENMP
|
||||||
|
int num_threads = omp_get_max_threads();
|
||||||
|
if (num_threads > 1) {
|
||||||
|
#pragma omp parallel for reduction(|:found) num_threads(num_threads)
|
||||||
|
for (int i = 0; i < num_threads && !found; i++) {
|
||||||
|
// 为每个线程创建独立的临时变量
|
||||||
|
quat_left_ideal_t local_ideal;
|
||||||
|
quat_left_ideal_init(&local_ideal);
|
||||||
|
|
||||||
|
// 添加线程特定的随机性
|
||||||
|
random_state_t local_rand;
|
||||||
|
random_init(&local_rand);
|
||||||
|
random_add_entropy(&local_rand, (unsigned char*)&i, sizeof(i));
|
||||||
|
|
||||||
|
// 尝试采样理想
|
||||||
|
bool local_found = quat_sampling_random_ideal_O0_given_norm(
|
||||||
|
&local_ideal, &COM_DEGREE, 1, &QUAT_represent_integer_params, &local_rand);
|
||||||
|
|
||||||
|
if (local_found) {
|
||||||
|
// 用机器学习预测理想是否值得尝试
|
||||||
|
long norm_val = mpz_get_si(local_ideal.norm);
|
||||||
|
int trace_val = quat_ideal_trace(&local_ideal);
|
||||||
|
double prob = ml_predict_success(norm_val, trace_val, kernel_order);
|
||||||
|
|
||||||
|
if (prob >= 0.3) { // 概率足够高才继续
|
||||||
|
// 尝试优化等价理想
|
||||||
|
local_found = local_found &&
|
||||||
|
quat_lideal_prime_norm_reduced_equivalent(&local_ideal, &QUATALG_PINFTY,
|
||||||
|
QUAT_primality_num_iter, QUAT_equiv_bound_coeff);
|
||||||
|
|
||||||
|
if (local_found) {
|
||||||
|
#pragma omp critical
|
||||||
|
{
|
||||||
|
if (!found) {
|
||||||
|
// 复制成功的结果到输出参数
|
||||||
|
quat_left_ideal_copy(lideal_com, &local_ideal);
|
||||||
|
found = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
quat_left_ideal_finalize(&local_ideal);
|
||||||
|
random_finalize(&local_rand);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 如果并行搜索成功,则执行同源映射评估
|
||||||
|
if (found) {
|
||||||
|
found = dim2id2iso_arbitrary_isogeny_evaluation(basis_even_com, E_com, lideal_com);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// 单线程情况 - 原有逻辑
|
||||||
|
found = execute_commit_serial(E_com, basis_even_com, lideal_com, &attempt_counter, kernel_order);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
// 没有OpenMP的情况 - 原有逻辑
|
||||||
|
found = execute_commit_serial(E_com, basis_even_com, lideal_com, &attempt_counter, kernel_order);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return found;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 辅助函数:串行提交逻辑
|
||||||
|
static bool execute_commit_serial(ec_curve_t *E_com, ec_basis_t *basis_even_com,
|
||||||
|
quat_left_ideal_t *lideal_com, int *attempt_counter, int kernel_order)
|
||||||
|
{
|
||||||
|
bool found = false;
|
||||||
|
|
||||||
// Step 1. 随机生成理想
|
// Step 1. 随机生成理想
|
||||||
found = quat_sampling_random_ideal_O0_given_norm(lideal_com, &COM_DEGREE, 1,
|
found = quat_sampling_random_ideal_O0_given_norm(lideal_com, &COM_DEGREE, 1,
|
||||||
&QUAT_represent_integer_params, NULL);
|
&QUAT_represent_integer_params, NULL);
|
||||||
@@ -46,7 +124,7 @@ commit(ec_curve_t *E_com, ec_basis_t *basis_even_com, quat_left_ideal_t *lideal_
|
|||||||
|
|
||||||
if (prob < 0.3) {
|
if (prob < 0.3) {
|
||||||
// 概率太低,跳过以节省计算
|
// 概率太低,跳过以节省计算
|
||||||
ml_log_ideal_attempt(attempt_counter++, lideal_com, kernel_order, 0);
|
ml_log_ideal_attempt((*attempt_counter)++, lideal_com, kernel_order, 0);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -59,7 +137,7 @@ commit(ec_curve_t *E_com, ec_basis_t *basis_even_com, quat_left_ideal_t *lideal_
|
|||||||
found = found && dim2id2iso_arbitrary_isogeny_evaluation(basis_even_com, E_com, lideal_com);
|
found = found && dim2id2iso_arbitrary_isogeny_evaluation(basis_even_com, E_com, lideal_com);
|
||||||
|
|
||||||
// Step 5. 记录结果
|
// Step 5. 记录结果
|
||||||
ml_log_ideal_attempt(attempt_counter++, lideal_com, kernel_order, found ? 1 : 0);
|
ml_log_ideal_attempt((*attempt_counter)++, lideal_com, kernel_order, found ? 1 : 0);
|
||||||
|
|
||||||
return found;
|
return found;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user