second-round version of SQIsign

Co-authored-by: Marius A. Aardal <marius.andre.aardal@gmail.com>
Co-authored-by: Gora Adj <gora.adj@tii.ae>
Co-authored-by: Diego F. Aranha <dfaranha@cs.au.dk>
Co-authored-by: Andrea Basso <sqisign@andreabasso.com>
Co-authored-by: Isaac Andrés Canales Martínez <icanalesm0500@gmail.com>
Co-authored-by: Jorge Chávez-Saab <jorgechavezsaab@gmail.com>
Co-authored-by: Maria Corte-Real Santos <mariascrsantos98@gmail.com>
Co-authored-by: Luca De Feo <github@defeo.lu>
Co-authored-by: Max Duparc <max.duparc@epfl.ch>
Co-authored-by: Jonathan Komada Eriksen <jonathan.eriksen97@gmail.com>
Co-authored-by: Décio Luiz Gazzoni Filho <decio@decpp.net>
Co-authored-by: Basil Hess <bhe@zurich.ibm.com>
Co-authored-by: Antonin Leroux <antonin.leroux@polytechnique.org>
Co-authored-by: Patrick Longa <plonga@microsoft.com>
Co-authored-by: Luciano Maino <mainoluciano.96@gmail.com>
Co-authored-by: Michael Meyer <michael@random-oracles.org>
Co-authored-by: Hiroshi Onuki <onuki@mist.i.u-tokyo.ac.jp>
Co-authored-by: Lorenz Panny <lorenz@yx7.cc>
Co-authored-by: Giacomo Pope <giacomopope@gmail.com>
Co-authored-by: Krijn Reijnders <reijnderskrijn@gmail.com>
Co-authored-by: Damien Robert <damien.robert@inria.fr>
Co-authored-by: Francisco Rodríguez-Henriquez <francisco.rodriguez@tii.ae>
Co-authored-by: Sina Schaeffler <sschaeffle@student.ethz.ch>
Co-authored-by: Benjamin Wesolowski <benjamin.wesolowski@ens-lyon.fr>
This commit is contained in:
SQIsign team
2025-02-06 00:00:00 +00:00
committed by Lorenz Panny
parent ff34a8cd18
commit 91e9e464fe
481 changed files with 80785 additions and 55963 deletions

View File

@@ -1,92 +1,74 @@
# There are the following dependencies
# ┌─┬──────┬─┐ ┌─┬────┬─┐ ┌─┬──────┬─┐
# │ ├──────┤ │ │ ├────┤ │ │ ├──────┤ │
# │ │Keygen│ │ │ │Sign│ │ │ │Verify│ │
# │ ├──────┤ │ │ ├────┤ │ │ ├──────┤ │
# └─┴───┬──┴─┘ └─┴─┬──┴─┘ └─┴───┬──┴─┘
# │ │ │
# │ │ │
# ├────────────────────┼─────────────────┐ │
# │ │ │ │
# │ │ │ │
# ┌───▼──┐ ┌──────▼────────┐ ┌────▼─────▼───────────┐
# │ PRNG ◄────┬─────┤ Iso <-> Ideal ├───► Elliptic Curves, │
# └───▲──┘ │ └──────┬────────┘ │ Pairings & Isogenies │
# │ │ │ └───▲──────┬───────────┘
# │ │ │ │ │
# ┌───┴──┐ │ │ │ │
# │ KLPT ◄────┘ │ ┌──────────┘ │
# └───┬──┘ │ │ │
# │ │ │ │
# ┌─────────▼─────────┐ │ │ │
# │ Quaternion orders │ │ │ ┌────▼───┐
# │ and ideals │ │ │ │ GF(p²) │
# └─────────┬─────────┘ │ │ └────┬───┘
# │ ┌─┬──────▼─────┴──┬─┐ │
# ┌─────▼─────┐ │ ├───────────────┤ │ ┌─────▼─────┐
# │ MP BigInt │ │ │Precomputations│ │ │ FP BigInt │
# └───────────┘ │ ├───────────────┤ │ └───────────┘
# └─┴───────────────┴─┘
# Sub-directories that make up the library. The ones guarded by ENABLE_SIGN
# are only configured when signing support is requested.
# NOTE(review): this listing comes from a diff view and appears to interleave
# the previous and the new directory lists (quaternion, gf, precomp, ec and
# id2iso each occur twice) - confirm against the checked-out file.
add_subdirectory(common)
add_subdirectory(intbig)
add_subdirectory(quaternion)
add_subdirectory(precomp)
add_subdirectory(klpt)
add_subdirectory(gf)
add_subdirectory(ec)
add_subdirectory(id2iso)
add_subdirectory(protocols)
if(ENABLE_SIGN)
add_subdirectory(quaternion)
endif()
add_subdirectory(mp)
add_subdirectory(gf)
add_subdirectory(precomp)
add_subdirectory(ec)
add_subdirectory(hd)
add_subdirectory(verification)
if(ENABLE_SIGN)
add_subdirectory(id2iso)
add_subdirectory(signature)
endif()
# For every entry of SVARIANT_S this loop declares four libraries:
#   sqisign_<variant>               - sqisign.c linked against sqisign_common_sys
#   sqisign_<variant>_test          - sqisign.c linked against sqisign_common_test
#   sqisign_<variant>_nistapi       - nistapi/<variant>/api.c on top of sqisign_<variant>
#   sqisign_<variant>_test_nistapi  - nistapi/<variant>/api.c on top of sqisign_<variant>_test
# Each of them gets the compile definition SQISIGN_VARIANT=<variant>.
# NOTE(review): this span comes from a diff view and appears to interleave
# removed and added lines: the target_link_libraries( calls opened directly
# after each add_library(... ${SOURCE_FILES_VARIANT}) are never closed before
# the next one starts, and target_include_directories / the nistapi link lines
# appear twice per target. Confirm against the checked-out file before editing.
FOREACH(SVARIANT ${SVARIANT_S})
string(TOLOWER ${SVARIANT} SVARIANT_LOWER)
string(TOUPPER ${SVARIANT} SVARIANT_UPPER)
set(SOURCE_FILES_VARIANT sqisign.c)
# Library for SQIsign variant
add_library(sqisign_${SVARIANT_LOWER} ${SOURCE_FILES_VARIANT})
target_link_libraries(sqisign_${SVARIANT_LOWER} PUBLIC
${LIB_PROTOCOLS_${SVARIANT_UPPER}}
${LIB_ID2ISO_${SVARIANT_UPPER}}
${LIB_KLPT_${SVARIANT_UPPER}}
${LIB_QUATERNION}
${LIB_PRECOMP_${SVARIANT_UPPER}}
${LIB_INTBIG}
${LIB_GF_${SVARIANT_UPPER}}
${LIB_EC_${SVARIANT_UPPER}}
${GMP}
target_link_libraries(sqisign_${SVARIANT_LOWER} PUBLIC
$<$<BOOL:${ENABLE_SIGN}>:${LIB_SIGNATURE_${SVARIANT_UPPER}}>
${LIB_VERIFICATION_${SVARIANT_UPPER}}
$<$<BOOL:${ENABLE_SIGN}>:${LIB_ID2ISO_${SVARIANT_UPPER}}>
$<$<BOOL:${ENABLE_SIGN}>:${LIB_QUATERNION}>
${LIB_MP}
${LIB_GF_${SVARIANT_UPPER}}
${LIB_EC_${SVARIANT_UPPER}}
${LIB_HD_${SVARIANT_UPPER}}
${LIB_PRECOMP_${SVARIANT_UPPER}}
$<$<BOOL:${ENABLE_SIGN}>:GMP>
sqisign_common_sys
)
target_include_directories(sqisign_${SVARIANT_LOWER} PUBLIC ${INC_PROTOCOLS} ${INC_INTBIG} ${INC_QUATERNION} ${INC_PRECOMP_${SVARIANT_UPPER}} ${INC_EC} ${INC_GF_${SVARIANT_UPPER}} ${INC_COMMON} ${INC_KLPT} ${INC_ID2ISO} ../include PRIVATE common/generic internal)
target_include_directories(sqisign_${SVARIANT_LOWER} PUBLIC $<$<BOOL:${ENABLE_SIGN}>:${INC_SIGNATURE}> ${INC_VERIFICATION} $<$<BOOL:${ENABLE_SIGN}>:${INC_QUATERNION}> ${INC_PRECOMP_${SVARIANT_UPPER}} ${INC_MP} ${INC_EC} ${INC_GF} ${INC_GF_${SVARIANT_UPPER}} ${INC_COMMON} ${INC_HD} $<$<BOOL:${ENABLE_SIGN}>:${INC_ID2ISO}> ../include PRIVATE common/generic internal)
target_compile_definitions(sqisign_${SVARIANT_LOWER} PUBLIC SQISIGN_VARIANT=${SVARIANT})
# Library for SQIsign variant (test)
add_library(sqisign_${SVARIANT_LOWER}_test ${SOURCE_FILES_VARIANT})
target_link_libraries(sqisign_${SVARIANT_LOWER}_test PUBLIC
${LIB_PROTOCOLS_${SVARIANT_UPPER}}
${LIB_ID2ISO_${SVARIANT_UPPER}}
${LIB_KLPT_${SVARIANT_UPPER}}
${LIB_QUATERNION}
${LIB_PRECOMP_${SVARIANT_UPPER}}
${LIB_INTBIG}
${LIB_GF_${SVARIANT_UPPER}}
${LIB_EC_${SVARIANT_UPPER}}
${GMP}
target_link_libraries(sqisign_${SVARIANT_LOWER}_test PUBLIC
$<$<BOOL:${ENABLE_SIGN}>:${LIB_SIGNATURE_${SVARIANT_UPPER}}>
${LIB_VERIFICATION_${SVARIANT_UPPER}}
$<$<BOOL:${ENABLE_SIGN}>:${LIB_ID2ISO_${SVARIANT_UPPER}}>
$<$<BOOL:${ENABLE_SIGN}>:${LIB_QUATERNION}>
${LIB_MP}
${LIB_GF_${SVARIANT_UPPER}}
${LIB_EC_${SVARIANT_UPPER}}
${LIB_HD_${SVARIANT_UPPER}}
${LIB_PRECOMP_${SVARIANT_UPPER}}
$<$<BOOL:${ENABLE_SIGN}>:GMP>
sqisign_common_test
)
target_include_directories(sqisign_${SVARIANT_LOWER}_test PUBLIC ${INC_PROTOCOLS} ${INC_INTBIG} ${INC_QUATERNION} ${INC_PRECOMP_${SVARIANT_UPPER}} ${INC_EC} ${INC_GF_${SVARIANT_UPPER}} ${INC_COMMON} ${INC_KLPT} ${INC_ID2ISO} ../include PRIVATE common/generic internal)
target_include_directories(sqisign_${SVARIANT_LOWER}_test PUBLIC $<$<BOOL:${ENABLE_SIGN}>:${INC_SIGNATURE}> ${INC_VERIFICATION} $<$<BOOL:${ENABLE_SIGN}>:${INC_QUATERNION}> ${INC_PRECOMP_${SVARIANT_UPPER}} ${INC_MP} ${INC_EC} ${INC_GF} ${INC_GF_${SVARIANT_UPPER}} ${INC_COMMON} ${INC_HD} $<$<BOOL:${ENABLE_SIGN}>:${INC_ID2ISO}> ../include PRIVATE common/generic internal)
target_compile_definitions(sqisign_${SVARIANT_LOWER}_test PUBLIC SQISIGN_VARIANT=${SVARIANT})
# Library with NIST API
set(SOURCE_FILE_NISTAPI nistapi/${SVARIANT_LOWER}/api.c)
add_library(sqisign_${SVARIANT_LOWER}_nistapi ${SOURCE_FILE_NISTAPI})
target_link_libraries(sqisign_${SVARIANT_LOWER}_nistapi PRIVATE sqisign_${SVARIANT_LOWER})
target_link_libraries(sqisign_${SVARIANT_LOWER}_nistapi PUBLIC sqisign_${SVARIANT_LOWER})
target_include_directories(sqisign_${SVARIANT_LOWER}_nistapi PUBLIC nistapi/${SVARIANT_LOWER} PUBLIC ../include)
target_compile_definitions(sqisign_${SVARIANT_LOWER}_nistapi PUBLIC SQISIGN_VARIANT=${SVARIANT})
# Library with NIST API (test)
add_library(sqisign_${SVARIANT_LOWER}_test_nistapi ${SOURCE_FILE_NISTAPI})
target_link_libraries(sqisign_${SVARIANT_LOWER}_test_nistapi PRIVATE sqisign_${SVARIANT_LOWER}_test)
target_link_libraries(sqisign_${SVARIANT_LOWER}_test_nistapi PUBLIC sqisign_${SVARIANT_LOWER}_test)
target_include_directories(sqisign_${SVARIANT_LOWER}_test_nistapi PUBLIC nistapi/${SVARIANT_LOWER})
target_compile_definitions(sqisign_${SVARIANT_LOWER}_test_nistapi PUBLIC SQISIGN_VARIANT=${SVARIANT})
ENDFOREACH()

View File

@@ -1,3 +1,8 @@
# Opt in to CMP0076 (target_sources() converts relative source paths to
# absolute ones) whenever the running CMake knows that policy.
if(POLICY CMP0076)
  cmake_policy(SET CMP0076 NEW)
endif()

# CCSD_NAME is the last path component of the directory being processed and
# CCSD_NAME_UPPER its upper-case form. The expansions are quoted so that an
# unusual path (for instance one containing ';') is still passed as a single
# argument.
get_filename_component(CCSD_NAME "${CMAKE_CURRENT_SOURCE_DIR}" NAME)
string(TOUPPER "${CCSD_NAME}" CCSD_NAME_UPPER)

# Pull in the variant and implementation selection scripts named by the
# SELECT_SQISIGN_VARIANT and SELECT_IMPL_TYPE variables.
include("${SELECT_SQISIGN_VARIANT}")
include("${SELECT_IMPL_TYPE}")

View File

@@ -0,0 +1,40 @@
# CTR-DRBG backend using the ARMv8 crypto extensions.
#
# Clang-family compilers build the inline-assembly implementation; every other
# compiler builds the intrinsics implementation, which is compiled with
# -fno-strict-aliasing.
if(CMAKE_C_COMPILER_ID MATCHES "Clang")
  set(SOURCE_FILES_COMMON_ARM64CRYPTO randombytes_ctrdrbg_inline_asm.c)
else()
  set(SOURCE_FILES_COMMON_ARM64CRYPTO randombytes_ctrdrbg.c)
  set_source_files_properties(randombytes_ctrdrbg.c PROPERTIES COMPILE_FLAGS -fno-strict-aliasing)
endif()

# Add the backend to both common libraries.
foreach(SQISIGN_COMMON_TARGET sqisign_common_test sqisign_common_sys)
  target_sources(${SQISIGN_COMMON_TARGET} PRIVATE ${SOURCE_FILES_COMMON_ARM64CRYPTO})
  target_include_directories(${SQISIGN_COMMON_TARGET} PRIVATE include)
  target_compile_definitions(${SQISIGN_COMMON_TARGET} PRIVATE RANDOMBYTES_ARM64CRYPTO)
  target_compile_options(${SQISIGN_COMMON_TARGET} PRIVATE -march=armv8-a+crypto)
endforeach()

# Sources shared by the stand-alone CTR-DRBG test and benchmark programs.
set(SOURCE_FILES_CTRDRBG_TEST_BENCHMARK
  ${SOURCE_FILES_COMMON_ARM64CRYPTO}
  ../ref/aes_c.c
  ../ref/randombytes_ctrdrbg.c
  ../generic/randombytes_system.c
)

# Test executable.
add_executable(sqisign_test_ctrdrbg_arm64crypto ${SOURCE_FILES_CTRDRBG_TEST_BENCHMARK} ../generic/test/test_ctrdrbg.c)
target_include_directories(sqisign_test_ctrdrbg_arm64crypto PRIVATE ${INC_PUBLIC} ${INC_COMMON} include ../ref/include)
target_compile_definitions(sqisign_test_ctrdrbg_arm64crypto PRIVATE
  CTRDRBG_TEST_BENCH
  RANDOMBYTES_INIT_PLATFORM=randombytes_init_arm64crypto
  RANDOMBYTES_PLATFORM=randombytes_arm64crypto)
target_compile_options(sqisign_test_ctrdrbg_arm64crypto PRIVATE -march=armv8-a+crypto)
# NAME/COMMAND signature: CMake substitutes the real location of the executable
# target, which the legacy add_test(<name> <command>) form does not do.
add_test(NAME sqisign_test_ctrdrbg_arm64crypto COMMAND sqisign_test_ctrdrbg_arm64crypto)

# Benchmark executable, registered in the BM_BINS cache list.
add_executable(sqisign_bench_ctrdrbg_arm64crypto ${SOURCE_FILES_CTRDRBG_TEST_BENCHMARK} ../generic/test/bench_ctrdrbg.c)
target_include_directories(sqisign_bench_ctrdrbg_arm64crypto PRIVATE ${INC_PUBLIC} ${INC_COMMON} include ../ref/include)
target_compile_definitions(sqisign_bench_ctrdrbg_arm64crypto PRIVATE
  CTRDRBG_TEST_BENCH
  RANDOMBYTES_INIT_PLATFORM=randombytes_init_arm64crypto
  RANDOMBYTES_PLATFORM=randombytes_arm64crypto)
target_compile_options(sqisign_bench_ctrdrbg_arm64crypto PRIVATE -march=armv8-a+crypto)
set(BM_BINS ${BM_BINS} sqisign_bench_ctrdrbg_arm64crypto CACHE INTERNAL "List of benchmark executables")

View File

@@ -0,0 +1,27 @@
// SPDX-License-Identifier: Apache-2.0
#ifndef RANDOMBYTES_ARM64CRYPTO_H
#define RANDOMBYTES_ARM64CRYPTO_H
#include <stdio.h>
/* Status codes. RNG_SUCCESS is the value returned by
 * randombytes_arm64crypto(); the negative codes are presumably kept for
 * compatibility with the NIST rng.h interface - TODO confirm. */
#define RNG_SUCCESS 0
#define RNG_BAD_MAXLEN -1
#define RNG_BAD_OUTBUF -2
#define RNG_BAD_REQ_LEN -3
/* State of an AES-based extendable-output function.
 * NOTE(review): presumably mirrors the struct of the same name in the NIST
 * reference rng.h - confirm where it is used. */
typedef struct {
unsigned char buffer[16];
int buffer_pos;
unsigned long length_remaining;
unsigned char key[32];
unsigned char ctr[16];
} AES_XOF_struct;
/* State of the AES-256 CTR-DRBG. */
typedef struct {
/* 256-bit AES key; expanded with AES256_key_schedule() on every call. */
unsigned char Key[32];
/* 128-bit counter block. */
unsigned char V[16];
/* Set to 1 by the init function and incremented once per output request. */
int reseed_counter;
} AES256_CTR_DRBG_struct;
#endif /* RANDOMBYTES_ARM64CRYPTO_H */

View File

@@ -0,0 +1,276 @@
// SPDX-License-Identifier: Apache-2.0
#include "randombytes_arm64crypto.h"
#include <arm_neon.h>
#include <string.h>
static AES256_CTR_DRBG_struct DRBG_ctx;
// Apply the AES S-box to each of the four bytes of `in`.
//
// The word is broadcast to all four 32-bit lanes and pushed through a single
// AESE step with an all-zero round key. With a zero key the AddRoundKey part
// is a no-op, and since every column holds the same bytes the ShiftRows part
// does not change lane 0, which therefore holds the substituted bytes of `in`.
static inline uint32_t AES_sbox_x4(uint32_t in) {
    const uint32x4_t broadcast = vdupq_n_u32(in);
    const uint8x16_t zero_key = vdupq_n_u8(0);
    const uint8x16_t substituted =
        vaeseq_u8(vreinterpretq_u8_u32(broadcast), zero_key);

    return vgetq_lane_u32(vreinterpretq_u32_u8(substituted), 0);
}
// Rotate the 32-bit value x right by n bits (0 < n < 32). Both arguments are
// parenthesised so that compound expressions expand with the intended
// precedence.
#define ROTR32(x, n) (((x) << (32 - (n))) | ((x) >> (n)))
// Byte-wise and 32-bit-word-wise views of the 15 AES-256 round keys
// (15 keys x 16 bytes). AES256_key_schedule() uses the word view.
typedef union {
uint8_t u8[15][16];
uint32_t u32[15][4];
} subkeys_t;
// Expand a 32-byte AES-256 key into 15 round keys.
//
// subkeys - output, 15 round keys of 16 bytes each; accessed as 32-bit words
//           through subkeys_t
// key     - input, 32 bytes
static void AES256_key_schedule(uint8_t subkeys[15][16], const uint8_t *key) {
    subkeys_t *rk = (subkeys_t *)subkeys;
    uint8_t round_const = 1;

    // Round keys 0 and 1 are the raw key.
    memcpy(&subkeys[0][0], key, 32 * sizeof(uint8_t));

    for (int r = 2; r <= 14; r += 2) {
        // Even round key: substituted + rotated last word of the previous
        // key, combined with the round constant.
        uint32_t sub = AES_sbox_x4(rk->u32[r - 1][3]);
        rk->u32[r][0] = ROTR32(sub, 8) ^ round_const ^ rk->u32[r - 2][0];
        for (int w = 1; w < 4; w++) {
            rk->u32[r][w] = rk->u32[r][w - 1] ^ rk->u32[r - 2][w];
        }

        // Round key 14 is the last one; there is no odd key after it.
        if (r == 14) {
            break;
        }

        // Odd round key: substitution only, no rotation or round constant.
        sub = AES_sbox_x4(rk->u32[r][3]);
        rk->u32[r + 1][0] = sub ^ rk->u32[r - 1][0];
        for (int w = 1; w < 4; w++) {
            rk->u32[r + 1][w] = rk->u32[r + 1][w - 1] ^ rk->u32[r - 1][w];
        }

        // Advance the round constant (doubling in GF(2^8)).
        round_const = (round_const << 1) ^ ((round_const >> 7) * 0x11b);
    }
}
// Encrypt `ways` independent 16-byte blocks with AES-256.
//
// ways     - number of blocks
// vsubkeys - array of 15 round keys (uint8x16_t)
// ctr      - array of `ways` input blocks (uint8x16_t)
// out      - byte pointer receiving ways * 16 bytes of output
//
// The macro expands to a single statement without a trailing semicolon, so
// `AES256_ECB_XWAYS(...);` can be used safely in an unbraced if/else. Array
// and pointer arguments are parenthesised.
#define AES256_ECB_XWAYS(ways, vsubkeys, ctr, out)                            \
    do {                                                                       \
        uint8x16_t state[ways];                                                \
                                                                               \
        for (int j = 0; j < ways; j++) {                                       \
            state[j] = vaeseq_u8((ctr)[j], (vsubkeys)[0]);                     \
            state[j] = vaesmcq_u8(state[j]);                                   \
        }                                                                      \
                                                                               \
        for (int i = 1; i < 13; i++) {                                         \
            for (int j = 0; j < ways; j++) {                                   \
                state[j] = vaeseq_u8(state[j], (vsubkeys)[i]);                 \
                state[j] = vaesmcq_u8(state[j]);                               \
            }                                                                  \
        }                                                                      \
                                                                               \
        for (int j = 0; j < ways; j++) {                                       \
            state[j] = vaeseq_u8(state[j], (vsubkeys)[13]);                    \
            state[j] = veorq_u8(state[j], (vsubkeys)[14]);                     \
            vst1q_u8((out) + j * 16, state[j]);                                \
        }                                                                      \
    } while (0)
// Encrypt one 128-bit block with AES-256.
// vsubkeys - the 15 round keys for AES-256
// ctr - a 128-bit plaintext value
// buffer - receives the 128-bit (16-byte) ciphertext value
static void AES256_ECB(uint8x16_t vsubkeys[15], uint8x16_t ctr,
unsigned char *buffer) {
// (&ctr) lets the macro index the single block as ctr[0].
AES256_ECB_XWAYS(1, vsubkeys, (&ctr), buffer);
}
// Encrypt three 128-bit blocks with AES-256.
// vsubkeys - the 15 round keys for AES-256
// ctr - an array of 3 x 128-bit plaintext values
// buffer - receives 3 x 128-bit (48 bytes) of ciphertext
static void AES256_ECB_x3(uint8x16_t vsubkeys[15], uint8x16_t ctr[3],
unsigned char *buffer) {
AES256_ECB_XWAYS(3, vsubkeys, ctr, buffer);
}
// Reverse the byte order of the 128-bit value *x in place.
//
// The two 64-bit halves are exchanged and each is byte-swapped. The halves
// are copied in and out with memcpy instead of accessing the __uint128_t
// object through a uint64_t pointer, so the function does not rely on
// -fno-strict-aliasing.
static void bswap128(__uint128_t *x) {
    uint64_t halves[2];

    memcpy(halves, x, sizeof(halves));

    const uint64_t swapped_second = __builtin_bswap64(halves[1]);
    const uint64_t swapped_first = __builtin_bswap64(halves[0]);
    halves[0] = swapped_second;
    halves[1] = swapped_first;

    memcpy(x, halves, sizeof(halves));
}
// Add `incr` to the 16-byte counter V in place.
//
// The counter is byte-reversed with bswap128() before and after the addition,
// i.e. the arithmetic is done on the byte-reversed form of what is stored.
// The bytes are copied into a local __uint128_t rather than casting V, because
// a plain byte array is not guaranteed to satisfy the alignment of
// __uint128_t and must not be accessed through that type.
static void add_to_V(unsigned char V[], int incr) {
    __uint128_t counter;

    memcpy(&counter, V, sizeof(counter));
    bswap128(&counter);
    counter += incr;
    bswap128(&counter);
    memcpy(V, &counter, sizeof(counter));
}
// CTR-DRBG update step: derive a new Key and V from three encrypted counter
// blocks, optionally XORed with provided_data.
// provided_data - 48 bytes mixed into the new state, or NULL
// vsubkeys - round keys of the current key
// Key - receives the new 32-byte key
// V - receives the new 16-byte counter block
// NOTE(review): the starting counter is read from DRBG_ctx.V and the final
// increment is applied to DRBG_ctx.V, not to the V argument. The callers in
// this file always pass DRBG_ctx.V, so the two coincide here.
static void AES256_CTR_DRBG_Update(unsigned char *provided_data,
uint8x16_t vsubkeys[15], unsigned char *Key,
unsigned char *V) {
unsigned char temp[48];
__uint128_t V128, t;
uint64x2_t vV[3];
memcpy(&V128, DRBG_ctx.V, sizeof(V128));
// Byte-reverse so the counter can be incremented with integer arithmetic.
bswap128(&V128);
// Build the three counter blocks V+1, V+2, V+3 in stored byte order.
for (int j = 0; j < 3; j++) {
V128++;
t = V128;
bswap128(&t);
vV[j] = vld1q_u64((uint64_t *)&t);
}
AES256_ECB_x3(vsubkeys, (uint8x16_t *)vV, temp);
if (provided_data != NULL)
for (int i = 0; i < 48; i++)
temp[i] ^= provided_data[i];
// First 32 bytes become the key, the last 16 the counter block.
memcpy(Key, temp, 32);
memcpy(V, temp + 32, 16);
// The stored counter is kept one ahead: randombytes_arm64crypto() encrypts
// the stored value itself as its first block.
add_to_V(DRBG_ctx.V, 1);
}
// Seed the global DRBG state.
//
// entropy_input          - 48 bytes of seed material
// personalization_string - optional 48 bytes XORed into the seed, may be NULL
// security_strength      - ignored
void randombytes_init_arm64crypto(unsigned char *entropy_input,
                                  unsigned char *personalization_string,
                                  int security_strength) {
    unsigned char seed_material[48];
    uint8_t round_keys[15][16];
    uint8x16_t vsubkeys[15];

    (void)security_strength;

    // seed = entropy, optionally XORed with the personalization string.
    for (int i = 0; i < 48; i++) {
        seed_material[i] = entropy_input[i];
    }
    if (personalization_string != NULL) {
        for (int i = 0; i < 48; i++) {
            seed_material[i] ^= personalization_string[i];
        }
    }

    // Start from the all-zero key and counter.
    memset(DRBG_ctx.Key, 0x00, sizeof(DRBG_ctx.Key));
    memset(DRBG_ctx.V, 0x00, sizeof(DRBG_ctx.V));

    AES256_key_schedule(round_keys, DRBG_ctx.Key);
    for (int k = 0; k < 15; k++) {
        vsubkeys[k] = vld1q_u8(round_keys[k]);
    }

    AES256_CTR_DRBG_Update(seed_material, vsubkeys, DRBG_ctx.Key, DRBG_ctx.V);
    DRBG_ctx.reseed_counter = 1;
}
// Number of counter blocks encrypted per iteration of the bulk loop.
#define WAYS 4
// Write xlen bytes of DRBG output to x, then refresh DRBG_ctx.Key / DRBG_ctx.V
// with AES256_CTR_DRBG_Update() and bump the reseed counter.
// Always returns RNG_SUCCESS.
int randombytes_arm64crypto(unsigned char *x, unsigned long long xlen) {
uint8_t subkeys[15][16];
unsigned char block[16];
// V[j]: counter j in the byte order stored in DRBG_ctx.V.
// Vle[j]: the same counter after bswap128(), the form used for arithmetic.
__uint128_t V[WAYS], Vle[WAYS];
uint8x16x4_t vV;
uint8x16_t vsubkeys[15];
AES256_key_schedule(subkeys, DRBG_ctx.Key);
for (int j = 0; j < 15; j++) {
vsubkeys[j] = vld1q_u8(subkeys[j]);
}
// Counter 0 is the stored DRBG_ctx.V itself; counters 1..WAYS-1 follow it.
memcpy(&Vle[0], DRBG_ctx.V, sizeof(Vle[0]));
V[0] = Vle[0];
vV.val[0] = vld1q_u8((uint8_t *)&V[0]);
bswap128(&Vle[0]);
for (int j = 1; j < WAYS; j++) {
Vle[j] = Vle[j - 1] + 1;
V[j] = Vle[j];
bswap128(&V[j]);
vV.val[j] = vld1q_u8((uint8_t *)&V[j]);
}
int entered_fast_path = (xlen >= WAYS * 16) ? 1 : 0;
// Bulk loop: WAYS blocks (64 bytes) per iteration.
while (xlen >= WAYS * 16) {
// Advance every counter by the number of blocks consumed per iteration.
for (int j = 0; j < WAYS; j++) {
Vle[j] += 4;
}
for (int j = 0; j < WAYS; j++) {
vV.val[j] = vaeseq_u8(vV.val[j], vsubkeys[0]);
vV.val[j] = vaesmcq_u8(vV.val[j]);
}
for (int i = 1; i < 13; i++) {
for (int j = 0; j < WAYS; j++) {
vV.val[j] = vaeseq_u8(vV.val[j], vsubkeys[i]);
vV.val[j] = vaesmcq_u8(vV.val[j]);
}
}
for (int j = 0; j < WAYS; j++) {
vV.val[j] = vaeseq_u8(vV.val[j], vsubkeys[13]);
vV.val[j] = veorq_u8(vV.val[j], vsubkeys[14]);
vst1q_u8(x + j * 16, vV.val[j]);
}
// Reload the vector registers with the counters for the next iteration.
for (int j = 0; j < WAYS; j++) {
V[j] = Vle[j];
bswap128(&V[j]);
}
vV = vld1q_u8_x4((uint8_t *)V);
x += WAYS * 16;
xlen -= WAYS * 16;
}
// If the bulk loop produced the entire output, step V[0] back to the last
// counter that was actually encrypted (Vle[3] - 4), matching what the tail
// loop below leaves in V[0] in the other cases.
if (entered_fast_path && xlen == 0) {
// Empty asm taking Vle[3] as a read-write operand with a memory clobber;
// presumably an optimisation barrier - TODO confirm intent.
asm volatile("" : "+r,m"(Vle[3]) : : "memory");
V[0] = Vle[3] - 4;
bswap128(&V[0]);
}
// Tail: one block at a time. The final block (16 bytes or fewer) goes through
// a temporary buffer and does not advance the counter.
while (xlen > 0) {
if (xlen > 16) {
AES256_ECB(vsubkeys, vld1q_u8((uint8_t *)&V[0]), x);
x += 16;
xlen -= 16;
Vle[0]++;
V[0] = Vle[0];
bswap128(&V[0]);
} else {
AES256_ECB(vsubkeys, vld1q_u8((uint8_t *)&V[0]), block);
memcpy(x, block, xlen);
xlen = 0;
}
}
// Store the counter back before the update step, which reads DRBG_ctx.V.
memcpy(DRBG_ctx.V, &V[0], sizeof(V[0]));
AES256_CTR_DRBG_Update(NULL, vsubkeys, DRBG_ctx.Key, DRBG_ctx.V);
DRBG_ctx.reseed_counter++;
return RNG_SUCCESS;
}
#ifdef RANDOMBYTES_ARM64CRYPTO
// Generic entry point: fill random_array with nbytes of DRBG output.
// Returns the status of randombytes_arm64crypto().
int randombytes(unsigned char *random_array, unsigned long long nbytes) {
    int ret = randombytes_arm64crypto(random_array, nbytes);
#ifdef ENABLE_CT_TESTING
    // Mark the whole output buffer as undefined for constant-time testing.
    // The length must be nbytes; passing the status code (0 on success)
    // would mark nothing.
    // NOTE(review): the macro comes from <valgrind/memcheck.h>, which is not
    // included in this file - confirm it is provided by the build.
    VALGRIND_MAKE_MEM_UNDEFINED(random_array, nbytes);
#endif
    return ret;
}

// Generic entry point: seed the DRBG, see randombytes_init_arm64crypto().
void randombytes_init(unsigned char *entropy_input,
                      unsigned char *personalization_string,
                      int security_strength) {
    randombytes_init_arm64crypto(entropy_input, personalization_string,
                                 security_strength);
}
#endif

View File

@@ -0,0 +1,422 @@
// SPDX-License-Identifier: Apache-2.0
#include <arm_neon.h>
#include <string.h>
#include "randombytes_arm64crypto.h"
// One 128-bit value viewed as 16 bytes, two 64-bit words or a single
// __uint128_t. Used for the DRBG counter so that the byte, word and integer
// views can be accessed without pointer casts.
typedef union {
uint8_t u8[16];
uint64_t u64[2];
__uint128_t u128;
} u128_t;
static AES256_CTR_DRBG_struct DRBG_ctx;
// Apply the AES S-box to each of the four bytes of `in`.
//
// The word is broadcast to all four 32-bit lanes and pushed through a single
// AESE step with an all-zero round key. With a zero key the AddRoundKey part
// is a no-op, and since every column holds the same bytes the ShiftRows part
// does not change lane 0, which therefore holds the substituted bytes of `in`.
static inline uint32_t AES_sbox_x4(uint32_t in) {
    const uint32x4_t broadcast = vdupq_n_u32(in);
    const uint8x16_t zero_key = vdupq_n_u8(0);
    const uint8x16_t substituted =
        vaeseq_u8(vreinterpretq_u8_u32(broadcast), zero_key);

    return vgetq_lane_u32(vreinterpretq_u32_u8(substituted), 0);
}
// Rotate the 32-bit value x right by n bits (0 < n < 32). Both arguments are
// parenthesised so that compound expressions expand with the intended
// precedence.
#define ROTR32(x, n) (((x) << (32 - (n))) | ((x) >> (n)))
// 32-bit-word view of the 15 AES-256 round keys (15 keys x 4 words), used by
// AES256_key_schedule().
typedef union {
uint32_t u32[15][4];
} subkeys_t;
// Expand a 32-byte AES-256 key into 15 round keys.
//
// subkeys - output, 15 round keys of 16 bytes each; accessed as 32-bit words
//           through subkeys_t
// key     - input, 32 bytes
static void AES256_key_schedule(uint8_t subkeys[15][16], const uint8_t *key) {
    subkeys_t *rk = (subkeys_t *)subkeys;
    uint8_t round_const = 1;

    // Round keys 0 and 1 are the raw key.
    memcpy(&subkeys[0][0], key, 32 * sizeof(uint8_t));

    for (int r = 2; r <= 14; r += 2) {
        // Even round key: substituted + rotated last word of the previous
        // key, combined with the round constant.
        uint32_t sub = AES_sbox_x4(rk->u32[r - 1][3]);
        rk->u32[r][0] = ROTR32(sub, 8) ^ round_const ^ rk->u32[r - 2][0];
        for (int w = 1; w < 4; w++) {
            rk->u32[r][w] = rk->u32[r][w - 1] ^ rk->u32[r - 2][w];
        }

        // Round key 14 is the last one; there is no odd key after it.
        if (r == 14) {
            break;
        }

        // Odd round key: substitution only, no rotation or round constant.
        sub = AES_sbox_x4(rk->u32[r][3]);
        rk->u32[r + 1][0] = sub ^ rk->u32[r - 1][0];
        for (int w = 1; w < 4; w++) {
            rk->u32[r + 1][w] = rk->u32[r + 1][w - 1] ^ rk->u32[r - 1][w];
        }

        // Advance the round constant (doubling in GF(2^8)).
        round_const = (round_const << 1) ^ ((round_const >> 7) * 0x11b);
    }
}
// Encrypt `ways` independent 16-byte blocks with AES-256.
//
// ways     - number of blocks
// vsubkeys - array of 15 round keys (uint8x16_t)
// ctr      - array of `ways` input blocks (uint8x16_t)
// out      - byte pointer receiving ways * 16 bytes of output
//
// The macro expands to a single statement without a trailing semicolon, so
// `AES256_ECB_XWAYS(...);` can be used safely in an unbraced if/else. Array
// and pointer arguments are parenthesised.
#define AES256_ECB_XWAYS(ways, vsubkeys, ctr, out)                            \
    do {                                                                       \
        uint8x16_t state[ways];                                                \
                                                                               \
        for (int j = 0; j < ways; j++) {                                       \
            state[j] = vaeseq_u8((ctr)[j], (vsubkeys)[0]);                     \
            state[j] = vaesmcq_u8(state[j]);                                   \
        }                                                                      \
                                                                               \
        for (int i = 1; i < 13; i++) {                                         \
            for (int j = 0; j < ways; j++) {                                   \
                state[j] = vaeseq_u8(state[j], (vsubkeys)[i]);                 \
                state[j] = vaesmcq_u8(state[j]);                               \
            }                                                                  \
        }                                                                      \
                                                                               \
        for (int j = 0; j < ways; j++) {                                       \
            state[j] = vaeseq_u8(state[j], (vsubkeys)[13]);                    \
            state[j] = veorq_u8(state[j], (vsubkeys)[14]);                     \
            vst1q_u8((out) + j * 16, state[j]);                                \
        }                                                                      \
    } while (0)
// Encrypt one 128-bit block with AES-256.
// vsubkeys - the 15 round keys for AES-256
// ctr - a 128-bit plaintext value
// buffer - receives the 128-bit (16-byte) ciphertext value
static void AES256_ECB(uint8x16_t vsubkeys[15], uint8x16_t ctr,
unsigned char *buffer) {
// (&ctr) lets the macro index the single block as ctr[0].
AES256_ECB_XWAYS(1, vsubkeys, (&ctr), buffer);
}
// Encrypt three 128-bit blocks with AES-256.
// vsubkeys - the 15 round keys for AES-256
// ctr - an array of 3 x 128-bit plaintext values
// buffer - receives 3 x 128-bit (48 bytes) of ciphertext
static void AES256_ECB_x3(uint8x16_t vsubkeys[15], uint8x16_t ctr[3],
unsigned char *buffer) {
AES256_ECB_XWAYS(3, vsubkeys, ctr, buffer);
}
// Reverse the byte order of the 128-bit value *x in place: the two 64-bit
// halves trade places and each one is byte-swapped.
static void bswap128(u128_t *x) {
    const uint64_t first = x->u64[0];
    const uint64_t second = x->u64[1];

    x->u64[0] = __builtin_bswap64(second);
    x->u64[1] = __builtin_bswap64(first);
}
// Add one to the counter *V. The value is byte-reversed before and after the
// addition, so the arithmetic happens on the byte-reversed form of what is
// stored.
static void incr_V(u128_t *V) {
    bswap128(V);
    V->u128 += 1;
    bswap128(V);
}
// CTR-DRBG update step: derive a new key and counter from three encrypted
// counter blocks, optionally XORed with provided_data.
// provided_data - 48 bytes mixed into the new state, or NULL
// vsubkeys - round keys of the current key
// Key - receives the new 32-byte key
// V - unused; the counter is always read from and written to DRBG_ctx.V
static void AES256_CTR_DRBG_Update(const unsigned char *provided_data,
uint8x16_t vsubkeys[15], unsigned char *Key,
unsigned char *V) {
(void)V;
unsigned char temp[48];
u128_t V128, t;
uint64x2_t vV[3];
memcpy(&V128, DRBG_ctx.V, sizeof(V128));
// Byte-reverse so the counter can be incremented with integer arithmetic.
bswap128(&V128);
// Build the three counter blocks V+1, V+2, V+3 in stored byte order.
for (int j = 0; j < 3; j++) {
V128.u128++;
t = V128;
bswap128(&t);
vV[j] = vld1q_u64((uint64_t *)&t);
}
AES256_ECB_x3(vsubkeys, (uint8x16_t *)vV, temp);
if (provided_data != NULL)
for (int i = 0; i < 48; i++)
temp[i] ^= provided_data[i];
// First 32 bytes become the key, the last 16 the counter block.
memcpy(Key, temp, 32);
memcpy(V128.u8, temp + 32, 16);
// The stored counter is kept one ahead: randombytes_arm64crypto() encrypts
// the stored value itself as its first block.
incr_V(&V128);
memcpy(DRBG_ctx.V, V128.u8, 16);
}
// Seed the global DRBG state.
//
// entropy_input          - 48 bytes of seed material
// personalization_string - optional 48 bytes XORed into the seed, may be NULL
// security_strength      - ignored
void randombytes_init_arm64crypto(unsigned char *entropy_input,
                                  unsigned char *personalization_string,
                                  int security_strength) {
    unsigned char seed_material[48];
    uint8_t round_keys[15][16];
    uint8x16_t vsubkeys[15];

    (void)security_strength;

    // seed = entropy, optionally XORed with the personalization string.
    for (int i = 0; i < 48; i++) {
        seed_material[i] = entropy_input[i];
    }
    if (personalization_string != NULL) {
        for (int i = 0; i < 48; i++) {
            seed_material[i] ^= personalization_string[i];
        }
    }

    // Start from the all-zero key and counter.
    memset(DRBG_ctx.Key, 0x00, sizeof(DRBG_ctx.Key));
    memset(DRBG_ctx.V, 0x00, sizeof(DRBG_ctx.V));

    AES256_key_schedule(round_keys, DRBG_ctx.Key);
    for (int k = 0; k < 15; k++) {
        vsubkeys[k] = vld1q_u8(round_keys[k]);
    }

    AES256_CTR_DRBG_Update(seed_material, vsubkeys, DRBG_ctx.Key, DRBG_ctx.V);
    DRBG_ctx.reseed_counter = 1;
}
// Number of counter blocks encrypted per iteration of the bulk loop.
#define WAYS 4
// Write xlen bytes of DRBG output to x, then refresh DRBG_ctx.Key / DRBG_ctx.V
// with AES256_CTR_DRBG_Update() and bump the reseed counter.
// Always returns RNG_SUCCESS.
//
// The bulk of the output is produced by the inline-assembly loop, which
// encrypts four counter blocks (64 bytes) per iteration while the scalar
// pipeline prepares the next four counters. Anything shorter than 64 bytes is
// handled one block at a time by the C loop after the asm statement.
int randombytes_arm64crypto(unsigned char *x, unsigned long long xlen) {
    uint8_t subkeys[15][16];
    unsigned char block[16];
    // V[j]: counter j in the byte order stored in DRBG_ctx.V.
    // Vle[j]: the same counter byte-reversed, the form used for arithmetic.
    u128_t V[WAYS], Vle[WAYS];
    uint8x16x4_t vV;
    uint8x16_t vsubkeys[15];

    AES256_key_schedule(subkeys, DRBG_ctx.Key);
    for (int j = 0; j < 15; j++) {
        vsubkeys[j] = vld1q_u8(subkeys[j]);
    }

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Woverlength-strings"
    asm(// Load the stored counter, derive counters +1, +2, +3 and write all
        // four to V in stored byte order; then load them into vV0..vV3.
        "ldp %[V0l], %[V0h], %[DRBG_ctx_V] \n\t"
        "stp %[V0l], %[V0h], [%[V] ] \n\t"
        "rev %[Vle0h], %[V0l] \n\t"
        "rev %[Vle0l], %[V0h] \n\t"
        "adds %[Vle1l], %[Vle0l], #1 \n\t"
        "adc %[Vle1h], %[Vle0h], xzr \n\t"
        "rev %[V1h], %[Vle1l] \n\t"
        "rev %[V1l], %[Vle1h] \n\t"
        "stp %[V1l], %[V1h], [%[V], #16] \n\t"
        "adds %[Vle2l], %[Vle0l], #2 \n\t"
        "adc %[Vle2h], %[Vle0h], xzr \n\t"
        "rev %[V2h], %[Vle2l] \n\t"
        "rev %[V2l], %[Vle2h] \n\t"
        "stp %[V2l], %[V2h], [%[V], #32] \n\t"
        "adds %[Vle3l], %[Vle0l], #3 \n\t"
        "adc %[Vle3h], %[Vle0h], xzr \n\t"
        "rev %[V3h], %[Vle3l] \n\t"
        "rev %[V3l], %[Vle3h] \n\t"
        "stp %[V3l], %[V3h], [%[V], #48] \n\t"
        "ld1 { %[vV0].16b, %[vV1].16b, %[vV2].16b, %[vV3].16b }, [%[V]]\n\t"
        // Fewer than 64 bytes requested: skip the bulk loop entirely.
        "cmp %[xlen], #64 \n\t"
        "b.lo 2f \n\t"
        ".p2align 6 \n\t"
        "1: \n\t"
        "aese %[vV0].16b, %[vsk0].16b \n\t"
        "aesmc %[vV0].16b, %[vV0].16b \n\t"
        "aese %[vV1].16b, %[vsk0].16b \n\t"
        "aesmc %[vV1].16b, %[vV1].16b \n\t"
        "aese %[vV2].16b, %[vsk0].16b \n\t"
        "aesmc %[vV2].16b, %[vV2].16b \n\t"
        "aese %[vV3].16b, %[vsk0].16b \n\t"
        "aesmc %[vV3].16b, %[vV3].16b \n\t"
        "aese %[vV0].16b, %[vsk1].16b \n\t"
        "aesmc %[vV0].16b, %[vV0].16b \n\t"
        "aese %[vV1].16b, %[vsk1].16b \n\t"
        "aesmc %[vV1].16b, %[vV1].16b \n\t"
        "aese %[vV2].16b, %[vsk1].16b \n\t"
        "aesmc %[vV2].16b, %[vV2].16b \n\t"
        "aese %[vV3].16b, %[vsk1].16b \n\t"
        "aesmc %[vV3].16b, %[vV3].16b \n\t"
        // Interleaved with the AES rounds: advance all four counters by 4.
        "adds %[Vle0l], %[Vle0l], #4 \n\t"
        "adc %[Vle0h], %[Vle0h], xzr \n\t"
        "adds %[Vle1l], %[Vle1l], #4 \n\t"
        "adc %[Vle1h], %[Vle1h], xzr \n\t"
        "adds %[Vle2l], %[Vle2l], #4 \n\t"
        "adc %[Vle2h], %[Vle2h], xzr \n\t"
        "adds %[Vle3l], %[Vle3l], #4 \n\t"
        "adc %[Vle3h], %[Vle3h], xzr \n\t"
        "aese %[vV0].16b, %[vsk2].16b \n\t"
        "aesmc %[vV0].16b, %[vV0].16b \n\t"
        "aese %[vV1].16b, %[vsk2].16b \n\t"
        "aesmc %[vV1].16b, %[vV1].16b \n\t"
        "aese %[vV2].16b, %[vsk2].16b \n\t"
        "aesmc %[vV2].16b, %[vV2].16b \n\t"
        "aese %[vV3].16b, %[vsk2].16b \n\t"
        "aesmc %[vV3].16b, %[vV3].16b \n\t"
        "aese %[vV0].16b, %[vsk3].16b \n\t"
        "aesmc %[vV0].16b, %[vV0].16b \n\t"
        "aese %[vV1].16b, %[vsk3].16b \n\t"
        "aesmc %[vV1].16b, %[vV1].16b \n\t"
        "aese %[vV2].16b, %[vsk3].16b \n\t"
        "aesmc %[vV2].16b, %[vV2].16b \n\t"
        "aese %[vV3].16b, %[vsk3].16b \n\t"
        "aesmc %[vV3].16b, %[vV3].16b \n\t"
        // Byte-reverse the advanced counters back into stored order.
        "rev %[V0h], %[Vle0l] \n\t"
        "rev %[V0l], %[Vle0h] \n\t"
        "rev %[V1h], %[Vle1l] \n\t"
        "rev %[V1l], %[Vle1h] \n\t"
        "rev %[V2h], %[Vle2l] \n\t"
        "rev %[V2l], %[Vle2h] \n\t"
        "rev %[V3h], %[Vle3l] \n\t"
        "rev %[V3l], %[Vle3h] \n\t"
        "aese %[vV0].16b, %[vsk4].16b \n\t"
        "aesmc %[vV0].16b, %[vV0].16b \n\t"
        "aese %[vV1].16b, %[vsk4].16b \n\t"
        "aesmc %[vV1].16b, %[vV1].16b \n\t"
        "aese %[vV2].16b, %[vsk4].16b \n\t"
        "aesmc %[vV2].16b, %[vV2].16b \n\t"
        "aese %[vV3].16b, %[vsk4].16b \n\t"
        "aesmc %[vV3].16b, %[vV3].16b \n\t"
        "aese %[vV0].16b, %[vsk5].16b \n\t"
        "aesmc %[vV0].16b, %[vV0].16b \n\t"
        "aese %[vV1].16b, %[vsk5].16b \n\t"
        "aesmc %[vV1].16b, %[vV1].16b \n\t"
        "aese %[vV2].16b, %[vsk5].16b \n\t"
        "aesmc %[vV2].16b, %[vV2].16b \n\t"
        "aese %[vV3].16b, %[vsk5].16b \n\t"
        "aesmc %[vV3].16b, %[vV3].16b \n\t"
        "aese %[vV0].16b, %[vsk6].16b \n\t"
        "aesmc %[vV0].16b, %[vV0].16b \n\t"
        "aese %[vV1].16b, %[vsk6].16b \n\t"
        "aesmc %[vV1].16b, %[vV1].16b \n\t"
        "aese %[vV2].16b, %[vsk6].16b \n\t"
        "aesmc %[vV2].16b, %[vV2].16b \n\t"
        "aese %[vV3].16b, %[vsk6].16b \n\t"
        "aesmc %[vV3].16b, %[vV3].16b \n\t"
        "aese %[vV0].16b, %[vsk7].16b \n\t"
        "aesmc %[vV0].16b, %[vV0].16b \n\t"
        "aese %[vV1].16b, %[vsk7].16b \n\t"
        "aesmc %[vV1].16b, %[vV1].16b \n\t"
        "aese %[vV2].16b, %[vsk7].16b \n\t"
        "aesmc %[vV2].16b, %[vV2].16b \n\t"
        "aese %[vV3].16b, %[vsk7].16b \n\t"
        "aesmc %[vV3].16b, %[vV3].16b \n\t"
        "aese %[vV0].16b, %[vsk8].16b \n\t"
        "aesmc %[vV0].16b, %[vV0].16b \n\t"
        "aese %[vV1].16b, %[vsk8].16b \n\t"
        "aesmc %[vV1].16b, %[vV1].16b \n\t"
        "aese %[vV2].16b, %[vsk8].16b \n\t"
        "aesmc %[vV2].16b, %[vV2].16b \n\t"
        "aese %[vV3].16b, %[vsk8].16b \n\t"
        "aesmc %[vV3].16b, %[vV3].16b \n\t"
        "aese %[vV0].16b, %[vsk9].16b \n\t"
        "aesmc %[vV0].16b, %[vV0].16b \n\t"
        "aese %[vV1].16b, %[vsk9].16b \n\t"
        "aesmc %[vV1].16b, %[vV1].16b \n\t"
        "aese %[vV2].16b, %[vsk9].16b \n\t"
        "aesmc %[vV2].16b, %[vV2].16b \n\t"
        "aese %[vV3].16b, %[vsk9].16b \n\t"
        "aesmc %[vV3].16b, %[vV3].16b \n\t"
        // Spill the next four counters to V for the reload at the loop end.
        "stp %[V0l], %[V0h], [%[V]] \n\t"
        "stp %[V1l], %[V1h], [%[V], #16] \n\t"
        "stp %[V2l], %[V2h], [%[V], #32] \n\t"
        "stp %[V3l], %[V3h], [%[V], #48] \n\t"
        "aese %[vV0].16b, %[vsk10].16b \n\t"
        "aesmc %[vV0].16b, %[vV0].16b \n\t"
        "aese %[vV1].16b, %[vsk10].16b \n\t"
        "aesmc %[vV1].16b, %[vV1].16b \n\t"
        "aese %[vV2].16b, %[vsk10].16b \n\t"
        "aesmc %[vV2].16b, %[vV2].16b \n\t"
        "aese %[vV3].16b, %[vsk10].16b \n\t"
        "aesmc %[vV3].16b, %[vV3].16b \n\t"
        "aese %[vV0].16b, %[vsk11].16b \n\t"
        "aesmc %[vV0].16b, %[vV0].16b \n\t"
        "aese %[vV1].16b, %[vsk11].16b \n\t"
        "aesmc %[vV1].16b, %[vV1].16b \n\t"
        "aese %[vV2].16b, %[vsk11].16b \n\t"
        "aesmc %[vV2].16b, %[vV2].16b \n\t"
        "aese %[vV3].16b, %[vsk11].16b \n\t"
        "aesmc %[vV3].16b, %[vV3].16b \n\t"
        "aese %[vV0].16b, %[vsk12].16b \n\t"
        "aesmc %[vV0].16b, %[vV0].16b \n\t"
        "aese %[vV1].16b, %[vsk12].16b \n\t"
        "aesmc %[vV1].16b, %[vV1].16b \n\t"
        "aese %[vV2].16b, %[vsk12].16b \n\t"
        "aesmc %[vV2].16b, %[vV2].16b \n\t"
        "aese %[vV3].16b, %[vsk12].16b \n\t"
        "aesmc %[vV3].16b, %[vV3].16b \n\t"
        // Final round (no MixColumns) and last key, then store 64 bytes.
        "aese %[vV0].16b, %[vsk13].16b \n\t"
        "eor %[vV0].16b, %[vV0].16b, %[vsk14].16b \n\t"
        "aese %[vV1].16b, %[vsk13].16b \n\t"
        "eor %[vV1].16b, %[vV1].16b, %[vsk14].16b \n\t"
        "stp %q[vV0], %q[vV1], [%[x]], #32\n\t"
        "aese %[vV2].16b, %[vsk13].16b \n\t"
        "eor %[vV2].16b, %[vV2].16b, %[vsk14].16b \n\t"
        "aese %[vV3].16b, %[vsk13].16b \n\t"
        "eor %[vV3].16b, %[vV3].16b, %[vsk14].16b \n\t"
        "stp %q[vV2], %q[vV3], [%[x]], #32\n\t"
        "sub %[xlen], %[xlen], #64 \n\t"
        "ld1 { %[vV0].16b, %[vV1].16b, %[vV2].16b, %[vV3].16b }, [%[V]]\n\t"
        "cmp %[xlen], #64 \n\t"
        "b.hs 1b \n\t"
        // Output exhausted exactly: step V[0] back to the last counter that
        // was encrypted (Vle3 - 4), which is what the C tail leaves in V[0]
        // in the other cases.
        "cbnz %[xlen], 2f \n\t"
        "subs %[V0h], %[Vle3l], #4 \n\t"
        "sbc %[V0l], %[Vle3h], xzr \n\t"
        "rev %[V0h], %[V0h] \n\t"
        "rev %[V0l], %[V0l] \n\t"
        "stp %[V0l], %[V0h], [%[V]] \n\t"
        "2: \n\t"
        : [vV0] "=&w"(vV.val[0]), [vV1] "=&w"(vV.val[1]), [vV2] "=&w"(vV.val[2]),
          [vV3] "=&w"(vV.val[3]), [Vle0l] "=&r"(Vle[0].u64[0]),
          [Vle0h] "=&r"(Vle[0].u64[1]), [Vle1l] "=&r"(Vle[1].u64[0]),
          [Vle1h] "=&r"(Vle[1].u64[1]), [Vle2l] "=&r"(Vle[2].u64[0]),
          [Vle2h] "=&r"(Vle[2].u64[1]), [Vle3l] "=&r"(Vle[3].u64[0]),
          [Vle3h] "=&r"(Vle[3].u64[1]), [x] "+r"(x), [xlen] "+r"(xlen),
          [V0l] "=&r"(V[0].u64[0]), [V0h] "=&r"(V[0].u64[1]),
          [V1l] "=&r"(V[1].u64[0]), [V1h] "=&r"(V[1].u64[1]),
          [V2l] "=&r"(V[2].u64[0]), [V2h] "=&r"(V[2].u64[1]),
          [V3l] "=&r"(V[3].u64[0]), [V3h] "=&r"(V[3].u64[1]),
          "=m"(*(unsigned char(*)[64])x), "=m"(*(unsigned char(*)[64])V)
        :
        [vsk0] "w"(vsubkeys[0]), [vsk1] "w"(vsubkeys[1]), [vsk2] "w"(vsubkeys[2]),
        [vsk3] "w"(vsubkeys[3]), [vsk4] "w"(vsubkeys[4]), [vsk5] "w"(vsubkeys[5]),
        [vsk6] "w"(vsubkeys[6]), [vsk7] "w"(vsubkeys[7]), [vsk8] "w"(vsubkeys[8]),
        [vsk9] "w"(vsubkeys[9]), [vsk10] "w"(vsubkeys[10]),
        [vsk11] "w"(vsubkeys[11]), [vsk12] "w"(vsubkeys[12]),
        [vsk13] "w"(vsubkeys[13]), [vsk14] "w"(vsubkeys[14]), [V] "r"(V),
        [DRBG_ctx_V] "m"(DRBG_ctx.V)
        // "memory": the loop stores 64 bytes per iteration through the
        // post-incremented x, i.e. possibly far more than the 64 bytes
        // covered by the "=m" operand above, and it reads V back from
        // memory. The clobber tells the compiler about those accesses.
        : "cc", "memory");
#pragma GCC diagnostic pop

    // Tail: one block at a time. The final block (16 bytes or fewer) goes
    // through a temporary buffer and does not advance the counter.
    while (xlen > 0) {
        if (xlen > 16) {
            AES256_ECB(vsubkeys, vld1q_u8((uint8_t *)&V[0]), x);
            x += 16;
            xlen -= 16;
            Vle[0].u128++;
            V[0] = Vle[0];
            bswap128(&V[0]);
        } else {
            AES256_ECB(vsubkeys, vld1q_u8((uint8_t *)&V[0]), block);
            memcpy(x, block, xlen);
            xlen = 0;
        }
    }

    // Store the counter back before the update step, which reads DRBG_ctx.V.
    memcpy(DRBG_ctx.V, &V[0], sizeof(V[0]));
    AES256_CTR_DRBG_Update(NULL, vsubkeys, DRBG_ctx.Key, DRBG_ctx.V);
    DRBG_ctx.reseed_counter++;
    return RNG_SUCCESS;
}
#ifdef RANDOMBYTES_ARM64CRYPTO
// Generic entry point: fill random_array with nbytes of DRBG output.
// Returns the status of randombytes_arm64crypto().
int randombytes(unsigned char *random_array, unsigned long long nbytes) {
    int ret = randombytes_arm64crypto(random_array, nbytes);
#ifdef ENABLE_CT_TESTING
    // Mark the whole output buffer as undefined for constant-time testing.
    // The length must be nbytes; passing the status code (0 on success)
    // would mark nothing.
    // NOTE(review): the macro comes from <valgrind/memcheck.h>, which is not
    // included in this file - confirm it is provided by the build.
    VALGRIND_MAKE_MEM_UNDEFINED(random_array, nbytes);
#endif
    return ret;
}

// Generic entry point: seed the DRBG, see randombytes_init_arm64crypto().
void randombytes_init(unsigned char *entropy_input,
                      unsigned char *personalization_string,
                      int security_strength) {
    randombytes_init_arm64crypto(entropy_input, personalization_string,
                                 security_strength);
}
#endif

View File

@@ -0,0 +1,43 @@
# CTR-DRBG backend using the x86 AES-NI instructions.
set(SOURCE_FILES_COMMON_AESNI
  aes_ni.c
  ctr_drbg.c
  randombytes_ctrdrbg_aesni.c
  vaes256_key_expansion.S
)

# Add the backend to both common libraries.
foreach(SQISIGN_COMMON_TARGET sqisign_common_test sqisign_common_sys)
  target_sources(${SQISIGN_COMMON_TARGET} PRIVATE ${SOURCE_FILES_COMMON_AESNI})
  target_include_directories(${SQISIGN_COMMON_TARGET} PRIVATE include)
  target_compile_definitions(${SQISIGN_COMMON_TARGET} PRIVATE RANDOMBYTES_AES_NI)
  target_compile_options(${SQISIGN_COMMON_TARGET} PRIVATE -maes -mavx2)
endforeach()

# Sources shared by the stand-alone CTR-DRBG test and benchmark programs.
set(SOURCE_FILES_CTRDRBG_TEST_BENCHMARK
  ../ref/aes_c.c
  aes_ni.c
  ctr_drbg.c
  randombytes_ctrdrbg_aesni.c
  ../ref/randombytes_ctrdrbg.c
  ../generic/randombytes_system.c
  vaes256_key_expansion.S
)

# Test executable.
add_executable(sqisign_test_ctrdrbg_intel ${SOURCE_FILES_CTRDRBG_TEST_BENCHMARK} ../generic/test/test_ctrdrbg.c)
target_include_directories(sqisign_test_ctrdrbg_intel PRIVATE ${INC_PUBLIC} ${INC_COMMON} include ../ref/include)
target_compile_definitions(sqisign_test_ctrdrbg_intel PRIVATE
  CTRDRBG_TEST_BENCH
  RANDOMBYTES_INIT_PLATFORM=randombytes_init_aes_ni
  RANDOMBYTES_PLATFORM=randombytes_aes_ni)
target_compile_options(sqisign_test_ctrdrbg_intel PRIVATE -maes -mavx2)
# NAME/COMMAND signature: CMake substitutes the real location of the executable
# target, which the legacy add_test(<name> <command>) form does not do.
add_test(NAME sqisign_test_ctrdrbg_intel COMMAND sqisign_test_ctrdrbg_intel)

# Benchmark executable, registered in the BM_BINS cache list.
add_executable(sqisign_bench_ctrdrbg_intel ${SOURCE_FILES_CTRDRBG_TEST_BENCHMARK} ../generic/test/bench_ctrdrbg.c)
target_include_directories(sqisign_bench_ctrdrbg_intel PRIVATE ${INC_PUBLIC} ${INC_COMMON} include ../ref/include)
target_compile_definitions(sqisign_bench_ctrdrbg_intel PRIVATE
  CTRDRBG_TEST_BENCH
  RANDOMBYTES_INIT_PLATFORM=randombytes_init_aes_ni
  RANDOMBYTES_PLATFORM=randombytes_aes_ni)
target_compile_options(sqisign_bench_ctrdrbg_intel PRIVATE -maes -mavx2)
set(BM_BINS ${BM_BINS} sqisign_bench_ctrdrbg_intel CACHE INTERNAL "List of benchmark executables")

View File

@@ -0,0 +1,258 @@
/***************************************************************************
* This implementation is a modified version of the code,
* written by Nir Drucker and Shay Gueron
* AWS Cryptographic Algorithms Group
* (ndrucker@amazon.com, gueron@amazon.com)
*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
* The license is detailed in the file LICENSE.txt, and applies to this file.
* ***************************************************************************/
#include "aes_ni.h"
#include <string.h>
#include <emmintrin.h>
#include <immintrin.h>
/* Short aliases for the 128-bit SSE / AES-NI intrinsics used in this file. */
#define AESENC(m, key) _mm_aesenc_si128(m, key)
#define AESENCLAST(m, key) _mm_aesenclast_si128(m, key)
#define XOR(a, b) _mm_xor_si128(a, b)
#define ADD32(a, b) _mm_add_epi32(a, b)
#define SHUF8(a, mask) _mm_shuffle_epi8(a, mask)
/* Clears the vector registers; aes256_enc() calls it to wipe secrets. */
#define ZERO256 _mm256_zeroall
/* Arguments for _mm_set_epi32() that build the byte-reversing shuffle mask. */
#define BSWAP_MASK 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
/* 256-bit VAES variants, only available when VAES256 is defined. */
#ifdef VAES256
#define VAESENC(a, key) _mm256_aesenc_epi128(a, key)
#define VAESENCLAST(a, key) _mm256_aesenclast_epi128(a, key)
#define EXTRACT128(a, imm) _mm256_extracti128_si256(a, imm)
#define XOR256(a, b) _mm256_xor_si256(a,b)
#define ADD32_256(a, b) _mm256_add_epi32(a,b)
#define SHUF8_256(a, mask) _mm256_shuffle_epi8(a, mask)
#endif
/* 512-bit VAES variants, only available when VAES512 is defined.
 * NOTE(review): VAESENC, VAESENCLAST and EXTRACT128 are defined in both
 * branches, so defining VAES256 and VAES512 together redefines them. */
#ifdef VAES512
#define VAESENC(a, key) _mm512_aesenc_epi128(a, key)
#define VAESENCLAST(a, key) _mm512_aesenclast_epi128(a, key)
#define EXTRACT128(a, imm) _mm512_extracti64x2_epi64(a, imm)
#define XOR512(a, b) _mm512_xor_si512(a,b)
#define ADD32_512(a, b) _mm512_add_epi32(a,b)
#define SHUF8_512(a, mask) _mm512_shuffle_epi8(a, mask)
#endif
// Load 16 bytes in reversed order: ctr[0] becomes the most significant byte
// of the vector and ctr[15] the least significant one.
_INLINE_ __m128i load_m128i(IN const uint8_t *ctr)
{
    // _mm_setr_epi8 takes its arguments least-significant byte first.
    return _mm_setr_epi8(ctr[15], ctr[14], ctr[13], ctr[12],
                         ctr[11], ctr[10], ctr[9], ctr[8],
                         ctr[7], ctr[6], ctr[5], ctr[4],
                         ctr[3], ctr[2], ctr[1], ctr[0]);
}
// Load 16 bytes in memory order: ctr[0] becomes the least significant byte of
// the vector and ctr[15] the most significant one. No alignment is required.
_INLINE_ __m128i loadr_m128i(IN const uint8_t *ctr)
{
    return _mm_loadu_si128((const __m128i *)ctr);
}
// Encrypt a single 16-byte block with AES-256.
//   ct - receives the 16-byte ciphertext (no alignment required)
//   pt - 16-byte plaintext block
//   ks - expanded key schedule (AES256_ROUNDS + 1 round keys)
void aes256_enc(OUT uint8_t *ct,
                IN const uint8_t *pt,
                IN const aes256_ks_t *ks)
{
    // Initial AddRoundKey.
    __m128i state = XOR(loadr_m128i(pt), ks->keys[0]);

    // Full rounds 1 .. AES256_ROUNDS-1.
    uint32_t round = 1;
    while (round < AES256_ROUNDS) {
        state = AESENC(state, ks->keys[round]);
        round++;
    }

    // Final round (no MixColumns), then an unaligned store of the result.
    state = AESENCLAST(state, ks->keys[AES256_ROUNDS]);
    _mm_storeu_si128((void *)ct, state);

    // Wipe the vector registers so no key/state material is left behind.
    ZERO256();
}
// Write num_blocks AES-256 keystream blocks to ct.
// Block i is E(ctr + i), where the addition acts on the last four bytes of
// the counter block interpreted as a big-endian 32-bit integer (it wraps
// modulo 2^32 and never carries into the other twelve bytes).
//   ct         - output buffer of 16 * num_blocks bytes (no alignment required)
//   ctr        - 16-byte initial counter block
//   num_blocks - number of 16-byte blocks to produce
//   ks         - expanded key schedule
void aes256_ctr_enc(OUT uint8_t *ct,
                    IN const uint8_t *ctr,
                    IN const uint32_t num_blocks,
                    IN const aes256_ks_t *ks)
{
    const __m128i byte_reverse = _mm_set_epi32(BSWAP_MASK);
    const __m128i increment = _mm_set_epi32(0, 0, 0, 1);

    // The counter is kept byte-reversed so that its last four bytes sit in the
    // lowest 32-bit lane and can be incremented with a plain 32-bit add.
    __m128i counter = load_m128i(ctr);

    uint32_t done = 0;
    while (done < num_blocks) {
        // Restore memory byte order, then apply the initial AddRoundKey.
        __m128i state = XOR(SHUF8(counter, byte_reverse), ks->keys[0]);

        for (uint32_t round = 1; round < AES256_ROUNDS; round++) {
            state = AESENC(state, ks->keys[round]);
        }
        state = AESENCLAST(state, ks->keys[AES256_ROUNDS]);

        // Unaligned store: ct carries no alignment guarantee.
        _mm_storeu_si128((void *)&ct[16 * done], state);

        counter = ADD32(counter, increment);
        done++;
    }

    // Wipe the vector registers so no key/state material is left behind.
    ZERO256();
}
#ifdef VAES256
// Replicate each 128-bit round key of |ks| into both lanes of a 256-bit
// vector, so one VAES instruction can process two blocks with the same key.
_INLINE_ void load_ks(OUT __m256i ks256[AES256_ROUNDS + 1],
                      IN const aes256_ks_t *ks)
{
    uint32_t round = 0;
    while (round <= AES256_ROUNDS) {
        ks256[round] = _mm256_broadcastsi128_si256(ks->keys[round]);
        round++;
    }
}
// NIST 800-90A Table 3, Section 10.2.1 (no derivation function) states that
// max_number_of_bits_per_request is min((2^ctr_len - 4) x block_len, 2^19) <= 2^19.
// Therefore the maximal number of blocks (16 bytes) is 2^19/128 = 2^19/2^7 = 2^12 < 2^32.
// Here num_blocks is assumed to be less than 2^32.
// It is the caller's responsibility to ensure it.
//
// Write num_blocks AES-256 CTR keystream blocks to ct, two blocks per VAES
// instruction. Block i is E(ctr + i), with the addition applied to the last
// four bytes of the counter block as a big-endian 32-bit integer. An odd
// trailing block is produced by aes256_ctr_enc.
void aes256_ctr_enc256(OUT uint8_t *ct,
                       IN const uint8_t *ctr,
                       IN const uint32_t num_blocks,
                       IN const aes256_ks_t *ks)
{
    const uint64_t pairs = num_blocks / 2;
    const uint64_t leftover = num_blocks - (2 * pairs);

    // Constant vectors: per-lane byte reversal, the per-iteration step, and
    // the initial offsets (+0 for the low lane, +1 for the high lane).
    const __m256i byte_reverse = _mm256_set_epi32(BSWAP_MASK, BSWAP_MASK);
    const __m256i step = _mm256_set_epi32(0, 0, 0, 2, 0, 0, 0, 2);
    const __m256i lane_offsets = _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 0);

    __m256i round_keys[AES256_ROUNDS + 1];
    load_ks(round_keys, ks);

    // Two byte-reversed counters, one per lane: ctr and ctr + 1.
    __m256i counters =
        ADD32_256(_mm256_broadcastsi128_si256(load_m128i(ctr)), lane_offsets);

    for (uint32_t pair = 0; pair < pairs; pair++) {
        // Restore memory byte order, then apply the initial AddRoundKey.
        __m256i state = XOR256(SHUF8_256(counters, byte_reverse), round_keys[0]);

        for (uint32_t round = 1; round < AES256_ROUNDS; round++) {
            state = VAESENC(state, round_keys[round]);
        }
        state = VAESENCLAST(state, round_keys[AES256_ROUNDS]);

        // Unaligned store: ct carries no alignment guarantee.
        _mm256_storeu_si256((__m256i *)&ct[PAR_AES_BLOCK_SIZE * pair], state);

        // Advance both counters in parallel.
        counters = ADD32_256(counters, step);
    }

    if (leftover != 0) {
        // The low lane holds the next unused counter; hand it over in memory
        // byte order to the single-block routine.
        const __m256i next = SHUF8_256(counters, byte_reverse);
        __m128i tail_ctr = EXTRACT128(next, 0);
        aes256_ctr_enc(&ct[PAR_AES_BLOCK_SIZE * pairs],
                       (const uint8_t *)&tail_ctr, leftover, ks);
    }

    // Wipe the vector registers so no key/state material is left behind.
    ZERO256();
}
#endif //VAES256
#ifdef VAES512
// Replicate each 128-bit round key of |ks| into all four lanes of a 512-bit
// vector, so one VAES instruction can process four blocks with the same key.
_INLINE_ void load_ks(OUT __m512i ks512[AES256_ROUNDS + 1],
                      IN const aes256_ks_t *ks)
{
    uint32_t round = 0;
    while (round <= AES256_ROUNDS) {
        ks512[round] = _mm512_broadcast_i32x4(ks->keys[round]);
        round++;
    }
}
// NIST 800-90A Table 3, Section 10.2.1 (no derivation function) states that
// max_number_of_bits_per_request is min((2^ctr_len - 4) x block_len, 2^19) <= 2^19.
// Therefore the maximal number of blocks (16 bytes) is 2^19/128 = 2^19/2^7 = 2^12 < 2^32.
// Here num_blocks is assumed to be less than 2^32.
// It is the caller's responsibility to ensure it.
//
// Write num_blocks AES-256 CTR keystream blocks to ct, four blocks per VAES
// instruction. Block i is E(ctr + i), with the addition applied to the last
// four bytes of the counter block as a big-endian 32-bit integer. Up to three
// trailing blocks are produced by aes256_ctr_enc.
void aes256_ctr_enc512(OUT uint8_t *ct,
                       IN const uint8_t *ctr,
                       IN const uint32_t num_blocks,
                       IN const aes256_ks_t *ks)
{
    const uint64_t quads = num_blocks / 4;
    const uint64_t leftover = num_blocks - (4 * quads);

    // Constant vectors: per-lane byte reversal, the per-iteration step, and
    // the initial offsets (+0, +1, +2, +3 from the lowest to the highest lane).
    const __m512i byte_reverse = _mm512_set_epi32(BSWAP_MASK, BSWAP_MASK,
                                                  BSWAP_MASK, BSWAP_MASK);
    const __m512i step =
        _mm512_set_epi32(0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4);
    const __m512i lane_offsets =
        _mm512_set_epi32(0, 0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0);

    __m512i round_keys[AES256_ROUNDS + 1];
    load_ks(round_keys, ks);

    // Four byte-reversed counters, one per lane: ctr .. ctr + 3.
    __m512i counters =
        ADD32_512(_mm512_broadcast_i32x4(load_m128i(ctr)), lane_offsets);

    for (uint32_t quad = 0; quad < quads; quad++) {
        // Restore memory byte order, then apply the initial AddRoundKey.
        __m512i state = XOR512(SHUF8_512(counters, byte_reverse), round_keys[0]);

        for (uint32_t round = 1; round < AES256_ROUNDS; round++) {
            state = VAESENC(state, round_keys[round]);
        }
        state = VAESENCLAST(state, round_keys[AES256_ROUNDS]);

        // Unaligned store: ct carries no alignment guarantee.
        _mm512_storeu_si512(&ct[PAR_AES_BLOCK_SIZE * quad], state);

        // Advance the four counters in parallel.
        counters = ADD32_512(counters, step);
    }

    if (leftover != 0) {
        // The lowest lane holds the next unused counter; hand it over in
        // memory byte order to the single-block routine.
        const __m512i next = SHUF8_512(counters, byte_reverse);
        __m128i tail_ctr = EXTRACT128(next, 0);
        aes256_ctr_enc(&ct[PAR_AES_BLOCK_SIZE * quads],
                       (const uint8_t *)&tail_ctr, leftover, ks);
    }

    // Wipe the vector registers so no key/state material is left behind.
    ZERO256();
}
#endif //VAES512

View File

@@ -0,0 +1,201 @@
/* Copyright (c) 2017, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
/***************************************************************************
* Small modification by Nir Drucker and Shay Gueron
* AWS Cryptographic Algorithms Group
* (ndrucker@amazon.com, gueron@amazon.com)
* include:
* 1) Use memcpy/memset instead of OPENSSL_memcpy/memset
* 2) Include aes.h as the underlying aes code
* 3) Modifying the drbg structure
* ***************************************************************************/
#include "ctr_drbg.h"
#include <string.h>
// Section references in this file refer to SP 800-90Ar1:
// http://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-90Ar1.pdf
// Initialise |*drbg| from |CTR_DRBG_ENTROPY_LEN| bytes of entropy and an
// optional personalization string of at most |CTR_DRBG_ENTROPY_LEN| bytes.
// Returns 1 on success and 0 if the personalization string is too long.
int CTR_DRBG_init(CTR_DRBG_STATE *drbg,
                  const uint8_t entropy[CTR_DRBG_ENTROPY_LEN],
                  const uint8_t *personalization, size_t personalization_len) {
  // Section 10.2.1.2
  // kInitMask is the result of encrypting blocks with big-endian value 1, 2
  // and 3 with the all-zero AES-256 key.
  static const uint8_t kInitMask[CTR_DRBG_ENTROPY_LEN] = {
      0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9, 0xa9, 0x63, 0xb4, 0xf1,
      0xc4, 0xcb, 0x73, 0x8b, 0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60, 0x6b, 0x6e,
      0x07, 0x4e, 0xc5, 0xd3, 0xba, 0xf3, 0x9d, 0x18, 0x72, 0x60, 0x03, 0xca,
      0x37, 0xa6, 0x2a, 0x74, 0xd1, 0xa2, 0xf5, 0x8e, 0x75, 0x06, 0x35, 0x8e,
  };

  // Section 10.2.1.3.1
  if (personalization_len > CTR_DRBG_ENTROPY_LEN) {
    return 0;
  }

  // seed = entropy XOR (personalization, zero-padded on the right) XOR mask.
  uint8_t seed_material[CTR_DRBG_ENTROPY_LEN];
  for (size_t i = 0; i < CTR_DRBG_ENTROPY_LEN; i++) {
    uint8_t mixed = (uint8_t)(entropy[i] ^ kInitMask[i]);
    if (i < personalization_len) {
      mixed ^= personalization[i];
    }
    seed_material[i] = mixed;
  }

  // The first 32 seed bytes become the AES-256 key, the last 16 the counter.
  aes256_key_t key;
  memcpy(key.raw, seed_material, sizeof(key.raw));
  memcpy(drbg->counter.bytes, seed_material + sizeof(key.raw),
         sizeof(drbg->counter.bytes));
  aes256_key_expansion(&drbg->ks, &key);

  drbg->reseed_counter = 1;
  return 1;
}
// ctr32_add adds |n| to the last four bytes of |drbg->counter|, treated as a
// big-endian number. The sum wraps modulo 2^32.
static void ctr32_add(CTR_DRBG_STATE *drbg, uint32_t n) {
  // Byte-swap into a native integer, add, and swap back for storage.
  const uint32_t value = CRYPTO_bswap4(drbg->counter.words[3]);
  drbg->counter.words[3] = CRYPTO_bswap4(value + n);
}
// Derive a fresh key and counter for |drbg| from its own keystream, XORed
// with up to |CTR_DRBG_ENTROPY_LEN| bytes of |data|.
// Returns 1 on success and 0 if |data_len| is too large.
static int ctr_drbg_update(CTR_DRBG_STATE *drbg, const uint8_t *data,
                           size_t data_len) {
  // Per section 10.2.1.2, |data_len| must be |CTR_DRBG_ENTROPY_LEN|. Here, we
  // allow shorter inputs and right-pad them with zeros. This is equivalent to
  // the specified algorithm but saves a copy in |CTR_DRBG_generate|.
  if (data_len > CTR_DRBG_ENTROPY_LEN) {
    return 0;
  }

  // Generate CTR_DRBG_ENTROPY_LEN bytes of keystream, one block at a time,
  // incrementing the counter before each block.
  uint8_t keystream[CTR_DRBG_ENTROPY_LEN];
  size_t offset = 0;
  while (offset < CTR_DRBG_ENTROPY_LEN) {
    ctr32_add(drbg, 1);
    aes256_enc(&keystream[offset], drbg->counter.bytes, &drbg->ks);
    offset += AES_BLOCK_SIZE;
  }

  // Mix in the caller's data; bytes beyond |data_len| are left untouched,
  // which is the implicit zero padding.
  for (size_t i = 0; i != data_len; i++) {
    keystream[i] ^= data[i];
  }

  // The first 32 bytes become the new key, the last 16 the new counter.
  aes256_key_t next_key;
  memcpy(next_key.raw, keystream, sizeof(next_key.raw));
  memcpy(drbg->counter.bytes, keystream + sizeof(next_key.raw),
         sizeof(drbg->counter.bytes));
  aes256_key_expansion(&drbg->ks, &next_key);
  return 1;
}
// Reseed |drbg| with |CTR_DRBG_ENTROPY_LEN| bytes of entropy, optionally mixed
// with up to |CTR_DRBG_ENTROPY_LEN| bytes of additional data.
// Returns 1 on success and 0 if the additional data is too long.
int CTR_DRBG_reseed(CTR_DRBG_STATE *drbg,
                    const uint8_t entropy[CTR_DRBG_ENTROPY_LEN],
                    const uint8_t *additional_data,
                    size_t additional_data_len) {
  // Section 10.2.1.4
  if (additional_data_len > CTR_DRBG_ENTROPY_LEN) {
    return 0;
  }

  // When additional data is present, fold it into a private copy of the
  // entropy and feed that copy to the update function instead.
  uint8_t mixed_entropy[CTR_DRBG_ENTROPY_LEN];
  const uint8_t *seed = entropy;
  if (additional_data_len != 0) {
    memcpy(mixed_entropy, entropy, CTR_DRBG_ENTROPY_LEN);
    for (size_t i = 0; i != additional_data_len; i++) {
      mixed_entropy[i] ^= additional_data[i];
    }
    seed = mixed_entropy;
  }

  if (ctr_drbg_update(drbg, seed, CTR_DRBG_ENTROPY_LEN) == 0) {
    return 0;
  }

  drbg->reseed_counter = 1;
  return 1;
}
// Write |out_len| pseudo-random bytes to |out|.
// If |additional_data_len| is non-zero, up to |CTR_DRBG_ENTROPY_LEN| bytes of
// |additional_data| are mixed into the state before generating, and again in
// the state update that follows generation.
// Returns 1 on success and 0 if |additional_data_len| exceeds
// |CTR_DRBG_ENTROPY_LEN|.
int CTR_DRBG_generate(CTR_DRBG_STATE *drbg, uint8_t *out, size_t out_len,
                      const uint8_t *additional_data,
                      size_t additional_data_len) {
  if (additional_data_len != 0 &&
      !ctr_drbg_update(drbg, additional_data, additional_data_len)) {
    return 0;
  }

  // Whole blocks are produced in chunks of at most kChunkSize bytes. This
  // keeps the working set small and bounds the block count handed to the
  // AES-CTR routines and to |ctr32_add| (both take 32-bit counts) to
  // kChunkSize / AES_BLOCK_SIZE.
  static const size_t kChunkSize = 8 * 1024;

  while (out_len >= AES_BLOCK_SIZE) {
    size_t todo = kChunkSize;
    if (todo > out_len) {
      todo = out_len;
    }
    // Round down to a whole number of AES blocks.
    todo &= ~(AES_BLOCK_SIZE - 1);
    const size_t num_blocks = todo / AES_BLOCK_SIZE;

    // The chunk is cleared before it is filled with keystream, as in the
    // original code.
    memset(out, 0, todo);

    // The first block of the chunk uses counter + 1; the CTR routine then
    // increments its own copy of the counter for the following blocks.
    ctr32_add(drbg, 1);
#ifdef VAES512
    aes256_ctr_enc512(out, drbg->counter.bytes, (uint32_t)num_blocks, &drbg->ks);
#elif defined(VAES256)
    aes256_ctr_enc256(out, drbg->counter.bytes, (uint32_t)num_blocks, &drbg->ks);
#else
    aes256_ctr_enc(out, drbg->counter.bytes, (uint32_t)num_blocks, &drbg->ks);
#endif
    // Account for the remaining blocks so the stored counter equals the
    // counter used for the last block of this chunk.
    ctr32_add(drbg, (uint32_t)(num_blocks - 1));

    out += todo;
    out_len -= todo;
  }

  // Trailing partial block: encrypt one more counter value and copy only the
  // bytes that are still needed.
  if (out_len > 0) {
    uint8_t block[AES_BLOCK_SIZE];
    ctr32_add(drbg, 1);
    aes256_enc(block, drbg->counter.bytes, &drbg->ks);
    memcpy(out, block, out_len);
  }

  // Right-padding |additional_data| in step 2.2 is handled implicitly by
  // |ctr_drbg_update|, to save a copy.
  if (!ctr_drbg_update(drbg, additional_data, additional_data_len)) {
    return 0;
  }

  drbg->reseed_counter++;
  return 1;
}
// Zeroise the whole DRBG state (key schedule, counter and reseed counter)
// through |secure_clean|.
void CTR_DRBG_clear(CTR_DRBG_STATE *drbg) {
  uint8_t *state_bytes = (uint8_t *)drbg;
  secure_clean(state_bytes, sizeof(*drbg));
}

View File

@@ -0,0 +1,85 @@
/***************************************************************************
* Written by Nir Drucker and Shay Gueron
* AWS Cryptographic Algorithms Group
* (ndrucker@amazon.com, gueron@amazon.com)
*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
* The license is detailed in the file LICENSE.txt, and applies to this file.
* ***************************************************************************/
#pragma once
#include <stdint.h>
#include <wmmintrin.h>
#include "defs.h"
// NOTE(review): MASK() is not defined in the headers visible here, so this
// macro only compiles where MASK is provided elsewhere; it is not used by the
// visible sources - confirm whether it is still needed.
#define MAX_AES_INVOKATION (MASK(32))
// AES-256 key length in bytes and in bits.
#define AES256_KEY_SIZE (32ULL)
#define AES256_KEY_BITS (AES256_KEY_SIZE * 8)
// AES block length in bytes.
#define AES_BLOCK_SIZE (16ULL)
// Number of AES-256 rounds; a key schedule holds AES256_ROUNDS + 1 round keys.
#define AES256_ROUNDS (14ULL)
// Number of bytes produced per iteration by the parallel VAES routines:
// two blocks with 256-bit vectors, four blocks with 512-bit vectors.
// Left undefined when neither VAES256 nor VAES512 is set.
#ifdef VAES256
#define PAR_AES_BLOCK_SIZE (AES_BLOCK_SIZE*2)
#elif defined(VAES512)
#define PAR_AES_BLOCK_SIZE (AES_BLOCK_SIZE*4)
#endif
// Raw AES-256 key bytes, declared with 16-byte alignment.
typedef ALIGN(16) struct aes256_key_s {
uint8_t raw[AES256_KEY_SIZE];
} aes256_key_t;
// Expanded AES-256 key schedule: one 128-bit round key per round plus the
// initial whitening key, declared with 16-byte alignment.
typedef ALIGN(16) struct aes256_ks_s {
__m128i keys[AES256_ROUNDS + 1];
} aes256_ks_t;
// Expand a 256-bit key into the AES256_ROUNDS + 1 round keys of |ks|.
// The ks parameter must be 16 bytes aligned!
EXTERNC void aes256_key_expansion(OUT aes256_ks_t *ks,
IN const aes256_key_t *key);
// Encrypt one 128-bit block ct = E(pt,ks)
void aes256_enc(OUT uint8_t *ct,
IN const uint8_t *pt,
IN const aes256_ks_t *ks);
// Produce num_blocks 128-bit keystream blocks; pt is the initial counter block.
// ct[15:0] = E(pt[15:0],ks)
// ct[31:16] = E(pt[15:0] + 1,ks)
// ...
// ct[16*num_blocks - 1:16*(num_blocks-1)] = E(pt[15:0] + num_blocks - 1,ks)
// The increment is applied to the last four bytes of the counter block,
// treated as a big-endian 32-bit integer (no carry into the other bytes).
void aes256_ctr_enc(OUT uint8_t *ct,
IN const uint8_t *pt,
IN const uint32_t num_blocks,
IN const aes256_ks_t *ks);
// Produce num_blocks 128-bit keystream blocks using VAES (AVX-2), two blocks
// at a time; ctr is the initial counter block.
// ct[15:0] = E(ctr[15:0],ks)
// ct[31:16] = E(ctr[15:0] + 1,ks)
// ...
// ct[16*num_blocks - 1:16*(num_blocks-1)] = E(ctr[15:0] + num_blocks - 1,ks)
// The definition is only compiled when VAES256 is defined.
void aes256_ctr_enc256(OUT uint8_t *ct,
IN const uint8_t *ctr,
IN const uint32_t num_blocks,
IN const aes256_ks_t *ks);
// Produce num_blocks 128-bit keystream blocks using VAES (AVX512), four blocks
// at a time; ctr is the initial counter block.
// ct[15:0] = E(ctr[15:0],ks)
// ct[31:16] = E(ctr[15:0] + 1,ks)
// ...
// ct[16*num_blocks - 1:16*(num_blocks-1)] = E(ctr[15:0] + num_blocks - 1,ks)
// The definition is only compiled when VAES512 is defined.
void aes256_ctr_enc512(OUT uint8_t *ct,
IN const uint8_t *ctr,
IN const uint32_t num_blocks,
IN const aes256_ks_t *ks);

View File

@@ -0,0 +1,78 @@
/* Copyright (c) 2017, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
/***************************************************************************
* Small modification by Nir Drucker and Shay Gueron
* AWS Cryptographic Algorithms Group
* (ndrucker@amazon.com, gueron@amazon.com)
* include:
* 1) Use memcpy/memset instead of OPENSSL_memcpy/memset
* 2) Include aes.h as the underlying aes code
* 3) Modifying the drbg structure
* ***************************************************************************/
#pragma once
#if defined(__cplusplus)
extern "C" {
#endif
#include "aes_ni.h"
// CTR_DRBG_STATE contains the state of a CTR_DRBG based on AES-256. See SP
// 800-90Ar1.
typedef struct {
// Expanded AES-256 key schedule of the current DRBG key.
aes256_ks_t ks;
// 16-byte counter block, accessible as bytes or as four 32-bit words; the
// implementation increments words[3] as a big-endian 32-bit integer.
union {
uint8_t bytes[16];
uint32_t words[4];
} counter;
// Set to 1 by init/reseed and incremented after every generate call.
uint64_t reseed_counter;
} CTR_DRBG_STATE;
// See SP 800-90Ar1, table 3.
// Seed length in bytes: 32 bytes of AES-256 key plus 16 bytes of counter.
#define CTR_DRBG_ENTROPY_LEN 48
// CTR_DRBG_init initialises |*drbg| given |CTR_DRBG_ENTROPY_LEN| bytes of
// entropy in |entropy| and, optionally, a personalization string up to
// |CTR_DRBG_ENTROPY_LEN| bytes in length. It returns one on success and zero
// on error.
int CTR_DRBG_init(CTR_DRBG_STATE *drbg,
const uint8_t entropy[CTR_DRBG_ENTROPY_LEN],
const uint8_t *personalization,
size_t personalization_len);
// CTR_DRBG_reseed reseeds |drbg| given |CTR_DRBG_ENTROPY_LEN| bytes of entropy
// in |entropy| and, optionally, up to |CTR_DRBG_ENTROPY_LEN| bytes of
// additional data. It returns one on success or zero on error.
int CTR_DRBG_reseed(CTR_DRBG_STATE *drbg,
const uint8_t entropy[CTR_DRBG_ENTROPY_LEN],
const uint8_t *additional_data,
size_t additional_data_len);
// CTR_DRBG_generate processes up to |CTR_DRBG_ENTROPY_LEN| bytes of additional
// data (if any) and then writes |out_len| random bytes to |out|. It returns one
// on success or zero on error (more than |CTR_DRBG_ENTROPY_LEN| bytes of
// additional data).
int CTR_DRBG_generate(CTR_DRBG_STATE *drbg, uint8_t *out,
size_t out_len,
const uint8_t *additional_data,
size_t additional_data_len);
// CTR_DRBG_clear zeroises the state of |drbg|.
void CTR_DRBG_clear(CTR_DRBG_STATE *drbg);
#if defined(__cplusplus)
} // extern C
#endif

View File

@@ -0,0 +1,63 @@
/***************************************************************************
* Written by Nir Drucker and Shay Gueron
* AWS Cryptographic Algorithms Group
* (ndrucker@amazon.com, gueron@amazon.com)
*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
* The license is detailed in the file LICENSE.txt, and applies to this file.
* ***************************************************************************/
#pragma once
#include <string.h>
// EXTERNC gives a declaration C linkage when the header is compiled as C++
// and expands to nothing in C.
#ifdef __cplusplus
#define EXTERNC extern "C"
#else
#define EXTERNC
#endif
// For code clarity.
// IN and OUT expand to nothing; they only annotate parameter direction.
#define IN
#define OUT
// Request n-byte alignment (GCC/Clang attribute syntax).
#define ALIGN(n) __attribute__((aligned(n)))
// Storage class used for the small helper functions defined in headers/sources.
#define _INLINE_ static inline
// Status code type: SUCCESS (0) or ERROR (1).
typedef enum
{
SUCCESS=0,
ERROR=1
} status_t;
// After this point SUCCESS and ERROR are also object-like macros, so later
// uses expand to the plain integer literals 0 and 1.
// NOTE(review): the macros duplicate the enumerators above - confirm both
// forms are needed.
#define SUCCESS 0
#define ERROR 1
// Evaluate |func| and return ERROR from the enclosing function unless the
// result equals SUCCESS. Expands to a brace block, not a single statement.
#define GUARD(func) {if(SUCCESS != func) {return ERROR;}}
// CRYPTO_bswap4 returns |x| with the order of its four bytes reversed.
#if defined(__GNUC__) && __GNUC__ >= 2
static inline uint32_t CRYPTO_bswap4(uint32_t x) {
    return __builtin_bswap32(x);
}
#else
// Portable fallback for compilers without __builtin_bswap32, so that callers
// never end up with an undeclared function.
static inline uint32_t CRYPTO_bswap4(uint32_t x) {
    return ((x & 0x000000ffu) << 24) |
           ((x & 0x0000ff00u) << 8) |
           ((x & 0x00ff0000u) >> 8) |
           ((x & 0xff000000u) >> 24);
}
#endif
// Overwrite |len| bytes at |p| with zeros in a way the compiler is not
// expected to optimise away.
_INLINE_ void secure_clean(OUT uint8_t *p, IN const uint32_t len)
{
#ifndef _WIN32
    // Calling memset through a volatile function pointer prevents the
    // compiler from proving the call is dead and removing it.
    typedef void *(*wipe_fn_t)(void *, int, size_t);
    static volatile wipe_fn_t wipe = memset;
    wipe(p, 0, len);
#else
    SecureZeroMemory(p, len);
#endif
}

View File

@@ -0,0 +1,87 @@
// SPDX-License-Identifier: Apache-2.0 and Unknown
//
/*
NIST-developed software is provided by NIST as a public service. You may use,
copy, and distribute copies of the software in any medium, provided that you
keep intact this entire notice. You may improve, modify, and create derivative
works of the software or any portion of the software, and you may copy and
distribute such modifications or works. Modified works should carry a notice
stating that you changed the software and should note the date and nature of any
such change. Please explicitly acknowledge the National Institute of Standards
and Technology as the source of the software.
NIST-developed software is expressly provided "AS IS." NIST MAKES NO WARRANTY OF
ANY KIND, EXPRESS, IMPLIED, IN FACT, OR ARISING BY OPERATION OF LAW, INCLUDING,
WITHOUT LIMITATION, THE IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE, NON-INFRINGEMENT, AND DATA ACCURACY. NIST NEITHER REPRESENTS
NOR WARRANTS THAT THE OPERATION OF THE SOFTWARE WILL BE UNINTERRUPTED OR
ERROR-FREE, OR THAT ANY DEFECTS WILL BE CORRECTED. NIST DOES NOT WARRANT OR MAKE
ANY REPRESENTATIONS REGARDING THE USE OF THE SOFTWARE OR THE RESULTS THEREOF,
INCLUDING BUT NOT LIMITED TO THE CORRECTNESS, ACCURACY, RELIABILITY, OR
USEFULNESS OF THE SOFTWARE.
You are solely responsible for determining the appropriateness of using and
distributing the software and you assume all risks associated with its use,
including but not limited to the risks and costs of program errors, compliance
with applicable laws, damage to or loss of data, programs or equipment, and the
unavailability or interruption of operation. This software is not intended to be
used in any situation where a failure could cause risk of injury or damage to
property. The software developed by NIST employees is not subject to copyright
protection within the United States.
*/
#include <string.h>
#include <rng.h>
#include "ctr_drbg.h"
#ifdef ENABLE_CT_TESTING
#include <valgrind/memcheck.h>
#endif
// Status codes of the randombytes interface; only RNG_SUCCESS is returned by
// the functions visible in this file.
#define RNG_SUCCESS 0
#define RNG_BAD_MAXLEN -1
#define RNG_BAD_OUTBUF -2
#define RNG_BAD_REQ_LEN -3
// Single global DRBG state shared by every function in this file. It has
// external linkage and no locking.
CTR_DRBG_STATE drbg;
// Seed the global DRBG.
//   entropy_input          - CTR_DRBG_ENTROPY_LEN bytes of seed material
//   personalization_string - NULL, or CTR_DRBG_ENTROPY_LEN bytes that are
//                            XORed into the seed
//   security_strength      - ignored; the generator is always AES-256 based
// The function is static unless CTRDRBG_TEST_BENCH is defined.
#ifndef CTRDRBG_TEST_BENCH
static
#endif
void
randombytes_init_aes_ni(unsigned char *entropy_input,
                        unsigned char *personalization_string,
                        int security_strength) {
    // A non-NULL personalization string is always taken at full length.
    size_t personalization_len = 0;
    if (personalization_string != NULL) {
        personalization_len = CTR_DRBG_ENTROPY_LEN;
    }

    (void)security_strength; // fixed to 256

    CTR_DRBG_init(&drbg, entropy_input, personalization_string,
                  personalization_len);
}
// Fill |x| with |xlen| bytes from the global DRBG, without additional data.
// Always returns RNG_SUCCESS; the status of CTR_DRBG_generate is not checked.
// The function is static unless CTRDRBG_TEST_BENCH is defined.
#ifndef CTRDRBG_TEST_BENCH
static
#endif
int
randombytes_aes_ni(unsigned char *x, size_t xlen) {
CTR_DRBG_generate(&drbg, x, xlen, NULL, 0);
return RNG_SUCCESS;
}
#ifdef RANDOMBYTES_AES_NI
// Public randombytes entry point backed by the AES-NI CTR-DRBG.
// Fills |random_array| with |nbytes| bytes and returns the status code of
// randombytes_aes_ni (RNG_SUCCESS).
SQISIGN_API
int randombytes(unsigned char *random_array, unsigned long long nbytes) {
    int ret = randombytes_aes_ni(random_array, nbytes);
#ifdef ENABLE_CT_TESTING
    // Mark the whole output buffer as undefined so that Valgrind reports any
    // branch or memory access that depends on the random bytes. The length
    // must be the buffer size: |ret| is a status code (0 on success) and
    // would mark nothing.
    VALGRIND_MAKE_MEM_UNDEFINED(random_array, nbytes);
#endif
    return ret;
}
// Public seeding entry point: forwards all arguments unchanged to
// randombytes_init_aes_ni, which ignores |security_strength|.
SQISIGN_API
void randombytes_init(unsigned char *entropy_input,
unsigned char *personalization_string,
int security_strength) {
randombytes_init_aes_ni(entropy_input, personalization_string,
security_strength);
}
#endif

View File

@@ -0,0 +1,122 @@
#***************************************************************************
# This implementation is a modified version of the code,
# written by Nir Drucker and Shay Gueron
# AWS Cryptographic Algorithms Group
# (ndrucker@amazon.com, gueron@amazon.com)
#
# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
# The license is detailed in the file LICENSE.txt, and applies to this file.
#***************************************************************************
.intel_syntax noprefix
.data
# Constants are aligned to 16 bytes because they are read with vmovdqa.
.p2align 4, 0x90
# Shuffle mask for vpshufb: every 32-bit lane selects source bytes 13,14,15,12,
# that is the top word of the source rotated by one byte and replicated into
# all four lanes.
MASK1:
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
# Initial round constant, replicated in all four lanes; ROUND1 doubles it
# after each use.
CON1:
.long 1,1,1,1
# Byte distance between consecutive pairs of round keys (two 16-byte keys).
.set k256_size, 32
# Emit the GNU-stack note on Linux/ELF so the object does not request an
# executable stack.
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",@progbits
#endif
.text
################################################################################
# void aes256_key_expansion(OUT aes256_ks_t* ks, IN const uint8_t* key);
# The output parameter must be 16 bytes aligned!
#
#Linux ABI
# Pointer arguments: rdi is the destination key schedule, rsi the source key.
#define out rdi
#define in rsi
# Running round constant and the vpshufb mask loaded from MASK1.
#define CON xmm0
#define MASK_REG xmm1
# The two most recently produced round keys (even and odd index).
#define IN0 xmm2
#define IN1 xmm3
# Scratch registers used inside ROUND1 and ROUND2.
#define TMP1 xmm4
#define TMP2 xmm5
# All-zero register, used as the round key of aesenclast in ROUND2.
#define ZERO xmm15
# ROUND1 derives the next even-indexed round key from the previous two keys.
# Arguments: in0 is the key two positions back (updated in place with the new
# key), in1 is the previous key. The macro first advances the output pointer
# by k256_size and stores the new key at that address.
.macro ROUND1 in0 in1
add out, k256_size
# Broadcast the rotated top word of in1 to every lane. All four columns are
# then equal, so aesenclast reduces to a byte substitution followed by an
# XOR with the round constant.
vpshufb TMP2, \in1, MASK_REG
aesenclast TMP2, CON
# Double the round constant for the next use.
vpslld CON, CON, 1
# Three shift-and-XOR steps turn the four words of in0 into their running
# XOR prefixes (w0, w0^w1, w0^w1^w2, w0^w1^w2^w3).
vpslldq TMP1, \in0, 4
vpxor \in0, \in0, TMP1
vpslldq TMP1, TMP1, 4
vpxor \in0, \in0, TMP1
vpslldq TMP1, TMP1, 4
vpxor \in0, \in0, TMP1
# Fold in the substituted word and store the new key (aligned store).
vpxor \in0, \in0, TMP2
vmovdqa [out], \in0
.endm
# ROUND2 derives the next odd-indexed round key into IN1 from IN1 and the key
# just produced in IN0, and stores it at out+16. It uses neither rotation nor
# round constant, and it contains only vector instructions, so the CPU flags
# set before it are still valid afterwards.
.macro ROUND2
# Broadcast the top word of IN0; with a zero round key aesenclast reduces to
# a byte substitution of that word.
vpshufd TMP2, IN0, 0xff
aesenclast TMP2, ZERO
# Running XOR prefixes of the four words of IN1, as in ROUND1.
vpslldq TMP1, IN1, 4
vpxor IN1, IN1, TMP1
vpslldq TMP1, TMP1, 4
vpxor IN1, IN1, TMP1
vpslldq TMP1, TMP1, 4
vpxor IN1, IN1, TMP1
# Fold in the substituted word and store the new key (aligned store).
vpxor IN1, IN1, TMP2
vmovdqa [out+16], IN1
.endm
# Apple platforms prefix C symbols with an underscore.
#ifdef __APPLE__
#define AES256_KEY_EXPANSION _aes256_key_expansion
#else
#define AES256_KEY_EXPANSION aes256_key_expansion
#endif
# On non-Apple targets the symbol is typed as a function and given hidden
# visibility.
#ifndef __APPLE__
.type AES256_KEY_EXPANSION,@function
.hidden AES256_KEY_EXPANSION
#endif
.globl AES256_KEY_EXPANSION
# Entry point. Expands the 32-byte key at [in] into 15 round keys of 16 bytes
# each at [out]. The key is read with unaligned loads; every store to out is
# an aligned store, so out must be 16-byte aligned. The routine modifies rdi,
# ax, xmm0-xmm5 and xmm15.
AES256_KEY_EXPANSION:
# Round keys number 0 and 1 are the two halves of the raw key.
vmovdqu IN0, [in]
vmovdqu IN1, [in+16]
vmovdqa [out], IN0
vmovdqa [out+16], IN1
vmovdqa CON, [rip+CON1]
vmovdqa MASK_REG, [rip+MASK1]
vpxor ZERO, ZERO, ZERO
# Six iterations, each producing one even and one odd round key.
mov ax, 6
.loop256:
ROUND1 IN0, IN1
# The counter is decremented before ROUND2 on purpose: ROUND2 leaves the flags
# untouched, so the jne below still tests the result of this dec.
dec ax
ROUND2
jne .loop256
# Final even-indexed key, which completes the schedule of 15 round keys.
ROUND1 IN0, IN1
ret
#ifndef __APPLE__
.size AES256_KEY_EXPANSION, .-AES256_KEY_EXPANSION
#endif

View File

@@ -1,26 +1,15 @@
set(SOURCE_FILES_COMMON_SYS
set(SOURCE_FILES_COMMON_GENERIC
randombytes_system.c
aes_c.c
fips202.c
mem.c
tools.c
)
add_library(sqisign_common_sys ${SOURCE_FILES_COMMON_SYS})
target_include_directories(sqisign_common_sys PRIVATE include ../../include)
target_compile_options(sqisign_common_sys PUBLIC ${C_OPT_FLAGS})
set(SOURCE_FILES_COMMON_TEST
randombytes_ctrdrbg.c
aes_c.c
fips202.c
mem.c
)
add_library(sqisign_common_test ${SOURCE_FILES_COMMON_TEST})
target_include_directories(sqisign_common_test PRIVATE include ../include)
target_compile_options(sqisign_common_test PUBLIC ${C_OPT_FLAGS})
if (ENABLE_CT_TESTING)
target_compile_definitions(sqisign_common_sys PUBLIC ENABLE_CT_TESTING)
target_compile_definitions(sqisign_common_test PUBLIC ENABLE_CT_TESTING)
endif()
# Build two static libraries, sqisign_common_test and sqisign_common_sys, from
# the same SOURCE_FILES_COMMON_GENERIC list and with identical settings.
foreach (SQISIGN_COMMON_TARGET sqisign_common_test sqisign_common_sys)
add_library(${SQISIGN_COMMON_TARGET} STATIC ${SOURCE_FILES_COMMON_GENERIC})
# Headers are taken from the local include directory and from INC_PUBLIC; both
# are PRIVATE, so they are not propagated to consumers.
target_include_directories(${SQISIGN_COMMON_TARGET} PRIVATE include ${INC_PUBLIC})
# PUBLIC: the optimisation flags also apply to targets linking this library.
target_compile_options(${SQISIGN_COMMON_TARGET} PUBLIC ${C_OPT_FLAGS})
# With constant-time testing enabled, define ENABLE_CT_TESTING for the library
# and, because it is PUBLIC, for everything that links against it.
if (ENABLE_CT_TESTING)
target_compile_definitions(${SQISIGN_COMMON_TARGET} PUBLIC ENABLE_CT_TESTING)
endif()
endforeach()

View File

@@ -13,167 +13,7 @@
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#define SHAKE128_RATE 168
#define SHAKE256_RATE 136
#define SHA3_256_RATE 136
#define SHA3_384_RATE 104
#define SHA3_512_RATE 72
#define PQC_SHAKEINCCTX_BYTES (sizeof(uint64_t)*26)
#define PQC_SHAKECTX_BYTES (sizeof(uint64_t)*25)
// Context for incremental API
typedef struct {
uint64_t *ctx;
} shake128incctx;
// Context for non-incremental API
typedef struct {
uint64_t *ctx;
} shake128ctx;
// Context for incremental API
typedef struct {
uint64_t *ctx;
} shake256incctx;
// Context for non-incremental API
typedef struct {
uint64_t *ctx;
} shake256ctx;
// Context for incremental API
typedef struct {
uint64_t *ctx;
} sha3_256incctx;
// Context for incremental API
typedef struct {
uint64_t *ctx;
} sha3_384incctx;
// Context for incremental API
typedef struct {
uint64_t *ctx;
} sha3_512incctx;
/* Initialize the state and absorb the provided input.
*
* This function does not support being called multiple times
* with the same state.
*/
void shake128_absorb(shake128ctx *state, const uint8_t *input, size_t inlen);
/* Squeeze output out of the sponge.
*
* Supports being called multiple times
*/
void shake128_squeezeblocks(uint8_t *output, size_t nblocks, shake128ctx *state);
/* Free the state */
void shake128_ctx_release(shake128ctx *state);
/* Copy the state. */
void shake128_ctx_clone(shake128ctx *dest, const shake128ctx *src);
/* Initialize incremental hashing API */
void shake128_inc_init(shake128incctx *state);
/* Absorb more information into the XOF.
*
* Can be called multiple times.
*/
void shake128_inc_absorb(shake128incctx *state, const uint8_t *input, size_t inlen);
/* Finalize the XOF for squeezing */
void shake128_inc_finalize(shake128incctx *state);
/* Squeeze output out of the sponge.
*
* Supports being called multiple times
*/
void shake128_inc_squeeze(uint8_t *output, size_t outlen, shake128incctx *state);
/* Copy the context of the SHAKE128 XOF */
void shake128_inc_ctx_clone(shake128incctx *dest, const shake128incctx *src);
/* Free the context of the SHAKE128 XOF */
void shake128_inc_ctx_release(shake128incctx *state);
/* Initialize the state and absorb the provided input.
*
* This function does not support being called multiple times
* with the same state.
*/
void shake256_absorb(shake256ctx *state, const uint8_t *input, size_t inlen);
/* Squeeze output out of the sponge.
*
* Supports being called multiple times
*/
void shake256_squeezeblocks(uint8_t *output, size_t nblocks, shake256ctx *state);
/* Free the context held by this XOF */
void shake256_ctx_release(shake256ctx *state);
/* Copy the context held by this XOF */
void shake256_ctx_clone(shake256ctx *dest, const shake256ctx *src);
/* Initialize incremental hashing API */
void shake256_inc_init(shake256incctx *state);
void shake256_inc_absorb(shake256incctx *state, const uint8_t *input, size_t inlen);
/* Prepares for squeeze phase */
void shake256_inc_finalize(shake256incctx *state);
/* Squeeze output out of the sponge.
*
* Supports being called multiple times
*/
void shake256_inc_squeeze(uint8_t *output, size_t outlen, shake256incctx *state);
/* Copy the state */
void shake256_inc_ctx_clone(shake256incctx *dest, const shake256incctx *src);
/* Free the state */
void shake256_inc_ctx_release(shake256incctx *state);
/* One-stop SHAKE128 call */
void shake128(uint8_t *output, size_t outlen,
const uint8_t *input, size_t inlen);
/* One-stop SHAKE256 call */
void shake256(uint8_t *output, size_t outlen,
const uint8_t *input, size_t inlen);
/* Initialize the incremental hashing state */
void sha3_256_inc_init(sha3_256incctx *state);
/* Absorb blocks into SHA3 */
void sha3_256_inc_absorb(sha3_256incctx *state, const uint8_t *input, size_t inlen);
/* Obtain the output of the function and free `state` */
void sha3_256_inc_finalize(uint8_t *output, sha3_256incctx *state);
/* Copy the context */
void sha3_256_inc_ctx_clone(sha3_256incctx *dest, const sha3_256incctx *src);
/* Release the state, don't use if `_finalize` has been used */
void sha3_256_inc_ctx_release(sha3_256incctx *state);
void sha3_256(uint8_t *output, const uint8_t *input, size_t inlen);
/* Initialize the incremental hashing state */
void sha3_384_inc_init(sha3_384incctx *state);
/* Absorb blocks into SHA3 */
void sha3_384_inc_absorb(sha3_384incctx *state, const uint8_t *input, size_t inlen);
/* Obtain the output of the function and free `state` */
void sha3_384_inc_finalize(uint8_t *output, sha3_384incctx *state);
/* Copy the context */
void sha3_384_inc_ctx_clone(sha3_384incctx *dest, const sha3_384incctx *src);
/* Release the state, don't use if `_finalize` has been used */
void sha3_384_inc_ctx_release(sha3_384incctx *state);
/* One-stop SHA3-384 shop */
void sha3_384(uint8_t *output, const uint8_t *input, size_t inlen);
/* Initialize the incremental hashing state */
void sha3_512_inc_init(sha3_512incctx *state);
/* Absorb blocks into SHA3 */
void sha3_512_inc_absorb(sha3_512incctx *state, const uint8_t *input, size_t inlen);
/* Obtain the output of the function and free `state` */
void sha3_512_inc_finalize(uint8_t *output, sha3_512incctx *state);
/* Copy the context */
void sha3_512_inc_ctx_clone(sha3_512incctx *dest, const sha3_512incctx *src);
/* Release the state, don't use if `_finalize` has been used */
void sha3_512_inc_ctx_release(sha3_512incctx *state);
/* One-stop SHA3-512 shop */
void sha3_512(uint8_t *output, const uint8_t *input, size_t inlen);
#include "fips202.h"
#define NROUNDS 24
#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64 - (offset))))
@@ -686,10 +526,6 @@ static void keccak_inc_squeeze(uint8_t *h, size_t outlen,
}
void shake128_inc_init(shake128incctx *state) {
state->ctx = malloc(PQC_SHAKEINCCTX_BYTES);
if (state->ctx == NULL) {
exit(111);
}
keccak_inc_init(state->ctx);
}
@@ -706,22 +542,14 @@ void shake128_inc_squeeze(uint8_t *output, size_t outlen, shake128incctx *state)
}
void shake128_inc_ctx_clone(shake128incctx *dest, const shake128incctx *src) {
dest->ctx = malloc(PQC_SHAKEINCCTX_BYTES);
if (dest->ctx == NULL) {
exit(111);
}
memcpy(dest->ctx, src->ctx, PQC_SHAKEINCCTX_BYTES);
}
void shake128_inc_ctx_release(shake128incctx *state) {
free(state->ctx);
(void)state;
}
void shake256_inc_init(shake256incctx *state) {
state->ctx = malloc(PQC_SHAKEINCCTX_BYTES);
if (state->ctx == NULL) {
exit(111);
}
keccak_inc_init(state->ctx);
}
@@ -738,15 +566,11 @@ void shake256_inc_squeeze(uint8_t *output, size_t outlen, shake256incctx *state)
}
void shake256_inc_ctx_clone(shake256incctx *dest, const shake256incctx *src) {
dest->ctx = malloc(PQC_SHAKEINCCTX_BYTES);
if (dest->ctx == NULL) {
exit(111);
}
memcpy(dest->ctx, src->ctx, PQC_SHAKEINCCTX_BYTES);
}
void shake256_inc_ctx_release(shake256incctx *state) {
free(state->ctx);
(void)state;
}
@@ -762,10 +586,6 @@ void shake256_inc_ctx_release(shake256incctx *state) {
* - size_t inlen: length of input in bytes
**************************************************/
void shake128_absorb(shake128ctx *state, const uint8_t *input, size_t inlen) {
state->ctx = malloc(PQC_SHAKECTX_BYTES);
if (state->ctx == NULL) {
exit(111);
}
keccak_absorb(state->ctx, SHAKE128_RATE, input, inlen, 0x1F);
}
@@ -786,16 +606,12 @@ void shake128_squeezeblocks(uint8_t *output, size_t nblocks, shake128ctx *state)
}
void shake128_ctx_clone(shake128ctx *dest, const shake128ctx *src) {
dest->ctx = malloc(PQC_SHAKECTX_BYTES);
if (dest->ctx == NULL) {
exit(111);
}
memcpy(dest->ctx, src->ctx, PQC_SHAKECTX_BYTES);
}
/** Release the allocated state. Call only once. */
void shake128_ctx_release(shake128ctx *state) {
free(state->ctx);
(void)state;
}
/*************************************************
@@ -810,10 +626,6 @@ void shake128_ctx_release(shake128ctx *state) {
* - size_t inlen: length of input in bytes
**************************************************/
void shake256_absorb(shake256ctx *state, const uint8_t *input, size_t inlen) {
state->ctx = malloc(PQC_SHAKECTX_BYTES);
if (state->ctx == NULL) {
exit(111);
}
keccak_absorb(state->ctx, SHAKE256_RATE, input, inlen, 0x1F);
}
@@ -834,16 +646,12 @@ void shake256_squeezeblocks(uint8_t *output, size_t nblocks, shake256ctx *state)
}
void shake256_ctx_clone(shake256ctx *dest, const shake256ctx *src) {
dest->ctx = malloc(PQC_SHAKECTX_BYTES);
if (dest->ctx == NULL) {
exit(111);
}
memcpy(dest->ctx, src->ctx, PQC_SHAKECTX_BYTES);
}
/** Release the allocated state. Call only once. */
void shake256_ctx_release(shake256ctx *state) {
free(state->ctx);
(void)state;
}
/*************************************************
@@ -909,23 +717,15 @@ void shake256(uint8_t *output, size_t outlen,
}
void sha3_256_inc_init(sha3_256incctx *state) {
state->ctx = malloc(PQC_SHAKEINCCTX_BYTES);
if (state->ctx == NULL) {
exit(111);
}
keccak_inc_init(state->ctx);
}
void sha3_256_inc_ctx_clone(sha3_256incctx *dest, const sha3_256incctx *src) {
dest->ctx = malloc(PQC_SHAKEINCCTX_BYTES);
if (dest->ctx == NULL) {
exit(111);
}
memcpy(dest->ctx, src->ctx, PQC_SHAKEINCCTX_BYTES);
}
void sha3_256_inc_ctx_release(sha3_256incctx *state) {
free(state->ctx);
(void)state;
}
void sha3_256_inc_absorb(sha3_256incctx *state, const uint8_t *input, size_t inlen) {
@@ -970,18 +770,10 @@ void sha3_256(uint8_t *output, const uint8_t *input, size_t inlen) {
}
void sha3_384_inc_init(sha3_384incctx *state) {
state->ctx = malloc(PQC_SHAKEINCCTX_BYTES);
if (state->ctx == NULL) {
exit(111);
}
keccak_inc_init(state->ctx);
}
void sha3_384_inc_ctx_clone(sha3_384incctx *dest, const sha3_384incctx *src) {
dest->ctx = malloc(PQC_SHAKEINCCTX_BYTES);
if (dest->ctx == NULL) {
exit(111);
}
memcpy(dest->ctx, src->ctx, PQC_SHAKEINCCTX_BYTES);
}
@@ -990,7 +782,7 @@ void sha3_384_inc_absorb(sha3_384incctx *state, const uint8_t *input, size_t inl
}
void sha3_384_inc_ctx_release(sha3_384incctx *state) {
free(state->ctx);
(void)state;
}
void sha3_384_inc_finalize(uint8_t *output, sha3_384incctx *state) {
@@ -1031,18 +823,10 @@ void sha3_384(uint8_t *output, const uint8_t *input, size_t inlen) {
}
void sha3_512_inc_init(sha3_512incctx *state) {
state->ctx = malloc(PQC_SHAKEINCCTX_BYTES);
if (state->ctx == NULL) {
exit(111);
}
keccak_inc_init(state->ctx);
}
void sha3_512_inc_ctx_clone(sha3_512incctx *dest, const sha3_512incctx *src) {
dest->ctx = malloc(PQC_SHAKEINCCTX_BYTES);
if (dest->ctx == NULL) {
exit(111);
}
memcpy(dest->ctx, src->ctx, PQC_SHAKEINCCTX_BYTES);
}
@@ -1051,7 +835,7 @@ void sha3_512_inc_absorb(sha3_512incctx *state, const uint8_t *input, size_t inl
}
void sha3_512_inc_ctx_release(sha3_512incctx *state) {
free(state->ctx);
(void)state;
}
void sha3_512_inc_finalize(uint8_t *output, sha3_512incctx *state) {
@@ -1090,13 +874,3 @@ void sha3_512(uint8_t *output, const uint8_t *input, size_t inlen) {
output[i] = t[i];
}
}
int SHAKE128(unsigned char *output, size_t outputByteLen, const unsigned char *input, size_t inputByteLen) {
shake128(output, outputByteLen, input, inputByteLen);
return 0;
}
int SHAKE256(unsigned char *output, size_t outputByteLen, const unsigned char *input, size_t inputByteLen) {
shake256(output, outputByteLen, input, inputByteLen);
return 0;
}

View File

@@ -1,23 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
#ifndef AES_H
#define AES_H
#include <stddef.h>
#include <stdint.h>
void AES_256_ECB(const uint8_t *input, const uint8_t *key, uint8_t *output);
#define AES_ECB_encrypt AES_256_ECB
#ifdef ENABLE_AESNI
int AES_128_CTR_NI(unsigned char *output, size_t outputByteLen,
const unsigned char *input, size_t inputByteLen);
int AES_128_CTR_4R_NI(unsigned char *output, size_t outputByteLen,
const unsigned char *input, size_t inputByteLen);
#define AES_128_CTR AES_128_CTR_NI
#else
int AES_128_CTR(unsigned char *output, size_t outputByteLen,
const unsigned char *input, size_t inputByteLen);
#endif
#endif

View File

@@ -1,63 +1,126 @@
// SPDX-License-Identifier: Apache-2.0
#ifndef BENCH_H__
#define BENCH_H__
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <inttypes.h>
#if defined(TARGET_OS_UNIX) && (defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_OTHER))
#include <time.h>
#if defined(__APPLE__)
#include "bench_macos.h"
#endif
#if (defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_S390X) || defined(TARGET_OTHER))
#define print_bench_unit printf("nsec\n");
#if defined(TARGET_ARM) || defined(TARGET_S390X) || defined(NO_CYCLE_COUNTER)
#define BENCH_UNIT0 "nanoseconds"
#define BENCH_UNIT3 "microseconds"
#define BENCH_UNIT6 "milliseconds"
#define BENCH_UNIT9 "seconds"
#else
#define print_bench_unit printf("cycles\n");
#define BENCH_UNIT0 "cycles"
#define BENCH_UNIT3 "kilocycles"
#define BENCH_UNIT6 "megacycles"
#define BENCH_UNIT9 "gigacycles"
#endif
#if (defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_S390X))
#define BENCH_UNITS "nsec"
#else
#define BENCH_UNITS "cycles"
/*
 * One-time setup for the cycle counter.
 *
 * Only does work when building for Apple ARM64, where it calls
 * macos_init_rdtsc(); on every other configuration the body is empty.
 */
static inline void
cpucycles_init(void) {
#if defined(__APPLE__) && defined(TARGET_ARM64)
macos_init_rdtsc();
#endif
}
static inline int64_t cpucycles(void) {
#if (defined(TARGET_AMD64) || defined(TARGET_X86))
unsigned int hi, lo;
static inline uint64_t
cpucycles(void)
{
#if defined(TARGET_AMD64) || defined(TARGET_X86)
uint32_t hi, lo;
asm volatile ("rdtsc" : "=a" (lo), "=d"(hi));
return ((int64_t) lo) | (((int64_t) hi) << 32);
#elif (defined(TARGET_S390X))
asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
return ((uint64_t)lo) | ((uint64_t)hi << 32);
#elif defined(TARGET_S390X)
uint64_t tod;
asm volatile("stckf %0\n" : "=Q" (tod) : : "cc");
asm volatile("stckf %0\n" : "=Q"(tod) : : "cc");
return (tod * 1000 / 4096);
#elif defined(TARGET_ARM64) && !defined(NO_CYCLE_COUNTER)
#if defined(__APPLE__)
return macos_rdtsc();
#else
uint64_t cycles;
asm volatile("mrs %0, PMCCNTR_EL0" : "=r"(cycles));
return cycles;
#endif // __APPLE__
#else
struct timespec time;
clock_gettime(CLOCK_REALTIME, &time);
return (int64_t)(time.tv_sec * 1e9 + time.tv_nsec);
return (uint64_t)time.tv_sec * 1000000000 + time.tv_nsec;
#endif
}
static inline int cmpfunc (const void *a, const void *b) {
return ( *(uint64_t *)a - * (uint64_t *)b );
/*
 * qsort() comparator for arrays of uint64_t.
 *
 * Returns +1 if *a > *b, -1 if *a < *b and 0 when they are equal. The
 * result is derived from comparisons rather than a subtraction so that it
 * cannot be corrupted by truncating a 64-bit difference to int.
 */
static inline int
CMPFUNC(const void *a, const void *b)
{
    const uint64_t lhs = *(const uint64_t *)a;
    const uint64_t rhs = *(const uint64_t *)b;

    /* (lhs > rhs) and (lhs < rhs) are each 0 or 1, so this yields -1/0/+1 */
    return (lhs > rhs) - (lhs < rhs);
}
#define BENCH_CODE_1(r) \
cycles = 0; \
for (i = 0; i < (r); ++i) { \
cycles1 = cpucycles();
#define BENCH_CODE_2(name, csv) \
cycles2 = cpucycles(); \
if(i < LIST_SIZE) \
cycles_list[i] = (cycles2 - cycles1);\
cycles = cycles + (cycles2 - cycles1); \
} \
qsort(cycles_list, (runs < LIST_SIZE)? runs : LIST_SIZE, sizeof(uint64_t), cmpfunc);\
if (csv) \
printf("%2" PRId64 ",", cycles_list[(runs < LIST_SIZE)? runs/2 : LIST_SIZE/2]); \
else { \
printf(" %-20s-> median: %2" PRId64 ", average: %2" PRId64 " ", name, \
cycles_list[(runs < LIST_SIZE)? runs/2 : LIST_SIZE/2], (cycles / runs)); \
printf("%s\n", BENCH_UNITS); \
/*
 * Integer square root: returns the largest r such that r * r <= x.
 *
 * The result is built one bit at a time from the most significant bit
 * downwards; a candidate bit is kept whenever the square of the candidate
 * still fits under x. Since x < 2^64, the result always fits in 32 bits.
 */
static inline uint32_t
ISQRT(uint64_t x)
{
    uint32_t r = 0;
    /* Walk an unsigned single-bit mask from bit 31 down to bit 0. Using an
     * unsigned mask avoids the signed-overflow of `1 << 31` and does not
     * need the POSIX-only ssize_t type for the loop index. */
    for (uint32_t bit = UINT32_C(1) << 31; bit != 0; bit >>= 1) {
        uint32_t s = r + bit;
        /* Widen before multiplying so the square is computed in 64 bits */
        if ((uint64_t)s * s <= x)
            r = s;
    }
    return r;
}
/*
 * Scale a raw counter value down by a factor of 10^6 for printing.
 *
 * The first division by 1000 is an integer division (dropping the low three
 * decimal digits); the second is done in floating point, so the result
 * carries at most three fractional digits.
 */
static inline double
_TRUNC(uint64_t x)
{
    const uint64_t thousands = x / 1000;
    return thousands / 1000.;
}
#define _FMT ".3lf"
#define _UNIT BENCH_UNIT6
/*
 * BENCH_CODE_1(RUNS) / BENCH_CODE_2(name) are a pair: the code to be measured
 * is written between them, and only the two together form balanced braces.
 *
 * BENCH_CODE_1 opens a new scope, aborts if RUNS evaluates to zero, declares
 * the accumulators plus a variable-length array holding one sample per run,
 * then opens the measurement loop and takes the starting timestamp with
 * cpucycles().
 */
#define BENCH_CODE_1(RUNS) \
{ \
const size_t count = (RUNS); \
if (!count) \
abort(); \
uint64_t cycles, cycles1, cycles2; \
uint64_t cycles_list[count]; \
cycles = 0; \
for (size_t i = 0; i < count; ++i) { \
cycles1 = cpucycles();
/*
 * BENCH_CODE_2 takes the ending timestamp, records the elapsed value for this
 * run and closes the loop opened by BENCH_CODE_1. It then sorts the samples
 * with CMPFUNC, computes the variance around the (integer) mean, and prints
 * one line labelled `name` with average, standard deviation (ISQRT of the
 * variance), median (element count / 2 of the sorted samples), minimum and
 * maximum. Every printed value goes through _TRUNC and is labelled with
 * _UNIT. Finally it closes the scope opened by BENCH_CODE_1.
 */
#define BENCH_CODE_2(name) \
cycles2 = cpucycles(); \
cycles_list[i] = cycles2 - cycles1; \
cycles += cycles2 - cycles1; \
} \
qsort(cycles_list, count, sizeof(uint64_t), CMPFUNC); \
uint64_t variance = 0; \
for (size_t i = 0; i < count; ++i) { \
int64_t off = cycles_list[i] - cycles / count; \
variance += off * off; \
} \
variance /= count; \
printf(" %-10s", name); \
printf(" | average %9" _FMT " | stddev %9" _FMT, \
_TRUNC(cycles / count), \
_TRUNC(ISQRT(variance))); \
printf(" | median %9" _FMT " | min %9" _FMT " | max %9" _FMT, \
_TRUNC(cycles_list[count / 2]), \
_TRUNC(cycles_list[0]), \
_TRUNC(cycles_list[count - 1])); \
printf(" (%s)\n", _UNIT); \
}
#endif

View File

@@ -0,0 +1,143 @@
// WARNING: must be run as root on an M1 device
// WARNING: fragile, uses private apple APIs
// currently no command line interface, see variables at top of main
/*
no warranty; use at your own risk - i believe this code needs
some minor changes to work on some later hardware and/or software revisions,
which is unsurprising given the use of undocumented, private APIs.
------------------------------------------------------------------------------
This code is available under 2 licenses -- choose whichever you prefer.
------------------------------------------------------------------------------
ALTERNATIVE A - MIT License
Copyright (c) 2020 Dougall Johnson
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
------------------------------------------------------------------------------
ALTERNATIVE B - Public Domain (www.unlicense.org)
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
software, either in source code form or as a compiled binary, for any purpose,
commercial or non-commercial, and by any means.
In jurisdictions that recognize copyright laws, the author or authors of this
software dedicate any and all copyright interest in the software to the public
domain. We make this dedication for the benefit of the public at large and to
the detriment of our heirs and successors. We intend this dedication to be an
overt act of relinquishment in perpetuity of all present and future rights to
this software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
------------------------------------------------------------------------------
*/
/*
Based on https://github.com/travisdowns/robsize
Henry Wong <henry@stuffedcow.net>
http://blog.stuffedcow.net/2013/05/measuring-rob-capacity/
2014-10-14
*/
#include <dlfcn.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#define KPERF_LIST \
/* ret, name, params */ \
F(int, kpc_force_all_ctrs_set, int) \
F(int, kpc_set_counting, uint32_t) \
F(int, kpc_set_thread_counting, uint32_t) \
F(int, kpc_set_config, uint32_t, void *) \
F(int, kpc_get_thread_counters, int, unsigned int, void *)
#define F(ret, name, ...) \
typedef ret name##proc(__VA_ARGS__); \
static name##proc *name;
KPERF_LIST
#undef F
#define CFGWORD_EL0A64EN_MASK (0x20000)
#define CPMU_CORE_CYCLE 0x02
#define KPC_CLASS_FIXED (0)
#define KPC_CLASS_CONFIGURABLE (1)
#define COUNTERS_COUNT 10
#define KPC_MASK ((1u << KPC_CLASS_CONFIGURABLE) | (1u << KPC_CLASS_FIXED))
static uint64_t g_config[COUNTERS_COUNT];
static uint64_t g_counters[COUNTERS_COUNT];
/*
 * Apply the counter configuration through the kperf entry points, in order:
 * force all counters, install g_config, enable counting, then enable
 * per-thread counting. Each call is treated as failed when it returns
 * non-zero; the first failure prints a message naming the call and the
 * remaining steps are skipped.
 */
static void
macos_configure_rdtsc()
{
    if (kpc_force_all_ctrs_set(1)) {
        printf("kpc_force_all_ctrs_set failed\n");
    } else if (kpc_set_config(KPC_MASK, g_config)) {
        printf("kpc_set_config failed\n");
    } else if (kpc_set_counting(KPC_MASK)) {
        printf("kpc_set_counting failed\n");
    } else if (kpc_set_thread_counting(KPC_MASK)) {
        printf("kpc_set_thread_counting failed\n");
    }
}
/*
 * Load the kperf private framework, resolve every entry point named in
 * KPERF_LIST, and configure the counters.
 *
 * On any failure (framework not loadable, symbol missing) a diagnostic is
 * printed and the function returns early without configuring anything.
 * The dlopen() handle is not closed in this function.
 */
static void
macos_init_rdtsc()
{
void *kperf =
dlopen("/System/Library/PrivateFrameworks/kperf.framework/Versions/A/kperf", RTLD_LAZY);
if (!kperf) {
printf("kperf = %p\n", kperf);
return;
}
/* X-macro: for each KPERF_LIST entry, look the symbol up by name and store
 * it in the same-named static function pointer; the `return` inside the
 * expansion leaves macos_init_rdtsc() on the first missing symbol. */
#define F(ret, name, ...) \
name = (name##proc *)(intptr_t)(dlsym(kperf, #name)); \
if (!name) { \
printf("%s = %p\n", #name, (void *)(intptr_t)name); \
return; \
}
KPERF_LIST
#undef F
/* Only configuration slot 0 is set here; the other g_config entries keep
 * their static zero initialisation. */
g_config[0] = CPMU_CORE_CYCLE | CFGWORD_EL0A64EN_MASK;
macos_configure_rdtsc();
}
/*
 * Read the per-thread counters into g_counters and return entry 2.
 *
 * If kpc_get_thread_counters() reports failure (non-zero), a message is
 * printed and 1 is returned instead.
 */
static uint64_t
macos_rdtsc(void)
{
    const int failed = kpc_get_thread_counters(0, COUNTERS_COUNT, g_counters);

    if (!failed)
        return g_counters[2];

    printf("kpc_get_thread_counters failed\n");
    return 1;
}

View File

@@ -0,0 +1,32 @@
// SPDX-License-Identifier: Apache-2.0
#ifndef BENCH_TEST_ARGUMENTS_H__
#define BENCH_TEST_ARGUMENTS_H__
#include <inttypes.h>
#include <stdio.h>
#include <stdint.h>
/*
 * Parse a "--seed=..." command-line argument.
 *
 * Two forms are accepted:
 *   --seed=<decimal>                      stores the value in seed[0] only;
 *   --seed={ 0x<hex>, ... (12 words) }    fills seed[0] .. seed[11].
 *
 * `seed` must point to at least 12 words. Returns 0 when one of the forms
 * matched and 1 otherwise. When 1 is returned, some leading entries of
 * `seed` may already have been overwritten by a partial match.
 */
static int parse_seed(const char *arg, uint32_t *seed)
{
    /* The SCN* macros are the scanf counterparts of PRI*; using them keeps
     * the conversions correct wherever uint32_t is not `unsigned int`. */
    if (sscanf(arg, "--seed=%" SCNu32, &seed[0]) == 1)
        return 0;

    if (sscanf(arg, "--seed={ "
               "0x%" SCNx32 ", 0x%" SCNx32 ", 0x%" SCNx32 ", 0x%" SCNx32 ", 0x%" SCNx32 ", 0x%" SCNx32 ", "
               "0x%" SCNx32 ", 0x%" SCNx32 ", 0x%" SCNx32 ", 0x%" SCNx32 ", 0x%" SCNx32 ", 0x%" SCNx32 " }",
               &seed[0], &seed[1], &seed[2], &seed[3], &seed[4], &seed[5],
               &seed[6], &seed[7], &seed[8], &seed[9], &seed[10], &seed[11]) == 12)
        return 0;

    return 1;
}
/*
 * Print the 12-word seed on stdout in the brace form of the --seed option,
 * i.e.  Random seed: "--seed={ 0x........, ..., 0x........ }"
 *
 * `seed` must point to at least 12 words.
 */
static void print_seed(const uint32_t *seed)
{
    printf("Random seed: \"--seed={ ");
    for (int i = 0; i < 12; i++) {
        /* PRIx32 matches uint32_t on every platform, unlike a bare %x */
        printf("0x%08" PRIx32 "%s", seed[i], (i < 11) ? ", " : " }\"\n");
    }
}
#endif

View File

@@ -4,8 +4,168 @@
#define FIPS202_H
#include <stddef.h>
#include <stdint.h>
int SHAKE128(unsigned char *output, size_t outputByteLen, const unsigned char *input, size_t inputByteLen);
int SHAKE256(unsigned char *output, size_t outputByteLen, const unsigned char *input, size_t inputByteLen);
#define SHAKE128_RATE 168
#define SHAKE256_RATE 136
#define SHA3_256_RATE 136
#define SHA3_384_RATE 104
#define SHA3_512_RATE 72
#define PQC_SHAKEINCCTX_U64WORDS 26
#define PQC_SHAKECTX_U64WORDS 25
#define PQC_SHAKEINCCTX_BYTES (sizeof(uint64_t) * 26)
#define PQC_SHAKECTX_BYTES (sizeof(uint64_t) * 25)
// Context for incremental API
typedef struct {
uint64_t ctx[PQC_SHAKEINCCTX_U64WORDS];
} shake128incctx;
// Context for non-incremental API
typedef struct {
uint64_t ctx[PQC_SHAKECTX_U64WORDS];
} shake128ctx;
// Context for incremental API
typedef struct {
uint64_t ctx[PQC_SHAKEINCCTX_U64WORDS];
} shake256incctx;
// Context for non-incremental API
typedef struct {
uint64_t ctx[PQC_SHAKECTX_U64WORDS];
} shake256ctx;
// Context for incremental API
typedef struct {
uint64_t ctx[PQC_SHAKEINCCTX_U64WORDS];
} sha3_256incctx;
// Context for incremental API
typedef struct {
uint64_t ctx[PQC_SHAKEINCCTX_U64WORDS];
} sha3_384incctx;
// Context for incremental API
typedef struct {
uint64_t ctx[PQC_SHAKEINCCTX_U64WORDS];
} sha3_512incctx;
/* Initialize the state and absorb the provided input.
*
* This function does not support being called multiple times
* with the same state.
*/
void shake128_absorb(shake128ctx *state, const uint8_t *input, size_t inlen);
/* Squeeze output out of the sponge.
*
* Supports being called multiple times
*/
void shake128_squeezeblocks(uint8_t *output, size_t nblocks, shake128ctx *state);
/* Free the state */
void shake128_ctx_release(shake128ctx *state);
/* Copy the state. */
void shake128_ctx_clone(shake128ctx *dest, const shake128ctx *src);
/* Initialize incremental hashing API */
void shake128_inc_init(shake128incctx *state);
/* Absorb more information into the XOF.
*
* Can be called multiple times.
*/
void shake128_inc_absorb(shake128incctx *state, const uint8_t *input, size_t inlen);
/* Finalize the XOF for squeezing */
void shake128_inc_finalize(shake128incctx *state);
/* Squeeze output out of the sponge.
*
* Supports being called multiple times
*/
void shake128_inc_squeeze(uint8_t *output, size_t outlen, shake128incctx *state);
/* Copy the context of the SHAKE128 XOF */
void shake128_inc_ctx_clone(shake128incctx *dest, const shake128incctx *src);
/* Free the context of the SHAKE128 XOF */
void shake128_inc_ctx_release(shake128incctx *state);
/* Initialize the state and absorb the provided input.
*
* This function does not support being called multiple times
* with the same state.
*/
void shake256_absorb(shake256ctx *state, const uint8_t *input, size_t inlen);
/* Squeeze output out of the sponge.
*
* Supports being called multiple times
*/
void shake256_squeezeblocks(uint8_t *output, size_t nblocks, shake256ctx *state);
/* Free the context held by this XOF */
void shake256_ctx_release(shake256ctx *state);
/* Copy the context held by this XOF */
void shake256_ctx_clone(shake256ctx *dest, const shake256ctx *src);
/* Initialize incremental hashing API */
void shake256_inc_init(shake256incctx *state);
/* Absorb more information into the XOF (SHAKE256 counterpart of shake128_inc_absorb) */
void shake256_inc_absorb(shake256incctx *state, const uint8_t *input, size_t inlen);
/* Prepares for squeeze phase */
void shake256_inc_finalize(shake256incctx *state);
/* Squeeze output out of the sponge.
*
* Supports being called multiple times
*/
void shake256_inc_squeeze(uint8_t *output, size_t outlen, shake256incctx *state);
/* Copy the state */
void shake256_inc_ctx_clone(shake256incctx *dest, const shake256incctx *src);
/* Free the state */
void shake256_inc_ctx_release(shake256incctx *state);
/* One-stop SHAKE128 call */
void shake128(uint8_t *output, size_t outlen,
const uint8_t *input, size_t inlen);
/* One-stop SHAKE256 call */
void shake256(uint8_t *output, size_t outlen,
const uint8_t *input, size_t inlen);
/* Initialize the incremental hashing state */
void sha3_256_inc_init(sha3_256incctx *state);
/* Absorb blocks into SHA3 */
void sha3_256_inc_absorb(sha3_256incctx *state, const uint8_t *input, size_t inlen);
/* Obtain the output of the function and free `state` */
void sha3_256_inc_finalize(uint8_t *output, sha3_256incctx *state);
/* Copy the context */
void sha3_256_inc_ctx_clone(sha3_256incctx *dest, const sha3_256incctx *src);
/* Release the state, don't use if `_finalize` has been used */
void sha3_256_inc_ctx_release(sha3_256incctx *state);
/* One-stop SHA3-256 call: hashes `inlen` bytes of `input` into `output` */
void sha3_256(uint8_t *output, const uint8_t *input, size_t inlen);
/* Initialize the incremental hashing state */
void sha3_384_inc_init(sha3_384incctx *state);
/* Absorb blocks into SHA3 */
void sha3_384_inc_absorb(sha3_384incctx *state, const uint8_t *input, size_t inlen);
/* Obtain the output of the function and free `state` */
void sha3_384_inc_finalize(uint8_t *output, sha3_384incctx *state);
/* Copy the context */
void sha3_384_inc_ctx_clone(sha3_384incctx *dest, const sha3_384incctx *src);
/* Release the state, don't use if `_finalize` has been used */
void sha3_384_inc_ctx_release(sha3_384incctx *state);
/* One-stop SHA3-384 shop */
void sha3_384(uint8_t *output, const uint8_t *input, size_t inlen);
/* Initialize the incremental hashing state */
void sha3_512_inc_init(sha3_512incctx *state);
/* Absorb blocks into SHA3 */
void sha3_512_inc_absorb(sha3_512incctx *state, const uint8_t *input, size_t inlen);
/* Obtain the output of the function and free `state` */
void sha3_512_inc_finalize(uint8_t *output, sha3_512incctx *state);
/* Copy the context */
void sha3_512_inc_ctx_clone(sha3_512incctx *dest, const sha3_512incctx *src);
/* Release the state, don't use if `_finalize` has been used */
void sha3_512_inc_ctx_release(sha3_512incctx *state);
/* One-stop SHA3-512 shop */
void sha3_512(uint8_t *output, const uint8_t *input, size_t inlen);
#endif

View File

@@ -0,0 +1,49 @@
#ifndef TOOLS_H
#define TOOLS_H
#include <time.h>
// Debug printing:
// https://stackoverflow.com/questions/1644868/define-macro-for-debug-printing-in-c
/* DEBUG_PRINT is 1 in debug builds and 0 when NDEBUG is defined. */
#ifndef NDEBUG
#define DEBUG_PRINT 1
#else
#define DEBUG_PRINT 0
#endif

/* __FILE_NAME__ is not part of standard C; where the compiler does not
 * provide it, fall back to the standard __FILE__ so the message still names
 * the source file. */
#ifndef __FILE_NAME__
#define __FILE_NAME__ __FILE__
#endif

/* No fallbacks are defined for __LINE__ or __func__: __LINE__ is always a
 * predefined macro, and __func__ is a predefined identifier rather than a
 * macro, so an `#ifndef __func__` guard would always fire and replace the
 * real function name with a placeholder. */

/*
 * debug_print(msg): in debug builds, print `msg` as a warning together with
 * the file, line and function of the call site. Expands to a single
 * statement. The `if (DEBUG_PRINT)` form keeps the printf call visible to
 * the compiler in release builds, so it is still type-checked.
 *
 * NOTE: the expansion calls printf, so the including translation unit needs
 * <stdio.h> in scope.
 */
#define debug_print(fmt)                                                  \
    do {                                                                  \
        if (DEBUG_PRINT)                                                  \
            printf("warning: %s, file %s, line %d, function %s().\n",     \
                   fmt,                                                   \
                   __FILE_NAME__,                                         \
                   __LINE__,                                              \
                   __func__);                                             \
    } while (0)
clock_t tic(void);
float tac(void); /* time in ms since last tic */
float TAC(const char *str); /* same, but prints it with label 'str' */
float toc(const clock_t t); /* time in ms since t */
float TOC(const clock_t t, const char *str); /* same, but prints it with label 'str' */
float TOC_clock(const clock_t t, const char *str);
clock_t dclock(const clock_t t); // return the clock cycle diff between now and t
float clock_to_time(const clock_t t,
const char *str); // convert the number of clock cycles t to time
float clock_print(const clock_t t, const char *str);
#endif

View File

@@ -5,24 +5,27 @@
#include <stdint.h>
#if defined(__GNUC__) || defined(__clang__)
#define BSWAP16(i) __builtin_bswap16((i))
#define BSWAP32(i) __builtin_bswap32((i))
#define BSWAP64(i) __builtin_bswap64((i))
#define UNUSED __attribute__((unused))
#else
#define BSWAP32(i) ((((i) >> 24) & 0xff) | (((i) >> 8) & 0xff00) | (((i) & 0xff00) << 8) | ((i) << 24))
#define BSWAP16(i) ((((i) >> 8) & 0xff) | (((i) & 0xff00) << 8))
#define BSWAP32(i) \
((((i) >> 24) & 0xff) | (((i) >> 8) & 0xff00) | (((i) & 0xff00) << 8) | ((i) << 24))
/* Portable 64-bit byte swap. The argument is converted to uint64_t and each
 * byte is moved to its mirrored position with an explicit mask, so the
 * result does not depend on the width of the argument's type. Like the other
 * fallback macros, the argument is evaluated more than once. */
#define BSWAP64(i)                                                  \
    (((((uint64_t)(i)) >> 56) & UINT64_C(0x00000000000000ff)) |     \
     ((((uint64_t)(i)) >> 40) & UINT64_C(0x000000000000ff00)) |     \
     ((((uint64_t)(i)) >> 24) & UINT64_C(0x0000000000ff0000)) |     \
     ((((uint64_t)(i)) >> 8) & UINT64_C(0x00000000ff000000)) |      \
     ((((uint64_t)(i)) << 8) & UINT64_C(0x000000ff00000000)) |      \
     ((((uint64_t)(i)) << 24) & UINT64_C(0x0000ff0000000000)) |     \
     ((((uint64_t)(i)) << 40) & UINT64_C(0x00ff000000000000)) |     \
     ((((uint64_t)(i)) << 56) & UINT64_C(0xff00000000000000)))
#define UNUSED
#endif
#if defined(RADIX_64)
#define digit_t uint64_t
#define sdigit_t int64_t
#define DIGIT_LEN 8
#define RADIX 64
#define LOG2RADIX 6
#define BSWAP_DIGIT(i) BSWAP64(i)
#elif defined(RADIX_32)
#define digit_t uint32_t
#define sdigit_t int32_t
#define DIGIT_LEN 4
#define RADIX 32
#define LOG2RADIX 5
#define BSWAP_DIGIT(i) BSWAP32(i)

View File

@@ -1,9 +1,12 @@
// SPDX-License-Identifier: Apache-2.0
#include <mem.h>
#include <string.h>
#include <stdlib.h>
void sqisign_secure_free(void *mem, size_t size) {
void
sqisign_secure_free(void *mem, size_t size)
{
if (mem) {
typedef void *(*memset_t)(void *, int, size_t);
static volatile memset_t memset_func = memset;
@@ -11,7 +14,9 @@ void sqisign_secure_free(void *mem, size_t size) {
free(mem);
}
}
void sqisign_secure_clear(void *mem, size_t size) {
void
sqisign_secure_clear(void *mem, size_t size)
{
typedef void *(*memset_t)(void *, int, size_t);
static volatile memset_t memset_func = memset;
memset_func(mem, 0, size);

View File

@@ -1,140 +0,0 @@
// SPDX-License-Identifier: Apache-2.0 and Unknown
//
/*
NIST-developed software is provided by NIST as a public service. You may use, copy, and distribute copies of the software in any medium, provided that you keep intact this entire notice. You may improve, modify, and create derivative works of the software or any portion of the software, and you may copy and distribute such modifications or works. Modified works should carry a notice stating that you changed the software and should note the date and nature of any such change. Please explicitly acknowledge the National Institute of Standards and Technology as the source of the software.
NIST-developed software is expressly provided "AS IS." NIST MAKES NO WARRANTY OF ANY KIND, EXPRESS, IMPLIED, IN FACT, OR ARISING BY OPERATION OF LAW, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND DATA ACCURACY. NIST NEITHER REPRESENTS NOR WARRANTS THAT THE OPERATION OF THE SOFTWARE WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ANY DEFECTS WILL BE CORRECTED. NIST DOES NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE USE OF THE SOFTWARE OR THE RESULTS THEREOF, INCLUDING BUT NOT LIMITED TO THE CORRECTNESS, ACCURACY, RELIABILITY, OR USEFULNESS OF THE SOFTWARE.
You are solely responsible for determining the appropriateness of using and distributing the software and you assume all risks associated with its use, including but not limited to the risks and costs of program errors, compliance with applicable laws, damage to or loss of data, programs or equipment, and the unavailability or interruption of operation. This software is not intended to be used in any situation where a failure could cause risk of injury or damage to property. The software developed by NIST employees is not subject to copyright protection within the United States.
*/
#include <string.h>
#include <aes.h>
#ifdef ENABLE_CT_TESTING
#include <valgrind/memcheck.h>
#endif
#define RNG_SUCCESS 0
#define RNG_BAD_MAXLEN -1
#define RNG_BAD_OUTBUF -2
#define RNG_BAD_REQ_LEN -3
/* Adapter with (key, ctr, buffer) argument order around AES_ECB_encrypt,
 * which takes (input, key, output): encrypts the block `ctr` under `key`
 * into `buffer`. */
static __inline void
AES256_ECB(unsigned char *key, unsigned char *ctr, unsigned char *buffer)
{
    AES_ECB_encrypt(ctr, key, buffer);
}
typedef struct {
unsigned char buffer[16];
int buffer_pos;
unsigned long length_remaining;
unsigned char key[32];
unsigned char ctr[16];
} AES_XOF_struct;
typedef struct {
unsigned char Key[32];
unsigned char V[16];
int reseed_counter;
} AES256_CTR_DRBG_struct;
void
AES256_CTR_DRBG_Update(unsigned char *provided_data,
unsigned char *Key,
unsigned char *V);
AES256_CTR_DRBG_struct DRBG_ctx;
/*
 * Seed the global DRBG state (DRBG_ctx).
 *
 * The 48-byte seed material is `entropy_input`, XORed byte-wise with
 * `personalization_string` when that pointer is non-NULL. Key and V are
 * zeroed, the seed material is mixed in through AES256_CTR_DRBG_Update, and
 * the reseed counter is set to 1. `security_strength` is ignored.
 */
static void
randombytes_init_nist(unsigned char *entropy_input,
                      unsigned char *personalization_string,
                      int security_strength)
{
    unsigned char seed_material[48];

    (void)security_strength; // Unused parameter

    if (personalization_string != NULL) {
        for (size_t k = 0; k < sizeof seed_material; k++) {
            seed_material[k] = entropy_input[k] ^ personalization_string[k];
        }
    } else {
        memcpy(seed_material, entropy_input, sizeof seed_material);
    }

    memset(DRBG_ctx.Key, 0x00, 32);
    memset(DRBG_ctx.V, 0x00, 16);
    AES256_CTR_DRBG_Update(seed_material, DRBG_ctx.Key, DRBG_ctx.V);
    DRBG_ctx.reseed_counter = 1;
}
/*
 * Fill x[0 .. xlen-1] from the global DRBG state (DRBG_ctx).
 *
 * For every 16 bytes of output (or the final partial chunk), V is incremented
 * as a big-endian counter and encrypted under Key; the resulting block is
 * copied to the output. Afterwards Key and V are refreshed through
 * AES256_CTR_DRBG_Update and the reseed counter is bumped. Always returns 0.
 */
static int
randombytes_nist(unsigned char *x, size_t xlen)
{
    unsigned char keystream[16];
    size_t written = 0;

    while (xlen != 0) {
        /* increment V: trailing 0xff bytes wrap to 0x00, the first other
         * byte (scanning from the end) is incremented */
        int pos = 15;
        while (pos >= 0 && DRBG_ctx.V[pos] == 0xff) {
            DRBG_ctx.V[pos] = 0x00;
            pos--;
        }
        if (pos >= 0) {
            DRBG_ctx.V[pos]++;
        }

        AES256_ECB(DRBG_ctx.Key, DRBG_ctx.V, keystream);

        /* Copy a full block, or whatever is left for the last chunk */
        const size_t take = (xlen > 15) ? 16 : xlen;
        memcpy(x + written, keystream, take);
        written += take;
        xlen -= take;
    }

    AES256_CTR_DRBG_Update(NULL, DRBG_ctx.Key, DRBG_ctx.V);
    DRBG_ctx.reseed_counter++;
    return 0;
}
/*
 * Derive a fresh (Key, V) pair in place.
 *
 * Three consecutive counter blocks are produced by incrementing V as a
 * big-endian counter and encrypting it under the current Key, giving 48
 * bytes. If `provided_data` is non-NULL its 48 bytes are XORed in. The first
 * 32 bytes become the new Key and the last 16 bytes the new V.
 */
void
AES256_CTR_DRBG_Update(unsigned char *provided_data,
                       unsigned char *Key,
                       unsigned char *V)
{
    unsigned char temp[48];
    unsigned char *out = temp;

    for (int blk = 0; blk < 3; blk++, out += 16) {
        /* increment V: trailing 0xff bytes wrap to 0x00, the first other
         * byte (scanning from the end) is incremented */
        int pos = 15;
        while (pos >= 0 && V[pos] == 0xff) {
            V[pos] = 0x00;
            pos--;
        }
        if (pos >= 0) {
            V[pos]++;
        }

        AES256_ECB(Key, V, out);
    }

    if (provided_data != NULL) {
        for (size_t k = 0; k < sizeof temp; k++) {
            temp[k] ^= provided_data[k];
        }
    }

    memcpy(Key, temp, 32);
    memcpy(V, temp + 32, 16);
}
/*
 * Fill random_array[0 .. nbytes-1] with output of the NIST-style DRBG.
 *
 * Returns the status of randombytes_nist. When built with ENABLE_CT_TESTING,
 * the whole output buffer is marked as undefined for Valgrind so that
 * secret-dependent branches and memory accesses on it are reported.
 */
int randombytes(unsigned char *random_array, unsigned long long nbytes) {
    int ret = randombytes_nist(random_array, nbytes);
#ifdef ENABLE_CT_TESTING
    /* The length must be the buffer size; `ret` is a status code, and using
     * it as the length would mark zero bytes on success. */
    VALGRIND_MAKE_MEM_UNDEFINED(random_array, nbytes);
#endif
    return ret;
}
/*
 * Public seeding entry point: forwards all three arguments unchanged to
 * randombytes_init_nist.
 */
void
randombytes_init(unsigned char *entropy_input,
                 unsigned char *personalization_string,
                 int security_strength) {
    /* Plain call: C does not allow `return <expression>;` in a function
     * whose return type is void. */
    randombytes_init_nist(entropy_input, personalization_string, security_strength);
}

View File

@@ -20,6 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <rng.h>
#ifdef ENABLE_CT_TESTING
#include <valgrind/memcheck.h>
#endif
@@ -28,14 +30,14 @@ THE SOFTWARE.
// *before* randombytes.h is included. Otherwise SYS_getrandom will not be
// declared.
#if defined(__linux__) || defined(__GNU__)
# define _GNU_SOURCE
#define _GNU_SOURCE
#endif /* defined(__linux__) || defined(__GNU__) */
#if defined(_WIN32)
/* Windows */
# include <windows.h>
# include <wincrypt.h> /* CryptAcquireContext, CryptGenRandom */
#endif /* defined(_WIN32) */
#include <windows.h>
#include <wincrypt.h> /* CryptAcquireContext, CryptGenRandom */
#endif /* defined(_WIN32) */
/* wasi */
#if defined(__wasi__)
@@ -44,7 +46,7 @@ THE SOFTWARE.
/* kFreeBSD */
#if defined(__FreeBSD_kernel__) && defined(__GLIBC__)
# define GNU_KFREEBSD
#define GNU_KFREEBSD
#endif
#if defined(__linux__) || defined(__GNU__) || defined(GNU_KFREEBSD)
@@ -53,344 +55,377 @@ THE SOFTWARE.
// to the linux headers. We only need RNDGETENTCNT, so we instead inline it.
// RNDGETENTCNT is originally defined in `include/uapi/linux/random.h` in the
// linux repo.
# define RNDGETENTCNT 0x80045200
#define RNDGETENTCNT 0x80045200
# include <assert.h>
# include <errno.h>
# include <fcntl.h>
# include <poll.h>
# include <stdint.h>
# include <stdio.h>
# include <sys/ioctl.h>
# if (defined(__linux__) || defined(__GNU__)) && defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC_MINOR__ > 24))
# define USE_GLIBC
# include <sys/random.h>
# endif /* (defined(__linux__) || defined(__GNU__)) && defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC_MINOR__ > 24)) */
# include <sys/stat.h>
# include <sys/syscall.h>
# include <sys/types.h>
# include <unistd.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#if (defined(__linux__) || defined(__GNU__)) && defined(__GLIBC__) && \
((__GLIBC__ > 2) || (__GLIBC_MINOR__ > 24))
#define USE_GLIBC
#include <sys/random.h>
#endif /* (defined(__linux__) || defined(__GNU__)) && defined(__GLIBC__) && ((__GLIBC__ > 2) || \
(__GLIBC_MINOR__ > 24)) */
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
// We need SSIZE_MAX as the maximum read len from /dev/urandom
# if !defined(SSIZE_MAX)
# define SSIZE_MAX (SIZE_MAX / 2 - 1)
# endif /* defined(SSIZE_MAX) */
#if !defined(SSIZE_MAX)
#define SSIZE_MAX (SIZE_MAX / 2 - 1)
#endif /* defined(SSIZE_MAX) */
#endif /* defined(__linux__) || defined(__GNU__) || defined(GNU_KFREEBSD) */
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
/* Dragonfly, FreeBSD, NetBSD, OpenBSD (has arc4random) */
# include <sys/param.h>
# if defined(BSD)
# include <stdlib.h>
# endif
#include <sys/param.h>
#if defined(BSD)
#include <stdlib.h>
#endif
/* GNU/Hurd defines BSD in sys/param.h which causes problems later */
# if defined(__GNU__)
# undef BSD
# endif
#if defined(__GNU__)
#undef BSD
#endif
#endif
#if defined(__EMSCRIPTEN__)
# include <assert.h>
# include <emscripten.h>
# include <errno.h>
# include <stdbool.h>
#include <assert.h>
#include <emscripten.h>
#include <errno.h>
#include <stdbool.h>
#endif /* defined(__EMSCRIPTEN__) */
#if defined(_WIN32)
static int randombytes_win32_randombytes(void* buf, size_t n)
static int
randombytes_win32_randombytes(void *buf, size_t n)
{
HCRYPTPROV ctx;
BOOL tmp;
DWORD to_read = 0;
const size_t MAX_DWORD = 0xFFFFFFFF;
HCRYPTPROV ctx;
BOOL tmp;
DWORD to_read = 0;
const size_t MAX_DWORD = 0xFFFFFFFF;
tmp = CryptAcquireContext(&ctx, NULL, NULL, PROV_RSA_FULL,
CRYPT_VERIFYCONTEXT);
if (tmp == FALSE) return -1;
tmp = CryptAcquireContext(&ctx, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT);
if (tmp == FALSE)
return -1;
while (n > 0) {
to_read = (DWORD)(n < MAX_DWORD ? n : MAX_DWORD);
tmp = CryptGenRandom(ctx, to_read, (BYTE*) buf);
if (tmp == FALSE) return -1;
buf = ((char*)buf) + to_read;
n -= to_read;
}
while (n > 0) {
to_read = (DWORD)(n < MAX_DWORD ? n : MAX_DWORD);
tmp = CryptGenRandom(ctx, to_read, (BYTE *)buf);
if (tmp == FALSE)
return -1;
buf = ((char *)buf) + to_read;
n -= to_read;
}
tmp = CryptReleaseContext(ctx, 0);
if (tmp == FALSE) return -1;
tmp = CryptReleaseContext(ctx, 0);
if (tmp == FALSE)
return -1;
return 0;
return 0;
}
#endif /* defined(_WIN32) */
#if defined(__wasi__)
static int randombytes_wasi_randombytes(void *buf, size_t n) {
arc4random_buf(buf, n);
return 0;
static int
randombytes_wasi_randombytes(void *buf, size_t n)
{
arc4random_buf(buf, n);
return 0;
}
#endif /* defined(__wasi__) */
#if (defined(__linux__) || defined(__GNU__)) && (defined(USE_GLIBC) || defined(SYS_getrandom))
# if defined(USE_GLIBC)
#if defined(USE_GLIBC)
// getrandom is declared in glibc.
# elif defined(SYS_getrandom)
static ssize_t getrandom(void *buf, size_t buflen, unsigned int flags) {
return syscall(SYS_getrandom, buf, buflen, flags);
}
# endif
static int randombytes_linux_randombytes_getrandom(void *buf, size_t n)
#elif defined(SYS_getrandom)
static ssize_t
getrandom(void *buf, size_t buflen, unsigned int flags)
{
/* I have thought about using a separate PRF, seeded by getrandom, but
* it turns out that the performance of getrandom is good enough
* (250 MB/s on my laptop).
*/
size_t offset = 0, chunk;
int ret;
while (n > 0) {
/* getrandom does not allow chunks larger than 33554431 */
chunk = n <= 33554431 ? n : 33554431;
do {
ret = getrandom((char *)buf + offset, chunk, 0);
} while (ret == -1 && errno == EINTR);
if (ret < 0) return ret;
offset += ret;
n -= ret;
}
assert(n == 0);
return 0;
return syscall(SYS_getrandom, buf, buflen, flags);
}
#endif /* (defined(__linux__) || defined(__GNU__)) && (defined(USE_GLIBC) || defined(SYS_getrandom)) */
#endif
static int
randombytes_linux_randombytes_getrandom(void *buf, size_t n)
{
/* I have thought about using a separate PRF, seeded by getrandom, but
* it turns out that the performance of getrandom is good enough
* (250 MB/s on my laptop).
*/
size_t offset = 0, chunk;
int ret;
while (n > 0) {
/* getrandom does not allow chunks larger than 33554431 */
chunk = n <= 33554431 ? n : 33554431;
do {
ret = getrandom((char *)buf + offset, chunk, 0);
} while (ret == -1 && errno == EINTR);
if (ret < 0)
return ret;
offset += ret;
n -= ret;
}
assert(n == 0);
return 0;
}
#endif /* (defined(__linux__) || defined(__GNU__)) && (defined(USE_GLIBC) || \
defined(SYS_getrandom)) */
#if (defined(__linux__) || defined(GNU_KFREEBSD)) && !defined(SYS_getrandom)
# if defined(__linux__)
static int randombytes_linux_read_entropy_ioctl(int device, int *entropy)
#if defined(__linux__)
static int
randombytes_linux_read_entropy_ioctl(int device, int *entropy)
{
return ioctl(device, RNDGETENTCNT, entropy);
return ioctl(device, RNDGETENTCNT, entropy);
}
static int randombytes_linux_read_entropy_proc(FILE *stream, int *entropy)
static int
randombytes_linux_read_entropy_proc(FILE *stream, int *entropy)
{
int retcode;
do {
rewind(stream);
retcode = fscanf(stream, "%d", entropy);
} while (retcode != 1 && errno == EINTR);
if (retcode != 1) {
return -1;
}
return 0;
int retcode;
do {
rewind(stream);
retcode = fscanf(stream, "%d", entropy);
} while (retcode != 1 && errno == EINTR);
if (retcode != 1) {
return -1;
}
return 0;
}
static int randombytes_linux_wait_for_entropy(int device)
static int
randombytes_linux_wait_for_entropy(int device)
{
/* We will block on /dev/random, because any increase in the OS' entropy
* level will unblock the request. I use poll here (as does libsodium),
* because we don't *actually* want to read from the device. */
enum { IOCTL, PROC } strategy = IOCTL;
const int bits = 128;
struct pollfd pfd;
int fd;
FILE *proc_file;
int retcode, retcode_error = 0; // Used as return codes throughout this function
int entropy = 0;
/* We will block on /dev/random, because any increase in the OS' entropy
* level will unblock the request. I use poll here (as does libsodium),
* because we don't *actually* want to read from the device. */
enum
{
IOCTL,
PROC
} strategy = IOCTL;
const int bits = 128;
struct pollfd pfd;
int fd;
FILE *proc_file;
int retcode, retcode_error = 0; // Used as return codes throughout this function
int entropy = 0;
/* If the device has enough entropy already, we will want to return early */
retcode = randombytes_linux_read_entropy_ioctl(device, &entropy);
// printf("errno: %d (%s)\n", errno, strerror(errno));
if (retcode != 0 && (errno == ENOTTY || errno == ENOSYS)) {
// The ioctl call on /dev/urandom has failed due to a
// - ENOTTY (unsupported action), or
// - ENOSYS (invalid ioctl; this happens on MIPS, see #22).
//
// We will fall back to reading from
// `/proc/sys/kernel/random/entropy_avail`. This less ideal,
// because it allocates a file descriptor, and it may not work
// in a chroot. But at this point it seems we have no better
// options left.
strategy = PROC;
// Open the entropy count file
proc_file = fopen("/proc/sys/kernel/random/entropy_avail", "r");
if (proc_file == NULL) {
return -1;
}
} else if (retcode != 0) {
// Unrecoverable ioctl error
return -1;
}
if (entropy >= bits) {
return 0;
}
/* If the device has enough entropy already, we will want to return early */
retcode = randombytes_linux_read_entropy_ioctl(device, &entropy);
// printf("errno: %d (%s)\n", errno, strerror(errno));
if (retcode != 0 && (errno == ENOTTY || errno == ENOSYS)) {
// The ioctl call on /dev/urandom has failed due to a
// - ENOTTY (unsupported action), or
// - ENOSYS (invalid ioctl; this happens on MIPS, see #22).
//
// We will fall back to reading from
// `/proc/sys/kernel/random/entropy_avail`. This less ideal,
// because it allocates a file descriptor, and it may not work
// in a chroot. But at this point it seems we have no better
// options left.
strategy = PROC;
// Open the entropy count file
proc_file = fopen("/proc/sys/kernel/random/entropy_avail", "r");
if (proc_file == NULL) {
return -1;
}
} else if (retcode != 0) {
// Unrecoverable ioctl error
return -1;
}
if (entropy >= bits) {
return 0;
}
do {
fd = open("/dev/random", O_RDONLY);
} while (fd == -1 && errno == EINTR); /* EAGAIN will not occur */
if (fd == -1) {
/* Unrecoverable IO error */
return -1;
}
do {
fd = open("/dev/random", O_RDONLY);
} while (fd == -1 && errno == EINTR); /* EAGAIN will not occur */
if (fd == -1) {
/* Unrecoverable IO error */
return -1;
}
pfd.fd = fd;
pfd.events = POLLIN;
for (;;) {
retcode = poll(&pfd, 1, -1);
if (retcode == -1 && (errno == EINTR || errno == EAGAIN)) {
continue;
} else if (retcode == 1) {
if (strategy == IOCTL) {
retcode = randombytes_linux_read_entropy_ioctl(device, &entropy);
} else if (strategy == PROC) {
retcode = randombytes_linux_read_entropy_proc(proc_file, &entropy);
} else {
return -1; // Unreachable
}
pfd.fd = fd;
pfd.events = POLLIN;
for (;;) {
retcode = poll(&pfd, 1, -1);
if (retcode == -1 && (errno == EINTR || errno == EAGAIN)) {
continue;
} else if (retcode == 1) {
if (strategy == IOCTL) {
retcode = randombytes_linux_read_entropy_ioctl(device, &entropy);
} else if (strategy == PROC) {
retcode = randombytes_linux_read_entropy_proc(proc_file, &entropy);
} else {
return -1; // Unreachable
}
if (retcode != 0) {
// Unrecoverable I/O error
retcode_error = retcode;
break;
}
if (entropy >= bits) {
break;
}
} else {
// Unreachable: poll() should only return -1 or 1
retcode_error = -1;
break;
}
}
do {
retcode = close(fd);
} while (retcode == -1 && errno == EINTR);
if (strategy == PROC) {
do {
retcode = fclose(proc_file);
} while (retcode == -1 && errno == EINTR);
}
if (retcode_error != 0) {
return retcode_error;
}
return retcode;
if (retcode != 0) {
// Unrecoverable I/O error
retcode_error = retcode;
break;
}
if (entropy >= bits) {
break;
}
} else {
// Unreachable: poll() should only return -1 or 1
retcode_error = -1;
break;
}
}
do {
retcode = close(fd);
} while (retcode == -1 && errno == EINTR);
if (strategy == PROC) {
do {
retcode = fclose(proc_file);
} while (retcode == -1 && errno == EINTR);
}
if (retcode_error != 0) {
return retcode_error;
}
return retcode;
}
# endif /* defined(__linux__) */
#endif /* defined(__linux__) */
static int randombytes_linux_randombytes_urandom(void *buf, size_t n)
static int
randombytes_linux_randombytes_urandom(void *buf, size_t n)
{
int fd;
size_t offset = 0, count;
ssize_t tmp;
do {
fd = open("/dev/urandom", O_RDONLY);
} while (fd == -1 && errno == EINTR);
if (fd == -1) return -1;
# if defined(__linux__)
if (randombytes_linux_wait_for_entropy(fd) == -1) return -1;
# endif
int fd;
size_t offset = 0, count;
ssize_t tmp;
do {
fd = open("/dev/urandom", O_RDONLY);
} while (fd == -1 && errno == EINTR);
if (fd == -1)
return -1;
#if defined(__linux__)
if (randombytes_linux_wait_for_entropy(fd) == -1)
return -1;
#endif
while (n > 0) {
count = n <= SSIZE_MAX ? n : SSIZE_MAX;
tmp = read(fd, (char *)buf + offset, count);
if (tmp == -1 && (errno == EAGAIN || errno == EINTR)) {
continue;
}
if (tmp == -1) return -1; /* Unrecoverable IO error */
offset += tmp;
n -= tmp;
}
close(fd);
assert(n == 0);
return 0;
while (n > 0) {
count = n <= SSIZE_MAX ? n : SSIZE_MAX;
tmp = read(fd, (char *)buf + offset, count);
if (tmp == -1 && (errno == EAGAIN || errno == EINTR)) {
continue;
}
if (tmp == -1)
return -1; /* Unrecoverable IO error */
offset += tmp;
n -= tmp;
}
close(fd);
assert(n == 0);
return 0;
}
#endif /* defined(__linux__) && !defined(SYS_getrandom) */
#if defined(BSD)
static int randombytes_bsd_randombytes(void *buf, size_t n)
static int
randombytes_bsd_randombytes(void *buf, size_t n)
{
arc4random_buf(buf, n);
return 0;
arc4random_buf(buf, n);
return 0;
}
#endif /* defined(BSD) */
#if defined(__EMSCRIPTEN__)
static int randombytes_js_randombytes_nodejs(void *buf, size_t n) {
const int ret = EM_ASM_INT({
var crypto;
try {
crypto = require('crypto');
} catch (error) {
return -2;
}
try {
writeArrayToMemory(crypto.randomBytes($1), $0);
return 0;
} catch (error) {
return -1;
}
}, buf, n);
switch (ret) {
case 0:
return 0;
case -1:
errno = EINVAL;
return -1;
case -2:
errno = ENOSYS;
return -1;
}
assert(false); // Unreachable
static int
randombytes_js_randombytes_nodejs(void *buf, size_t n)
{
const int ret = EM_ASM_INT(
{
var crypto;
try {
crypto = require('crypto');
} catch (error) {
return -2;
}
try {
writeArrayToMemory(crypto.randomBytes($1), $0);
return 0;
} catch (error) {
return -1;
}
},
buf,
n);
switch (ret) {
case 0:
return 0;
case -1:
errno = EINVAL;
return -1;
case -2:
errno = ENOSYS;
return -1;
}
assert(false); // Unreachable
}
#endif /* defined(__EMSCRIPTEN__) */
static int randombytes_select(void *buf, size_t n)
SQISIGN_API
int
randombytes_select(unsigned char *buf, unsigned long long n)
{
#if defined(__EMSCRIPTEN__)
return randombytes_js_randombytes_nodejs(buf, n);
return randombytes_js_randombytes_nodejs(buf, n);
#elif defined(__linux__) || defined(__GNU__) || defined(GNU_KFREEBSD)
# if defined(USE_GLIBC)
/* Use getrandom system call */
return randombytes_linux_randombytes_getrandom(buf, n);
# elif defined(SYS_getrandom)
/* Use getrandom system call */
return randombytes_linux_randombytes_getrandom(buf, n);
# else
/* When we have enough entropy, we can read from /dev/urandom */
return randombytes_linux_randombytes_urandom(buf, n);
# endif
#elif defined(BSD)
/* Use arc4random system call */
return randombytes_bsd_randombytes(buf, n);
#elif defined(_WIN32)
/* Use windows API */
return randombytes_win32_randombytes(buf, n);
#elif defined(__wasi__)
/* Use WASI */
return randombytes_wasi_randombytes(buf, n);
#if defined(USE_GLIBC)
/* Use getrandom system call */
return randombytes_linux_randombytes_getrandom(buf, n);
#elif defined(SYS_getrandom)
/* Use getrandom system call */
return randombytes_linux_randombytes_getrandom(buf, n);
#else
# error "randombytes(...) is not supported on this platform"
/* When we have enough entropy, we can read from /dev/urandom */
return randombytes_linux_randombytes_urandom(buf, n);
#endif
#elif defined(BSD)
/* Use arc4random system call */
return randombytes_bsd_randombytes(buf, n);
#elif defined(_WIN32)
/* Use windows API */
return randombytes_win32_randombytes(buf, n);
#elif defined(__wasi__)
/* Use WASI */
return randombytes_wasi_randombytes(buf, n);
#else
#error "randombytes(...) is not supported on this platform"
#endif
}
int randombytes(unsigned char *x, unsigned long long xlen) {
#ifdef RANDOMBYTES_SYSTEM
SQISIGN_API
int
randombytes(unsigned char *x, unsigned long long xlen)
{
int ret = randombytes_select(x, (size_t) xlen);
int ret = randombytes_select(x, (size_t)xlen);
#ifdef ENABLE_CT_TESTING
VALGRIND_MAKE_MEM_UNDEFINED(x, xlen);
#endif
return ret;
}
void randombytes_init(unsigned char *entropy_input,
unsigned char *personalization_string,
int security_strength) {
(void) entropy_input;
(void) personalization_string;
(void) security_strength;
SQISIGN_API
void
randombytes_init(unsigned char *entropy_input,
unsigned char *personalization_string,
int security_strength)
{
(void)entropy_input;
(void)personalization_string;
(void)security_strength;
}
#endif

View File

@@ -0,0 +1,57 @@
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include "bench.h"
#define RANDOMBYTES_MAX_LENGTH 131072
#define STRINGIFY2(x) #x
#define STRINGIFY(x) STRINGIFY2(x)
/* NIST reference DRBG: seeding and output functions (defined outside this file). */
void
randombytes_init_nist(unsigned char *entropy_input,
unsigned char *personalization_string,
int security_strength);
int
randombytes_nist(unsigned char *x, size_t xlen);
/*
 * Platform DRBG under test. RANDOMBYTES_INIT_PLATFORM and RANDOMBYTES_PLATFORM
 * are not defined in this file; presumably they are macros supplied by the
 * build that expand to the real function names -- confirm in the build files.
 */
void
RANDOMBYTES_INIT_PLATFORM(unsigned char *entropy_input,
unsigned char *personalization_string,
int security_strength);
int
RANDOMBYTES_PLATFORM(unsigned char *x, size_t xlen);
/*
 * System randomness source. The parameter types mirror the definition
 * (unsigned char *, unsigned long long); declaring it as (void *, size_t)
 * is an incompatible redeclaration and breaks the call on targets where
 * size_t is narrower than unsigned long long.
 */
int
randombytes_select(unsigned char *buf, unsigned long long n);
// Benchmark driver: times three generators, each producing
// RANDOMBYTES_MAX_LENGTH bytes into the same buffer.
// argc and argv are not used. Always returns 0.
int main(int argc, char *argv[]) {
#ifndef NDEBUG
// Warn (in red, via ANSI escape codes) that assertions are enabled.
fprintf(stderr,
"\x1b[31mIt looks like SQIsign was compiled with assertions enabled.\n"
"This will severely impact performance measurements.\x1b[0m\n");
#endif
printf("Running AES-CTR-DRBG benchmarks\n");
// Output buffer shared by all three benchmarks; its contents are never inspected.
unsigned char x[RANDOMBYTES_MAX_LENGTH];
cpucycles_init();
// NOTE(review): BENCH_CODE_1(reps) / BENCH_CODE_2(label) come from bench.h and
// presumably bracket the statement to be timed, repeating it `reps` times and
// reporting the result under `label` -- confirm against bench.h.
// 1) Platform DRBG; the label is the expanded name of RANDOMBYTES_PLATFORM.
BENCH_CODE_1(1000 * SQISIGN_TEST_REPS);
RANDOMBYTES_PLATFORM(x, RANDOMBYTES_MAX_LENGTH);
BENCH_CODE_2(STRINGIFY(RANDOMBYTES_PLATFORM));
// 2) NIST reference DRBG; note the repetition count has no 1000x factor here.
BENCH_CODE_1(SQISIGN_TEST_REPS);
randombytes_nist(x, RANDOMBYTES_MAX_LENGTH);
BENCH_CODE_2("randombytes_nist");
// 3) System randomness source.
BENCH_CODE_1(1000 * SQISIGN_TEST_REPS);
randombytes_select(x, RANDOMBYTES_MAX_LENGTH);
BENCH_CODE_2("randombytes_system");
return 0;
}

View File

@@ -0,0 +1,68 @@
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#define RANDOMBYTES_MAX_LENGTH 131072
#define STRINGIFY2(x) #x
#define STRINGIFY(x) STRINGIFY2(x)
/* NIST reference DRBG: seeding and output functions (defined outside this file). */
void
randombytes_init_nist(unsigned char *entropy_input,
unsigned char *personalization_string,
int security_strength);
int
randombytes_nist(unsigned char *x, size_t xlen);
/*
 * Platform DRBG under test. RANDOMBYTES_INIT_PLATFORM and RANDOMBYTES_PLATFORM
 * are not defined in this file; presumably they are macros supplied by the
 * build that expand to the real function names -- confirm in the build files.
 */
void
RANDOMBYTES_INIT_PLATFORM(unsigned char *entropy_input,
unsigned char *personalization_string,
int security_strength);
int
RANDOMBYTES_PLATFORM(unsigned char *x, size_t xlen);
/*
 * System randomness source. The parameter types mirror the definition
 * (unsigned char *, unsigned long long); declaring it as (void *, size_t)
 * is an incompatible redeclaration on targets where size_t is narrower than
 * unsigned long long.
 */
int
randombytes_select(unsigned char *buf, unsigned long long n);
/*
 * Unit test: seeds the platform DRBG and the NIST reference DRBG identically
 * and checks that both emit the same byte stream.
 *
 * Eight seeds are tried; in seed number `bit` every byte equals 1 << bit.
 * The first differing byte of each mismatching output is reported.
 * argc and argv are not used.
 *
 * Returns 0 when every comparison matched, 1 otherwise.
 */
int main(int argc, char *argv[]) {
    unsigned char seed[48];
    unsigned char x_nist[RANDOMBYTES_MAX_LENGTH], x_platform[RANDOMBYTES_MAX_LENGTH];
    int all_match = 1;

    printf("Running AES-CTR-DRBG unit tests\n");

    for (int bit = 0; bit < 8; bit++) {
        /* Fill the whole seed with the single-bit pattern for this round. */
        memset(seed, 1 << bit, sizeof(seed));

        RANDOMBYTES_INIT_PLATFORM(seed, NULL, 256);
        randombytes_init_nist(seed, NULL, 256);

        /* As written, this loop visits only len == RANDOMBYTES_MAX_LENGTH. */
        for (int len = RANDOMBYTES_MAX_LENGTH; len <= RANDOMBYTES_MAX_LENGTH; len *= 2) {
            RANDOMBYTES_PLATFORM(x_platform, len);
            randombytes_nist(x_nist, len);

            if (memcmp(x_platform, x_nist, len) == 0) {
                continue;
            }

            /* Locate and report the first byte where the two streams diverge. */
            int pos = 0;
            while (pos < len && x_platform[pos] == x_nist[pos]) {
                pos++;
            }
            if (pos < len) {
                printf("Test failed for seed = %d, length = %d bytes: mismatch at index %d: %d != %d\n", bit, len, pos, x_platform[pos], x_nist[pos]);
            }
            all_match = 0;
        }
    }

    if (all_match) {
        printf("\nAll tests passed!\n");
    } else {
        printf("\nSome tests failed!\n");
    }

    return all_match ? 0 : 1;
}

View File

@@ -0,0 +1,75 @@
#include <stdio.h>
#include <time.h>
/* Tick count captured by the latest tic(); reference point for tac() and TAC(). */
static clock_t global_timer;

/* Starts (or restarts) the shared timer and returns the captured tick count. */
clock_t
tic(void)
{
    return global_timer = clock();
}

/* Returns the processor time, in milliseconds, elapsed since the last tic(). */
float
tac(void)
{
    const clock_t elapsed_ticks = clock() - global_timer;
    return (float)(1000. * (float)elapsed_ticks / CLOCKS_PER_SEC);
}

/*
 * Same measurement as tac(); additionally prints "<str> [<ms> ms]" with the
 * value truncated to an integer, but only when NDEBUG is not defined.
 */
float
TAC(const char *str)
{
    const float elapsed_ms = tac();
#ifndef NDEBUG
    printf("%s [%d ms]\n", str, (int)elapsed_ms);
#endif
    return elapsed_ms;
}
/* Returns the processor time, in milliseconds, elapsed since tick count `t`. */
float
toc(const clock_t t)
{
    const clock_t elapsed_ticks = clock() - t;
    return (float)(1000. * (float)elapsed_ticks / CLOCKS_PER_SEC);
}
/*
 * Measures the processor time elapsed since tick count `t`, prints it as
 * "<str> [<ms> ms]" (milliseconds truncated to an integer) and returns the
 * untruncated millisecond value.
 */
float
TOC(const clock_t t, const char *str)
{
    const clock_t elapsed_ticks = clock() - t;
    const float elapsed_ms = (float)(1000. * (float)elapsed_ticks / CLOCKS_PER_SEC);

    printf("%s [%d ms]\n", str, (int)elapsed_ms);
    return elapsed_ms;
}
/*
 * Measures the raw clock ticks elapsed since `t`, prints them as
 * "<str> [<ticks>]" and returns the same value as a float.
 *
 * clock() is sampled once so the printed and returned values always agree.
 * The value is cast to long for printing because clock_t is only guaranteed
 * to be an arithmetic type, not specifically long.
 */
float
TOC_clock(const clock_t t, const char *str)
{
    const clock_t elapsed_ticks = clock() - t;

    printf("%s [%ld]\n", str, (long)elapsed_ticks);
    return (float)elapsed_ticks;
}
/* Returns the number of raw clock ticks elapsed since tick count `t`. */
clock_t
dclock(const clock_t t)
{
    const clock_t now = clock();
    return now - t;
}
/*
 * Converts a tick count `t` to milliseconds, prints it as "<str> [<ms> ms]"
 * (truncated to an integer) and returns the untruncated millisecond value.
 * Does not read the clock.
 */
float
clock_to_time(const clock_t t, const char *str)
{
    const float as_ms = (float)(1000. * (float)(t) / CLOCKS_PER_SEC);

    printf("%s [%d ms]\n", str, (int)as_ms);
    return as_ms;
}
/*
 * Prints a raw tick count as "<str> [<ticks>]" and returns it as a float.
 * Does not read the clock.
 *
 * The value is cast to long for printing because clock_t is only guaranteed
 * to be an arithmetic type; passing it to %ld uncast is undefined where
 * clock_t is not long.
 */
float
clock_print(const clock_t t, const char *str)
{
    printf("%s [%ld]\n", str, (long)t);
    return (float)(t);
}

View File

@@ -0,0 +1,10 @@
# Sources contributed by this directory to the sqisign_common_test target:
# the AES-CTR-DRBG randombytes implementation and the C AES code it is
# listed with.
# NOTE(review): sqisign_common_test and sqisign_common_sys are not created in
# this file -- they must already exist when this file is processed.
set(SOURCE_FILES_COMMON_TEST_REF
randombytes_ctrdrbg.c
aes_c.c
)
# Attach the sources and this directory's include/ folder privately, so
# neither propagates to targets that link against sqisign_common_test.
target_sources(sqisign_common_test PRIVATE ${SOURCE_FILES_COMMON_TEST_REF})
target_include_directories(sqisign_common_test PRIVATE include)
# RANDOMBYTES_C is defined only while compiling sqisign_common_test.
# NOTE(review): presumably it selects the C DRBG code path -- confirm in the
# sources that test this macro.
target_compile_definitions(sqisign_common_test PRIVATE RANDOMBYTES_C)
# RANDOMBYTES_SYSTEM enables the #ifdef-guarded randombytes()/randombytes_init()
# definitions in the system randombytes source for sqisign_common_sys.
target_compile_definitions(sqisign_common_sys PRIVATE RANDOMBYTES_SYSTEM)

View File

@@ -39,23 +39,24 @@
#define AESCTR_NONCEBYTES 12
#define AES_BLOCKBYTES 16
// We've put these states on the heap to make sure ctx_release is used.
#define PQC_AES128_STATESIZE 88
typedef struct {
uint64_t *sk_exp;
typedef struct
{
uint64_t sk_exp[PQC_AES128_STATESIZE];
} aes128ctx;
#define PQC_AES192_STATESIZE 104
typedef struct {
uint64_t *sk_exp;
typedef struct
{
uint64_t sk_exp[PQC_AES192_STATESIZE];
} aes192ctx;
#define PQC_AES256_STATESIZE 120
typedef struct {
uint64_t *sk_exp;
typedef struct
{
uint64_t sk_exp[PQC_AES256_STATESIZE];
} aes256ctx;
/** Initializes the context **/
void aes128_ecb_keyexp(aes128ctx *r, const unsigned char *key);
@@ -68,7 +69,6 @@ void aes128_ctr(unsigned char *out, size_t outlen, const unsigned char *iv, cons
/** Frees the context **/
void aes128_ctx_release(aes128ctx *r);
/** Initializes the context **/
void aes192_ecb_keyexp(aes192ctx *r, const unsigned char *key);
@@ -80,7 +80,6 @@ void aes192_ctr(unsigned char *out, size_t outlen, const unsigned char *iv, cons
void aes192_ctx_release(aes192ctx *r);
/** Initializes the context **/
void aes256_ecb_keyexp(aes256ctx *r, const unsigned char *key);
@@ -93,46 +92,50 @@ void aes256_ctr(unsigned char *out, size_t outlen, const unsigned char *iv, cons
/** Frees the context **/
void aes256_ctx_release(aes256ctx *r);
static inline uint32_t br_dec32le(const unsigned char *src) {
return (uint32_t)src[0]
| ((uint32_t)src[1] << 8)
| ((uint32_t)src[2] << 16)
| ((uint32_t)src[3] << 24);
static inline uint32_t
br_dec32le(const unsigned char *src)
{
return (uint32_t)src[0] | ((uint32_t)src[1] << 8) | ((uint32_t)src[2] << 16) |
((uint32_t)src[3] << 24);
}
static void br_range_dec32le(uint32_t *v, size_t num, const unsigned char *src) {
static void
br_range_dec32le(uint32_t *v, size_t num, const unsigned char *src)
{
while (num-- > 0) {
*v ++ = br_dec32le(src);
*v++ = br_dec32le(src);
src += 4;
}
}
static inline uint32_t br_swap32(uint32_t x) {
x = ((x & (uint32_t)0x00FF00FF) << 8)
| ((x >> 8) & (uint32_t)0x00FF00FF);
static inline uint32_t
br_swap32(uint32_t x)
{
x = ((x & (uint32_t)0x00FF00FF) << 8) | ((x >> 8) & (uint32_t)0x00FF00FF);
return (x << 16) | (x >> 16);
}
static inline void br_enc32le(unsigned char *dst, uint32_t x) {
static inline void
br_enc32le(unsigned char *dst, uint32_t x)
{
dst[0] = (unsigned char)x;
dst[1] = (unsigned char)(x >> 8);
dst[2] = (unsigned char)(x >> 16);
dst[3] = (unsigned char)(x >> 24);
}
static void br_range_enc32le(unsigned char *dst, const uint32_t *v, size_t num) {
static void
br_range_enc32le(unsigned char *dst, const uint32_t *v, size_t num)
{
while (num-- > 0) {
br_enc32le(dst, *v ++);
br_enc32le(dst, *v++);
dst += 4;
}
}
static void br_aes_ct64_bitslice_Sbox(uint64_t *q) {
static void
br_aes_ct64_bitslice_Sbox(uint64_t *q)
{
/*
* This S-box implementation is a straightforward translation of
* the circuit described by Boyar and Peralta in "A new
@@ -306,18 +309,21 @@ static void br_aes_ct64_bitslice_Sbox(uint64_t *q) {
q[0] = s7;
}
static void br_aes_ct64_ortho(uint64_t *q) {
#define SWAPN(cl, ch, s, x, y) do { \
uint64_t a, b; \
a = (x); \
b = (y); \
(x) = (a & (uint64_t)(cl)) | ((b & (uint64_t)(cl)) << (s)); \
(y) = ((a & (uint64_t)(ch)) >> (s)) | (b & (uint64_t)(ch)); \
static void
br_aes_ct64_ortho(uint64_t *q)
{
#define SWAPN(cl, ch, s, x, y) \
do { \
uint64_t a, b; \
a = (x); \
b = (y); \
(x) = (a & (uint64_t)(cl)) | ((b & (uint64_t)(cl)) << (s)); \
(y) = ((a & (uint64_t)(ch)) >> (s)) | (b & (uint64_t)(ch)); \
} while (0)
#define SWAP2(x, y) SWAPN(0x5555555555555555, 0xAAAAAAAAAAAAAAAA, 1, x, y)
#define SWAP4(x, y) SWAPN(0x3333333333333333, 0xCCCCCCCCCCCCCCCC, 2, x, y)
#define SWAP8(x, y) SWAPN(0x0F0F0F0F0F0F0F0F, 0xF0F0F0F0F0F0F0F0, 4, x, y)
#define SWAP2(x, y) SWAPN(0x5555555555555555, 0xAAAAAAAAAAAAAAAA, 1, x, y)
#define SWAP4(x, y) SWAPN(0x3333333333333333, 0xCCCCCCCCCCCCCCCC, 2, x, y)
#define SWAP8(x, y) SWAPN(0x0F0F0F0F0F0F0F0F, 0xF0F0F0F0F0F0F0F0, 4, x, y)
SWAP2(q[0], q[1]);
SWAP2(q[2], q[3]);
@@ -335,8 +341,9 @@ static void br_aes_ct64_ortho(uint64_t *q) {
SWAP8(q[3], q[7]);
}
static void br_aes_ct64_interleave_in(uint64_t *q0, uint64_t *q1, const uint32_t *w) {
static void
br_aes_ct64_interleave_in(uint64_t *q0, uint64_t *q1, const uint32_t *w)
{
uint64_t x0, x1, x2, x3;
x0 = w[0];
@@ -363,8 +370,9 @@ static void br_aes_ct64_interleave_in(uint64_t *q0, uint64_t *q1, const uint32_t
*q1 = x1 | (x3 << 8);
}
static void br_aes_ct64_interleave_out(uint32_t *w, uint64_t q0, uint64_t q1) {
static void
br_aes_ct64_interleave_out(uint32_t *w, uint64_t q0, uint64_t q1)
{
uint64_t x0, x1, x2, x3;
x0 = q0 & (uint64_t)0x00FF00FF00FF00FF;
@@ -385,11 +393,11 @@ static void br_aes_ct64_interleave_out(uint32_t *w, uint64_t q0, uint64_t q1) {
w[3] = (uint32_t)x3 | (uint32_t)(x3 >> 16);
}
static const unsigned char Rcon[] = {
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
};
static const unsigned char Rcon[] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36 };
static uint32_t sub_word(uint32_t x) {
static uint32_t
sub_word(uint32_t x)
{
uint64_t q[8];
memset(q, 0, sizeof q);
@@ -400,7 +408,9 @@ static uint32_t sub_word(uint32_t x) {
return (uint32_t)q[0];
}
static void br_aes_ct64_keysched(uint64_t *comp_skey, const unsigned char *key, unsigned int key_len) {
static void
br_aes_ct64_keysched(uint64_t *comp_skey, const unsigned char *key, unsigned int key_len)
{
unsigned int i, j, k, nk, nkf;
uint32_t tmp;
uint32_t skey[60];
@@ -410,7 +420,7 @@ static void br_aes_ct64_keysched(uint64_t *comp_skey, const unsigned char *key,
nkf = ((nrounds + 1) << 2);
br_range_dec32le(skey, (key_len >> 2), key);
tmp = skey[(key_len >> 2) - 1];
for (i = nk, j = 0, k = 0; i < nkf; i ++) {
for (i = nk, j = 0, k = 0; i < nkf; i++) {
if (j == 0) {
tmp = (tmp << 24) | (tmp >> 8);
tmp = sub_word(tmp) ^ Rcon[k];
@@ -419,9 +429,9 @@ static void br_aes_ct64_keysched(uint64_t *comp_skey, const unsigned char *key,
}
tmp ^= skey[i - nk];
skey[i] = tmp;
if (++ j == nk) {
if (++j == nk) {
j = 0;
k ++;
k++;
}
}
@@ -437,23 +447,21 @@ static void br_aes_ct64_keysched(uint64_t *comp_skey, const unsigned char *key,
q[7] = q[4];
br_aes_ct64_ortho(q);
comp_skey[j + 0] =
(q[0] & (uint64_t)0x1111111111111111)
| (q[1] & (uint64_t)0x2222222222222222)
| (q[2] & (uint64_t)0x4444444444444444)
| (q[3] & (uint64_t)0x8888888888888888);
(q[0] & (uint64_t)0x1111111111111111) | (q[1] & (uint64_t)0x2222222222222222) |
(q[2] & (uint64_t)0x4444444444444444) | (q[3] & (uint64_t)0x8888888888888888);
comp_skey[j + 1] =
(q[4] & (uint64_t)0x1111111111111111)
| (q[5] & (uint64_t)0x2222222222222222)
| (q[6] & (uint64_t)0x4444444444444444)
| (q[7] & (uint64_t)0x8888888888888888);
(q[4] & (uint64_t)0x1111111111111111) | (q[5] & (uint64_t)0x2222222222222222) |
(q[6] & (uint64_t)0x4444444444444444) | (q[7] & (uint64_t)0x8888888888888888);
}
}
static void br_aes_ct64_skey_expand(uint64_t *skey, const uint64_t *comp_skey, unsigned int nrounds) {
static void
br_aes_ct64_skey_expand(uint64_t *skey, const uint64_t *comp_skey, unsigned int nrounds)
{
unsigned u, v, n;
n = (nrounds + 1) << 1;
for (u = 0, v = 0; u < n; u ++, v += 4) {
for (u = 0, v = 0; u < n; u++, v += 4) {
uint64_t x0, x1, x2, x3;
x0 = x1 = x2 = x3 = comp_skey[u];
@@ -471,8 +479,9 @@ static void br_aes_ct64_skey_expand(uint64_t *skey, const uint64_t *comp_skey, u
}
}
static inline void add_round_key(uint64_t *q, const uint64_t *sk) {
static inline void
add_round_key(uint64_t *q, const uint64_t *sk)
{
q[0] ^= sk[0];
q[1] ^= sk[1];
q[2] ^= sk[2];
@@ -483,28 +492,32 @@ static inline void add_round_key(uint64_t *q, const uint64_t *sk) {
q[7] ^= sk[7];
}
static inline void shift_rows(uint64_t *q) {
static inline void
shift_rows(uint64_t *q)
{
int i;
for (i = 0; i < 8; i ++) {
for (i = 0; i < 8; i++) {
uint64_t x;
x = q[i];
q[i] = (x & (uint64_t)0x000000000000FFFF)
| ((x & (uint64_t)0x00000000FFF00000) >> 4)
| ((x & (uint64_t)0x00000000000F0000) << 12)
| ((x & (uint64_t)0x0000FF0000000000) >> 8)
| ((x & (uint64_t)0x000000FF00000000) << 8)
| ((x & (uint64_t)0xF000000000000000) >> 12)
| ((x & (uint64_t)0x0FFF000000000000) << 4);
q[i] =
(x & (uint64_t)0x000000000000FFFF) | ((x & (uint64_t)0x00000000FFF00000) >> 4) |
((x & (uint64_t)0x00000000000F0000) << 12) | ((x & (uint64_t)0x0000FF0000000000) >> 8) |
((x & (uint64_t)0x000000FF00000000) << 8) | ((x & (uint64_t)0xF000000000000000) >> 12) |
((x & (uint64_t)0x0FFF000000000000) << 4);
}
}
static inline uint64_t rotr32(uint64_t x) {
static inline uint64_t
rotr32(uint64_t x)
{
return (x << 32) | (x >> 32);
}
static inline void mix_columns(uint64_t *q) {
static inline void
mix_columns(uint64_t *q)
{
uint64_t q0, q1, q2, q3, q4, q5, q6, q7;
uint64_t r0, r1, r2, r3, r4, r5, r6, r7;
@@ -535,14 +548,19 @@ static inline void mix_columns(uint64_t *q) {
q[7] = q6 ^ r6 ^ r7 ^ rotr32(q7 ^ r7);
}
static void inc4_be(uint32_t *x) {
static void
inc4_be(uint32_t *x)
{
uint32_t t = br_swap32(*x) + 4;
*x = br_swap32(t);
}
static void aes_ecb4x(unsigned char out[64], const uint32_t ivw[16], const uint64_t *sk_exp, unsigned int nrounds) {
static void
aes_ecb4x(unsigned char out[64],
const uint32_t ivw[16],
const uint64_t *sk_exp,
unsigned int nrounds)
{
uint32_t w[16];
uint64_t q[8];
unsigned int i;
@@ -553,7 +571,6 @@ static void aes_ecb4x(unsigned char out[64], const uint32_t ivw[16], const uint6
}
br_aes_ct64_ortho(q);
add_round_key(q, sk_exp);
for (i = 1; i < nrounds; i++) {
br_aes_ct64_bitslice_Sbox(q);
@@ -566,14 +583,15 @@ static void aes_ecb4x(unsigned char out[64], const uint32_t ivw[16], const uint6
add_round_key(q, sk_exp + 8 * nrounds);
br_aes_ct64_ortho(q);
for (i = 0; i < 4; i ++) {
for (i = 0; i < 4; i++) {
br_aes_ct64_interleave_out(w + (i << 2), q[i], q[i + 4]);
}
br_range_enc32le(out, w, 16);
}
static void aes_ctr4x(unsigned char out[64], uint32_t ivw[16], const uint64_t *sk_exp, unsigned int nrounds) {
static void
aes_ctr4x(unsigned char out[64], uint32_t ivw[16], const uint64_t *sk_exp, unsigned int nrounds)
{
aes_ecb4x(out, ivw, sk_exp, nrounds);
/* Increase counter for next 4 blocks */
@@ -583,8 +601,13 @@ static void aes_ctr4x(unsigned char out[64], uint32_t ivw[16], const uint64_t *s
inc4_be(ivw + 15);
}
static void aes_ecb(unsigned char *out, const unsigned char *in, size_t nblocks, const uint64_t *rkeys, unsigned int nrounds) {
static void
aes_ecb(unsigned char *out,
const unsigned char *in,
size_t nblocks,
const uint64_t *rkeys,
unsigned int nrounds)
{
uint32_t blocks[16];
unsigned char t[64];
@@ -603,18 +626,23 @@ static void aes_ecb(unsigned char *out, const unsigned char *in, size_t nblocks,
}
}
static void aes_ctr(unsigned char *out, size_t outlen, const unsigned char *iv, const uint64_t *rkeys, unsigned int nrounds) {
static void
aes_ctr(unsigned char *out,
size_t outlen,
const unsigned char *iv,
const uint64_t *rkeys,
unsigned int nrounds)
{
uint32_t ivw[16];
size_t i;
uint32_t cc = 0;
br_range_dec32le(ivw, 3, iv);
memcpy(ivw + 4, ivw, 3 * sizeof(uint32_t));
memcpy(ivw + 8, ivw, 3 * sizeof(uint32_t));
memcpy(ivw + 4, ivw, 3 * sizeof(uint32_t));
memcpy(ivw + 8, ivw, 3 * sizeof(uint32_t));
memcpy(ivw + 12, ivw, 3 * sizeof(uint32_t));
ivw[ 3] = br_swap32(cc);
ivw[ 7] = br_swap32(cc + 1);
ivw[3] = br_swap32(cc);
ivw[7] = br_swap32(cc + 1);
ivw[11] = br_swap32(cc + 2);
ivw[15] = br_swap32(cc + 3);
@@ -632,97 +660,110 @@ static void aes_ctr(unsigned char *out, size_t outlen, const unsigned char *iv,
}
}
void aes128_ecb_keyexp(aes128ctx *r, const unsigned char *key) {
void
aes128_ecb_keyexp(aes128ctx *r, const unsigned char *key)
{
uint64_t skey[22];
r->sk_exp = malloc(sizeof(uint64_t) * PQC_AES128_STATESIZE);
if (r->sk_exp == NULL) {
exit(111);
}
br_aes_ct64_keysched(skey, key, 16);
br_aes_ct64_skey_expand(r->sk_exp, skey, 10);
}
void aes128_ctr_keyexp(aes128ctx *r, const unsigned char *key) {
void
aes128_ctr_keyexp(aes128ctx *r, const unsigned char *key)
{
aes128_ecb_keyexp(r, key);
}
void aes192_ecb_keyexp(aes192ctx *r, const unsigned char *key) {
void
aes192_ecb_keyexp(aes192ctx *r, const unsigned char *key)
{
uint64_t skey[26];
r->sk_exp = malloc(sizeof(uint64_t) * PQC_AES192_STATESIZE);
if (r->sk_exp == NULL) {
exit(111);
}
br_aes_ct64_keysched(skey, key, 24);
br_aes_ct64_skey_expand(r->sk_exp, skey, 12);
}
void aes192_ctr_keyexp(aes192ctx *r, const unsigned char *key) {
void
aes192_ctr_keyexp(aes192ctx *r, const unsigned char *key)
{
aes192_ecb_keyexp(r, key);
}
void aes256_ecb_keyexp(aes256ctx *r, const unsigned char *key) {
void
aes256_ecb_keyexp(aes256ctx *r, const unsigned char *key)
{
uint64_t skey[30];
r->sk_exp = malloc(sizeof(uint64_t) * PQC_AES256_STATESIZE);
if (r->sk_exp == NULL) {
exit(111);
}
br_aes_ct64_keysched(skey, key, 32);
br_aes_ct64_skey_expand(r->sk_exp, skey, 14);
}
void aes256_ctr_keyexp(aes256ctx *r, const unsigned char *key) {
void
aes256_ctr_keyexp(aes256ctx *r, const unsigned char *key)
{
aes256_ecb_keyexp(r, key);
}
void aes128_ecb(unsigned char *out, const unsigned char *in, size_t nblocks, const aes128ctx *ctx) {
void
aes128_ecb(unsigned char *out, const unsigned char *in, size_t nblocks, const aes128ctx *ctx)
{
aes_ecb(out, in, nblocks, ctx->sk_exp, 10);
}
void aes128_ctr(unsigned char *out, size_t outlen, const unsigned char *iv, const aes128ctx *ctx) {
void
aes128_ctr(unsigned char *out, size_t outlen, const unsigned char *iv, const aes128ctx *ctx)
{
aes_ctr(out, outlen, iv, ctx->sk_exp, 10);
}
void aes192_ecb(unsigned char *out, const unsigned char *in, size_t nblocks, const aes192ctx *ctx) {
void
aes192_ecb(unsigned char *out, const unsigned char *in, size_t nblocks, const aes192ctx *ctx)
{
aes_ecb(out, in, nblocks, ctx->sk_exp, 12);
}
void aes192_ctr(unsigned char *out, size_t outlen, const unsigned char *iv, const aes192ctx *ctx) {
void
aes192_ctr(unsigned char *out, size_t outlen, const unsigned char *iv, const aes192ctx *ctx)
{
aes_ctr(out, outlen, iv, ctx->sk_exp, 12);
}
void aes256_ecb(unsigned char *out, const unsigned char *in, size_t nblocks, const aes256ctx *ctx) {
void
aes256_ecb(unsigned char *out, const unsigned char *in, size_t nblocks, const aes256ctx *ctx)
{
aes_ecb(out, in, nblocks, ctx->sk_exp, 14);
}
void aes256_ctr(unsigned char *out, size_t outlen, const unsigned char *iv, const aes256ctx *ctx) {
void
aes256_ctr(unsigned char *out, size_t outlen, const unsigned char *iv, const aes256ctx *ctx)
{
aes_ctr(out, outlen, iv, ctx->sk_exp, 14);
}
void aes128_ctx_release(aes128ctx *r) {
free(r->sk_exp);
void
aes128_ctx_release(aes128ctx *r)
{
}
void aes192_ctx_release(aes192ctx *r) {
free(r->sk_exp);
void
aes192_ctx_release(aes192ctx *r)
{
}
void aes256_ctx_release(aes256ctx *r) {
free(r->sk_exp);
void
aes256_ctx_release(aes256ctx *r)
{
}
int AES_128_CTR(unsigned char *output, size_t outputByteLen,
const unsigned char *input, size_t inputByteLen) {
int
AES_128_CTR(unsigned char *output,
size_t outputByteLen,
const unsigned char *input,
size_t inputByteLen)
{
aes128ctx ctx;
unsigned char iv[16] = { 0 };
const unsigned char iv[16] = { 0 };
aes128_ctr_keyexp(&ctx, input);
aes128_ctr(output, outputByteLen, iv, &ctx);
@@ -731,7 +772,9 @@ int AES_128_CTR(unsigned char *output, size_t outputByteLen,
return (int)outputByteLen;
}
void AES_256_ECB(const uint8_t *input, const unsigned char *key, unsigned char *output) {
void
AES_256_ECB(const uint8_t *input, const unsigned char *key, unsigned char *output)
{
aes256ctx ctx;
aes256_ecb_keyexp(&ctx, key);

View File

@@ -0,0 +1,29 @@
// SPDX-License-Identifier: Apache-2.0
#ifndef AES_H
#define AES_H
#include <stddef.h>
#include <stdint.h>
// AES-256 in ECB mode: takes an input buffer, a key and an output buffer.
// NOTE(review): buffer sizes are not expressed in the prototype; presumably a
// single 16-byte block in/out and a 32-byte key -- confirm against the
// implementation.
void AES_256_ECB(const uint8_t *input, const uint8_t *key, uint8_t *output);
// Alias under the name used by the CTR-DRBG code.
#define AES_ECB_encrypt AES_256_ECB
#ifdef ENABLE_AESNI
// AES-NI backed AES-128-CTR expanders. When ENABLE_AESNI is defined,
// AES_128_CTR resolves to AES_128_CTR_NI via the macro below.
// NOTE(review): the "4R" variant presumably uses a reduced number of rounds;
// confirm against its implementation.
int AES_128_CTR_NI(unsigned char *output,
size_t outputByteLen,
const unsigned char *input,
size_t inputByteLen);
int AES_128_CTR_4R_NI(unsigned char *output,
size_t outputByteLen,
const unsigned char *input,
size_t inputByteLen);
#define AES_128_CTR AES_128_CTR_NI
#else
// Portable AES-128-CTR expander: writes outputByteLen bytes to `output`,
// with `input` supplying the key material.
// NOTE(review): how inputByteLen is used and what the return value means are
// not visible in this header -- confirm against the implementation.
int AES_128_CTR(unsigned char *output,
size_t outputByteLen,
const unsigned char *input,
size_t inputByteLen);
#endif
#endif

View File

@@ -0,0 +1,161 @@
// SPDX-License-Identifier: Apache-2.0 and Unknown
//
/*
NIST-developed software is provided by NIST as a public service. You may use,
copy, and distribute copies of the software in any medium, provided that you
keep intact this entire notice. You may improve, modify, and create derivative
works of the software or any portion of the software, and you may copy and
distribute such modifications or works. Modified works should carry a notice
stating that you changed the software and should note the date and nature of any
such change. Please explicitly acknowledge the National Institute of Standards
and Technology as the source of the software.
NIST-developed software is expressly provided "AS IS." NIST MAKES NO WARRANTY OF
ANY KIND, EXPRESS, IMPLIED, IN FACT, OR ARISING BY OPERATION OF LAW, INCLUDING,
WITHOUT LIMITATION, THE IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE, NON-INFRINGEMENT, AND DATA ACCURACY. NIST NEITHER REPRESENTS
NOR WARRANTS THAT THE OPERATION OF THE SOFTWARE WILL BE UNINTERRUPTED OR
ERROR-FREE, OR THAT ANY DEFECTS WILL BE CORRECTED. NIST DOES NOT WARRANT OR MAKE
ANY REPRESENTATIONS REGARDING THE USE OF THE SOFTWARE OR THE RESULTS THEREOF,
INCLUDING BUT NOT LIMITED TO THE CORRECTNESS, ACCURACY, RELIABILITY, OR
USEFULNESS OF THE SOFTWARE.
You are solely responsible for determining the appropriateness of using and
distributing the software and you assume all risks associated with its use,
including but not limited to the risks and costs of program errors, compliance
with applicable laws, damage to or loss of data, programs or equipment, and the
unavailability or interruption of operation. This software is not intended to be
used in any situation where a failure could cause risk of injury or damage to
property. The software developed by NIST employees is not subject to copyright
protection within the United States.
*/
#include <rng.h>
#include <string.h>
#include <aes.h>
#ifdef ENABLE_CT_TESTING
#include <valgrind/memcheck.h>
#endif
#define RNG_SUCCESS 0
#define RNG_BAD_MAXLEN -1
#define RNG_BAD_OUTBUF -2
#define RNG_BAD_REQ_LEN -3
// Adapter giving AES_ECB_encrypt the (key, ctr, buffer) argument order used by
// the DRBG routines in this file; AES_ECB_encrypt itself takes
// (input, key, output).
static inline void AES256_ECB(const unsigned char *key,
const unsigned char *ctr, unsigned char *buffer) {
AES_ECB_encrypt(ctr, key, buffer);
}
// Working state of the AES-256 CTR-DRBG.
typedef struct {
unsigned char Key[32]; // current AES-256 key
unsigned char V[16]; // counter block, incremented as a big-endian integer
int reseed_counter; // set to 1 on init, incremented after every generate call
} AES256_CTR_DRBG_struct;
// Forward declaration: derives a fresh (Key, V) pair, optionally mixing in
// 48 bytes of provided_data (may be NULL).
void AES256_CTR_DRBG_Update(const unsigned char *provided_data,
unsigned char *Key, unsigned char *V);
// Single process-wide DRBG state shared by all functions in this file.
// NOTE(review): no locking is visible here, so concurrent callers would race
// on this state -- confirm the intended threading model.
AES256_CTR_DRBG_struct DRBG_ctx;
/*
 * Seed the global CTR-DRBG state.
 *
 * entropy_input          48 bytes of seed material (read in full).
 * personalization_string optional 48 bytes XORed into the seed; may be NULL.
 * security_strength      ignored.
 *
 * The state is reset to Key = 0, V = 0, updated once with the seed material,
 * and the reseed counter is set to 1.
 */
#ifndef CTRDRBG_TEST_BENCH
static
#endif
void
randombytes_init_nist(unsigned char *entropy_input,
                      unsigned char *personalization_string,
                      int security_strength)
{
    unsigned char seed[48];
    size_t k;

    (void)security_strength; // Unused parameter

    memcpy(seed, entropy_input, sizeof(seed));
    if (personalization_string != NULL) {
        for (k = 0; k < sizeof(seed); k++) {
            seed[k] ^= personalization_string[k];
        }
    }

    memset(DRBG_ctx.Key, 0x00, sizeof(DRBG_ctx.Key));
    memset(DRBG_ctx.V, 0x00, sizeof(DRBG_ctx.V));
    AES256_CTR_DRBG_Update(seed, DRBG_ctx.Key, DRBG_ctx.V);
    DRBG_ctx.reseed_counter = 1;
}
/*
 * Fill x[0..xlen) with output from the global CTR-DRBG.
 *
 * For each 16-byte chunk the counter V is incremented (big-endian, wrapping)
 * and encrypted under the current key; the final chunk is truncated to the
 * remaining length. Afterwards the state is updated with no additional data
 * and the reseed counter is bumped. Always returns 0.
 */
#ifndef CTRDRBG_TEST_BENCH
static
#endif
int
randombytes_nist(unsigned char *x, size_t xlen)
{
    unsigned char keystream[16];
    size_t written = 0;
    size_t remaining = xlen;

    while (remaining != 0) {
        size_t take;
        int pos = 15;

        // increment V: clear trailing 0xff bytes, then bump the next byte
        while (pos >= 0 && DRBG_ctx.V[pos] == 0xff) {
            DRBG_ctx.V[pos] = 0x00;
            pos--;
        }
        if (pos >= 0) {
            DRBG_ctx.V[pos]++;
        }

        AES256_ECB(DRBG_ctx.Key, DRBG_ctx.V, keystream);

        take = (remaining > 15) ? 16 : remaining;
        memcpy(x + written, keystream, take);
        written += take;
        remaining -= take;
    }

    AES256_CTR_DRBG_Update(NULL, DRBG_ctx.Key, DRBG_ctx.V);
    DRBG_ctx.reseed_counter++;
    return 0;
}
/*
 * CTR-DRBG update step.
 *
 * Produces 48 bytes by incrementing V three times and encrypting each value
 * under Key, optionally XORs in 48 bytes of provided_data (skipped when NULL),
 * then overwrites Key with the first 32 bytes and V with the last 16.
 */
void AES256_CTR_DRBG_Update(const unsigned char *provided_data,
                            unsigned char *Key, unsigned char *V)
{
    unsigned char temp[48];
    unsigned char *out;
    size_t k;

    for (out = temp; out != temp + sizeof(temp); out += 16) {
        int pos = 15;

        // increment V: clear trailing 0xff bytes, then bump the next byte
        while (pos >= 0 && V[pos] == 0xff) {
            V[pos] = 0x00;
            pos--;
        }
        if (pos >= 0) {
            V[pos]++;
        }

        AES256_ECB(Key, V, out);
    }

    if (provided_data != NULL) {
        for (k = 0; k < sizeof(temp); k++) {
            temp[k] ^= provided_data[k];
        }
    }

    memcpy(Key, temp, 32);
    memcpy(V, temp + 32, 16);
}
#ifdef RANDOMBYTES_C
/*
 * Public entry point: fill random_array with nbytes of DRBG output.
 * Returns the status of randombytes_nist (0 on success).
 *
 * When built with ENABLE_CT_TESTING, the whole output buffer is marked as
 * undefined for Valgrind so that any secret-dependent branch or memory access
 * on the random bytes is reported. The length passed to Valgrind must be the
 * number of bytes produced (nbytes); the status code `ret` is 0 on success,
 * which would mark nothing.
 */
SQISIGN_API
int randombytes(unsigned char *random_array, unsigned long long nbytes) {
    int ret = randombytes_nist(random_array, nbytes);
#ifdef ENABLE_CT_TESTING
    VALGRIND_MAKE_MEM_UNDEFINED(random_array, nbytes);
#endif
    return ret;
}
// Public entry point for seeding the DRBG; forwards all three arguments
// unchanged to randombytes_init_nist.
SQISIGN_API
void randombytes_init(unsigned char *entropy_input,
unsigned char *personalization_string,
int security_strength) {
randombytes_init_nist(entropy_input, personalization_string,
security_strength);
}
#endif

2
src/ec/ref/CMakeLists.txt Executable file → Normal file
View File

@@ -1,3 +1,3 @@
set(ECX_DIR ${CMAKE_CURRENT_SOURCE_DIR}/ecx)
set(LVLX_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lvlx)
include(${SELECT_SQISIGN_VARIANT})

View File

@@ -1,508 +0,0 @@
#include "isog.h"
// x-only point tripling: writes [3]P to Q in projective (x:z) coordinates.
// P's coordinates are read only before Q is first written, and callers in this
// file invoke it with Q == P.
static void xTPL(ec_point_t* Q, const ec_point_t* P, const ec_point_t* A3)
{
/* ----------------------------------------------------------------------------- *
* Differential point tripling given the montgomery coefficient A3 = (A+2C:A-2C)
* ----------------------------------------------------------------------------- */
fp2_t t0, t1, t2, t3, t4;
fp2_sub(&t0, &P->x, &P->z); // X - Z
fp2_sqr(&t2, &t0); // (X - Z)^2
fp2_add(&t1, &P->x, &P->z); // X + Z
fp2_sqr(&t3, &t1); // (X + Z)^2
fp2_add(&t4, &t1, &t0); // 2X
fp2_sub(&t0, &t1, &t0); // 2Z
fp2_sqr(&t1, &t4); // 4X^2
fp2_sub(&t1, &t1, &t3);
fp2_sub(&t1, &t1, &t2); // 4X^2 - (X+Z)^2 - (X-Z)^2 = 2(X^2 - Z^2)
fp2_mul(&Q->x, &t3, &A3->x);
fp2_mul(&t3, &Q->x, &t3);
fp2_mul(&Q->z, &t2, &A3->z);
fp2_mul(&t2, &t2, &Q->z);
fp2_sub(&t3, &t2, &t3);
fp2_sub(&t2, &Q->x, &Q->z);
fp2_mul(&t1, &t2, &t1);
fp2_add(&t2, &t3, &t1);
fp2_sqr(&t2, &t2);
fp2_mul(&Q->x, &t2, &t4); // final X coordinate (scaled by 2X)
fp2_sub(&t1, &t3, &t1);
fp2_sqr(&t1, &t1);
fp2_mul(&Q->z, &t1, &t0); // final Z coordinate (scaled by 2Z)
}
/*
 * Tests whether the projective x-coordinate P = (x:z) is the x-coordinate of
 * a point on the curve with coefficients (A:C), by checking that
 *     x*z*(C^2*x^2 + A*C*x*z + C^2*z^2)
 * is a square. Returns the result of fp2_is_square on that value.
 */
int ec_is_on_curve(const ec_curve_t* curve, const ec_point_t* P){
    fp2_t cx, cz, cross, acc;

    fp2_mul(&cx, &curve->C, &P->x);      // C*x
    fp2_mul(&cz, &curve->C, &P->z);      // C*z

    fp2_mul(&cross, &cx, &P->z);         // C*x*z
    fp2_mul(&cross, &cross, &curve->A);  // A*C*x*z

    fp2_sqr(&cx, &cx);                   // C^2*x^2
    fp2_sqr(&cz, &cz);                   // C^2*z^2

    fp2_add(&acc, &cx, &cross);
    fp2_add(&acc, &acc, &cz);            // C^2*x^2 + A*C*x*z + C^2*z^2
    fp2_mul(&acc, &acc, &P->x);
    fp2_mul(&acc, &acc, &P->z);

    return fp2_is_square(&acc);
}
// Computes a projective x-coordinate PQ = (x:z) for the difference P-Q from
// the x-coordinates of P and Q alone. The sign ambiguity is resolved by
// whichever root fp2_sqrt returns, so the result is deterministic for a given
// fp2_sqrt implementation.
// Only P->x, Q->x and curve->A are read; the z-coordinates and curve->C are
// taken to be 1 (see the precondition below).
static void difference_point(ec_point_t* PQ, const ec_point_t* P, const ec_point_t* Q, const ec_curve_t* curve){
// Given P,Q in affine x-only, computes a deterministic choice for (P-Q)
// The points must be normalized to z=1 and the curve to C=1
fp2_t t0, t1, t2, t3;
fp2_sub(&PQ->z, &P->x, &Q->x); // P - Q
fp2_mul(&t2, &P->x, &Q->x); // P*Q
// t1 = 1 (Montgomery representation)
fp_mont_setone(t1.re);
fp_set(t1.im, 0);
fp2_sub(&t3, &t2, &t1); // P*Q-1
fp2_mul(&t0, &PQ->z, &t3); // (P-Q)*(P*Q-1)
fp2_sqr(&PQ->z, &PQ->z); // (P-Q)^2
fp2_sqr(&t0, &t0); // (P-Q)^2*(P*Q-1)^2
fp2_add(&t1, &t2, &t1); // P*Q+1
fp2_add(&t3, &P->x, &Q->x); // P+Q
fp2_mul(&t1, &t1, &t3); // (P+Q)*(P*Q+1)
fp2_mul(&t2, &t2, &curve->A); // A*P*Q
fp2_add(&t2, &t2, &t2); // 2*A*P*Q
fp2_add(&t1, &t1, &t2); // (P+Q)*(P*Q+1) + 2*A*P*Q
fp2_sqr(&t2, &t1); // ((P+Q)*(P*Q+1) + 2*A*P*Q)^2
fp2_sub(&t0, &t2, &t0); // ((P+Q)*(P*Q+1) + 2*A*P*Q)^2 - (P-Q)^2*(P*Q-1)^2
fp2_sqrt(&t0); // in-place square root of the discriminant
fp2_add(&PQ->x, &t0, &t1); // numerator; the denominator (P-Q)^2 is already in PQ->z
}
// Deterministically builds a pair of points (P, Q) of order 2^POWER_OF_2 on
// `curve` together with P-Q, and stores them in PQ2.
//
// Candidate x-coordinates are 1 + k*i for k = 1, 2, ... (one is added to the
// imaginary part on every attempt). The search for Q continues from the value
// at which the search for P stopped. The output points are normalized to z = 1.
void ec_curve_to_basis_2(ec_basis_t *PQ2, const ec_curve_t *curve){
fp2_t x, t0, t1, t2;
ec_point_t P, Q, Q2, P2, A24;
// Curve coefficient in the form A24 = (A+2C:4C)
fp2_add(&A24.z, &curve->C, &curve->C);
fp2_add(&A24.x, &curve->A, &A24.z);
fp2_add(&A24.z, &A24.z, &A24.z);
// x = 1 (Montgomery representation)
fp_mont_setone(x.re);
fp_set(x.im, 0);
// Find P
while(1){
// Next candidate: x.im += 1
fp_add(x.im, x.re, x.im);
// Check if point is rational
// i.e. whether x*(C^2*x^2 + A*C*x + C^2) is a square
fp2_sqr(&t0, &curve->C);
fp2_mul(&t1, &t0, &x);
fp2_mul(&t2, &curve->A, &curve->C);
fp2_add(&t1, &t1, &t2);
fp2_mul(&t1, &t1, &x);
fp2_add(&t1, &t1, &t0);
fp2_mul(&t1, &t1, &x);
if(fp2_is_square(&t1)){
fp2_copy(&P.x, &x);
fp_mont_setone(P.z.re);
fp_set(P.z.im, 0);
}
else
continue;
// Clear odd factors from the order
xMULv2(&P, &P, p_cofactor_for_2f, P_COFACTOR_FOR_2F_BITLENGTH, &A24);
// Check if point has order 2^f
// P2 = [2^(f-1)]P is non-zero exactly when P has full order
copy_point(&P2, &P);
for(int i = 0; i < POWER_OF_2 - 1; i++)
xDBLv2(&P2, &P2, &A24);
if(ec_is_zero(&P2))
continue;
else
break;
}
// Find Q
while(1){
fp_add(x.im, x.re, x.im);
// Check if point is rational
fp2_sqr(&t0, &curve->C);
fp2_mul(&t1, &t0, &x);
fp2_mul(&t2, &curve->A, &curve->C);
fp2_add(&t1, &t1, &t2);
fp2_mul(&t1, &t1, &x);
fp2_add(&t1, &t1, &t0);
fp2_mul(&t1, &t1, &x);
if(fp2_is_square(&t1)){
fp2_copy(&Q.x, &x);
fp_mont_setone(Q.z.re);
fp_set(Q.z.im, 0);
}
else
continue;
// Clear odd factors from the order
xMULv2(&Q, &Q, p_cofactor_for_2f, P_COFACTOR_FOR_2F_BITLENGTH, &A24);
// Check if point has order 2^f
copy_point(&Q2, &Q);
for(int i = 0; i < POWER_OF_2 - 1; i++)
xDBLv2(&Q2, &Q2, &A24);
if(ec_is_zero(&Q2))
continue;
// Check if point is orthogonal to P
// (the order-2 multiples of P and Q must differ)
if(is_point_equal(&P2, &Q2))
continue;
else
break;
}
// Normalize points
// One shared inversion of P.z*Q.z*C yields P.x/P.z, Q.x/Q.z and A/C
ec_curve_t E;
fp2_mul(&t0, &P.z, &Q.z);
fp2_mul(&t1, &t0, &curve->C);
fp2_inv(&t1);
fp2_mul(&P.x, &P.x, &t1);
fp2_mul(&Q.x, &Q.x, &t1);
fp2_mul(&E.A, &curve->A, &t1);
fp2_mul(&P.x, &P.x, &Q.z);
fp2_mul(&P.x, &P.x, &curve->C);
fp2_mul(&Q.x, &Q.x, &P.z);
fp2_mul(&Q.x, &Q.x, &curve->C);
fp2_mul(&E.A, &E.A, &t0);
fp_mont_setone(P.z.re);
fp_set(P.z.im, 0);
fp2_copy(&Q.z, &P.z);
fp2_copy(&E.C, &P.z);
// Compute P-Q
difference_point(&PQ2->PmQ, &P, &Q, &E);
copy_point(&PQ2->P, &P);
copy_point(&PQ2->Q, &Q);
}
// Given a point P, deterministically finds a point Q of order 2^POWER_OF_2
// whose order-2 multiple differs from that of P, and stores (P, Q, P-Q) in
// PQ2 with z-coordinates normalized to 1.
// NOTE(review): P is not checked for full order here -- presumably the caller
// guarantees it has order 2^POWER_OF_2; confirm.
void ec_complete_basis_2(ec_basis_t* PQ2, const ec_curve_t* curve, const ec_point_t* P){
fp2_t x, t0, t1, t2;
ec_point_t Q, Q2, P2, A24;
// Curve coefficient in the form A24 = (A+2C:4C)
fp2_add(&A24.z, &curve->C, &curve->C);
fp2_add(&A24.x, &curve->A, &A24.z);
fp2_add(&A24.z, &A24.z, &A24.z);
// Point of order 2 generated by P
copy_point(&P2, P);
for(int i = 0; i < POWER_OF_2 - 1; i++)
xDBLv2(&P2, &P2, &A24);
// Find Q
// Candidates are x = 1 + k*i for k = 1, 2, ...
fp_mont_setone(x.re);
fp_set(x.im, 0);
while(1){
fp_add(x.im, x.re, x.im);
// Check if point is rational
// i.e. whether x*(C^2*x^2 + A*C*x + C^2) is a square
fp2_sqr(&t0, &curve->C);
fp2_mul(&t1, &t0, &x);
fp2_mul(&t2, &curve->A, &curve->C);
fp2_add(&t1, &t1, &t2);
fp2_mul(&t1, &t1, &x);
fp2_add(&t1, &t1, &t0);
fp2_mul(&t1, &t1, &x);
if(fp2_is_square(&t1)){
fp2_copy(&Q.x, &x);
fp_mont_setone(Q.z.re);
fp_set(Q.z.im, 0);
}
else
continue;
// Clear odd factors from the order
xMULv2(&Q, &Q, p_cofactor_for_2f, (int)P_COFACTOR_FOR_2F_BITLENGTH, &A24);
// Check if point has order 2^f
copy_point(&Q2, &Q);
for(int i = 0; i < POWER_OF_2 - 1; i++)
xDBLv2(&Q2, &Q2, &A24);
if(ec_is_zero(&Q2))
continue;
// Check if point is orthogonal to P
if(is_point_equal(&P2, &Q2))
continue;
else
break;
}
// Normalize points
// PP receives the normalized copy of P; the input P is left untouched
ec_curve_t E;
ec_point_t PP;
fp2_mul(&t0, &P->z, &Q.z);
fp2_mul(&t1, &t0, &curve->C);
fp2_inv(&t1);
fp2_mul(&PP.x, &P->x, &t1);
fp2_mul(&Q.x, &Q.x, &t1);
fp2_mul(&E.A, &curve->A, &t1);
fp2_mul(&PP.x, &PP.x, &Q.z);
fp2_mul(&PP.x, &PP.x, &curve->C);
fp2_mul(&Q.x, &Q.x, &P->z);
fp2_mul(&Q.x, &Q.x, &curve->C);
fp2_mul(&E.A, &E.A, &t0);
fp_mont_setone(PP.z.re);
fp_set(PP.z.im, 0);
fp2_copy(&Q.z, &PP.z);
fp2_copy(&E.C, &PP.z);
// Compute P-Q
difference_point(&PQ2->PmQ, &PP, &Q, &E);
copy_point(&PQ2->P, &PP);
copy_point(&PQ2->Q, &Q);
}
// Deterministically builds a pair of points (P, Q) of order 3^POWER_OF_3 on
// `curve` together with P-Q, and stores them in PQ3 (z-coordinates
// normalized to 1).
//
// Candidate x-coordinates are 1 + k*i for k = 1, 2, ...; the search for Q
// continues from the value at which the search for P stopped.
void ec_curve_to_basis_3(ec_basis_t* PQ3, const ec_curve_t* curve){
fp2_t x, t0, t1, t2;
ec_point_t P, Q, Q3, P3, A24, A3;
// Curve coefficient in the form A24 = (A+2C:4C)
fp2_add(&A24.z, &curve->C, &curve->C);
fp2_add(&A24.x, &curve->A, &A24.z);
fp2_add(&A24.z, &A24.z, &A24.z);
// Curve coefficient in the form A3 = (A+2C:A-2C)
fp2_sub(&A3.z, &A24.x, &A24.z);
fp2_copy(&A3.x, &A24.x);
// x = 1 (Montgomery representation)
fp_mont_setone(x.re);
fp_set(x.im, 0);
// Find P
while(1){
fp_add(x.im, x.re, x.im);
// Check if point is rational
// i.e. whether x*(C^2*x^2 + A*C*x + C^2) is a square
fp2_sqr(&t0, &curve->C);
fp2_mul(&t1, &t0, &x);
fp2_mul(&t2, &curve->A, &curve->C);
fp2_add(&t1, &t1, &t2);
fp2_mul(&t1, &t1, &x);
fp2_add(&t1, &t1, &t0);
fp2_mul(&t1, &t1, &x);
if(fp2_is_square(&t1)){
fp2_copy(&P.x, &x);
fp_mont_setone(P.z.re);
fp_set(P.z.im, 0);
}
else
continue;
// Clear non-3 factors from the order
xMULv2(&P, &P, p_cofactor_for_3g, (int)P_COFACTOR_FOR_3G_BITLENGTH, &A24);
// Check if point has order 3^g
// P3 = [3^(g-1)]P is non-zero exactly when P has full order
copy_point(&P3, &P);
for(int i = 0; i < POWER_OF_3 - 1; i++)
xTPL(&P3, &P3, &A3);
if(ec_is_zero(&P3))
continue;
else
break;
}
// Find Q
while(1){
fp_add(x.im, x.re, x.im);
// Check if point is rational
fp2_sqr(&t0, &curve->C);
fp2_mul(&t1, &t0, &x);
fp2_mul(&t2, &curve->A, &curve->C);
fp2_add(&t1, &t1, &t2);
fp2_mul(&t1, &t1, &x);
fp2_add(&t1, &t1, &t0);
fp2_mul(&t1, &t1, &x);
if(fp2_is_square(&t1)){
fp2_copy(&Q.x, &x);
fp_mont_setone(Q.z.re);
fp_set(Q.z.im, 0);
}
else
continue;
// Clear non-3 factors from the order
xMULv2(&Q, &Q, p_cofactor_for_3g, (int)P_COFACTOR_FOR_3G_BITLENGTH, &A24);
// Check if point has order 3^g
copy_point(&Q3, &Q);
for(int i = 0; i < POWER_OF_3 - 1; i++)
xTPL(&Q3, &Q3, &A3);
if(ec_is_zero(&Q3))
continue;
// Check if point is orthogonal to P
if(is_point_equal(&P3, &Q3))
continue;
// Also compare against [2]P3. Note that P3 is doubled in place here and is
// not restored if the loop continues, so later iterations start from the
// doubled value.
xDBLv2(&P3, &P3, &A24);
if(is_point_equal(&P3, &Q3))
continue;
else
break;
}
// Normalize points
// One shared inversion of P.z*Q.z*C yields P.x/P.z, Q.x/Q.z and A/C
ec_curve_t E;
fp2_mul(&t0, &P.z, &Q.z);
fp2_mul(&t1, &t0, &curve->C);
fp2_inv(&t1);
fp2_mul(&P.x, &P.x, &t1);
fp2_mul(&Q.x, &Q.x, &t1);
fp2_mul(&E.A, &curve->A, &t1);
fp2_mul(&P.x, &P.x, &Q.z);
fp2_mul(&P.x, &P.x, &curve->C);
fp2_mul(&Q.x, &Q.x, &P.z);
fp2_mul(&Q.x, &Q.x, &curve->C);
fp2_mul(&E.A, &E.A, &t0);
fp_mont_setone(P.z.re);
fp_set(P.z.im, 0);
fp2_copy(&Q.z, &P.z);
fp2_copy(&E.C, &P.z);
// Compute P-Q
difference_point(&PQ3->PmQ, &P, &Q, &E);
copy_point(&PQ3->P, &P);
copy_point(&PQ3->Q, &Q);
}
// Deterministically builds a pair of points (P, Q) of order
// 2^POWER_OF_2 * 3^POWER_OF_3 on `curve` together with P-Q, and stores them
// in PQ6 (z-coordinates normalized to 1).
//
// Candidate x-coordinates are 1 + k*i for k = 1, 2, ...; the search for Q
// continues from the value at which the search for P stopped.
void ec_curve_to_basis_6(ec_basis_t* PQ6, const ec_curve_t* curve){
fp2_t x, t0, t1, t2;
ec_point_t P, Q, Q6, P6, R, T, A24, A3;
// Curve coefficient in the form A24 = (A+2C:4C)
fp2_add(&A24.z, &curve->C, &curve->C);
fp2_add(&A24.x, &curve->A, &A24.z);
fp2_add(&A24.z, &A24.z, &A24.z);
// Curve coefficient in the form A3 = (A+2C:A-2C)
fp2_sub(&A3.z, &A24.x, &A24.z);
fp2_copy(&A3.x, &A24.x);
// x = 1 (Montgomery representation)
fp_mont_setone(x.re);
fp_set(x.im, 0);
// Find P
while(1){
fp_add(x.im, x.re, x.im);
// Check if point is rational
// i.e. whether x*(C^2*x^2 + A*C*x + C^2) is a square
fp2_sqr(&t0, &curve->C);
fp2_mul(&t1, &t0, &x);
fp2_mul(&t2, &curve->A, &curve->C);
fp2_add(&t1, &t1, &t2);
fp2_mul(&t1, &t1, &x);
fp2_add(&t1, &t1, &t0);
fp2_mul(&t1, &t1, &x);
if(fp2_is_square(&t1)){
fp2_copy(&P.x, &x);
fp_mont_setone(P.z.re);
fp_set(P.z.im, 0);
}
else
continue;
// Clear non-2 factors and non-3 factors from the order
xMULv2(&P, &P, p_cofactor_for_6fg, (int)P_COFACTOR_FOR_6FG_BITLENGTH, &A24);
// Check if point has order 2^f*3^g
// P6 = [2^(f-1) * 3^(g-1)]P must have order exactly 6: it must be
// non-zero and neither [2]P6 nor [3]P6 may vanish
copy_point(&P6, &P);
for(int i = 0; i < POWER_OF_2 - 1; i++)
xDBLv2(&P6, &P6, &A24);
for(int i = 0; i < POWER_OF_3 - 1; i++)
xTPL(&P6, &P6, &A3);
if(ec_is_zero(&P6))
continue;
xDBLv2(&T, &P6, &A24);
if (ec_is_zero(&T))
continue;
xTPL(&T, &P6, &A3);
if (ec_is_zero(&T))
continue;
break;
}
// Find Q
while(1){
fp_add(x.im, x.re, x.im);
// Check if point is rational
fp2_sqr(&t0, &curve->C);
fp2_mul(&t1, &t0, &x);
fp2_mul(&t2, &curve->A, &curve->C);
fp2_add(&t1, &t1, &t2);
fp2_mul(&t1, &t1, &x);
fp2_add(&t1, &t1, &t0);
fp2_mul(&t1, &t1, &x);
if(fp2_is_square(&t1)){
fp2_copy(&Q.x, &x);
fp_mont_setone(Q.z.re);
fp_set(Q.z.im, 0);
}
else
continue;
// Clear non-6 factors from the order
xMULv2(&Q, &Q, p_cofactor_for_6fg, (int)P_COFACTOR_FOR_6FG_BITLENGTH, &A24);
// Check first if point has order 2^f*3^g
copy_point(&Q6, &Q);
for(int i = 0; i < POWER_OF_2 - 1; i++)
xDBLv2(&Q6, &Q6, &A24);
for(int i = 0; i < POWER_OF_3 - 1; i++)
xTPL(&Q6, &Q6, &A3);
if(ec_is_zero(&Q6))
continue;
xDBLv2(&T, &Q6, &A24);
if (ec_is_zero(&T))
continue;
xTPL(&T, &Q6, &A3);
if (ec_is_zero(&T))
continue;
// Check if point P is independent from point Q
// Compare the order-2 parts ([3]P6 vs [3]Q6) ...
xTPL(&R, &P6, &A3);
xTPL(&T, &Q6, &A3);
if(is_point_equal(&R, &T))
continue;
// ... and the order-3 parts ([2]P6 vs [2]Q6)
xDBLv2(&R, &P6, &A24);
xDBLv2(&T, &Q6, &A24);
if(is_point_equal(&R, &T))
continue;
break;
}
// Normalize points
// One shared inversion of P.z*Q.z*C yields P.x/P.z, Q.x/Q.z and A/C
ec_curve_t E;
fp2_mul(&t0, &P.z, &Q.z);
fp2_mul(&t1, &t0, &curve->C);
fp2_inv(&t1);
fp2_mul(&P.x, &P.x, &t1);
fp2_mul(&Q.x, &Q.x, &t1);
fp2_mul(&E.A, &curve->A, &t1);
fp2_mul(&P.x, &P.x, &Q.z);
fp2_mul(&P.x, &P.x, &curve->C);
fp2_mul(&Q.x, &Q.x, &P.z);
fp2_mul(&Q.x, &Q.x, &curve->C);
fp2_mul(&E.A, &E.A, &t0);
fp_mont_setone(P.z.re);
fp_set(P.z.im, 0);
fp2_copy(&Q.z, &P.z);
fp2_copy(&E.C, &P.z);
// Compute P-Q
difference_point(&PQ6->PmQ, &P, &Q, &E);
copy_point(&PQ6->P, &P);
copy_point(&PQ6->Q, &Q);
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,90 +0,0 @@
#include <assert.h>
#include <time.h>
#include <stdio.h>
#include "../generic/include/fp2_tmp.h"
// Self-test for fp2_t arithmetic: runs 1024 rounds on random elements and
// checks equality, negation, zero tests, multiplication by 0 and 1, inversion
// and squareness via assert(). Returns 0 when every assertion holds.
// NOTE(review): all checks rely on assert(), so a build with NDEBUG defined
// would verify nothing.
int main()
{
fp2_t fp2_0, fp2_1;
// ------------
fp2_set0(fp2_0);
fp2_set1(fp2_1);
// ------------
int i;
fp2_t a, b, c, d;
// NOTE(review): `e` is only referenced by the commented-out fp2_pow tests below.
fp_t e;
for (i = 0; i < 1024; i++)
{
// Progress indicator, erased again immediately (carriage return + clear line)
printf("[%3d%%] Testing fp2_t arithmetic", 100 * i / (int)1024);
fflush(stdout);
printf("\r\x1b[K");
// Random elements of fp2
fp2_random(a);
fp2_random(b);
// c and d are perturbed copies of a and b (first limb of the real part +/- 1)
fp2_copy(c, a);
c.re[0] += 1;
fp2_copy(d, b);
d.re[0] -= 1;
assert(fp2_isequal(a,b) == 0); // different values check --> (a != b)
assert(fp2_isequal(c,c) == 1); // equal values check --> 1 (c == c)
// Testing neg: -a must equal 0 - a
fp2_set0(b);
fp2_copy(c, a);
fp2_neg(a, a);
fp2_sub(c, b, c);
assert(fp2_isequal(a,c) == 1);
fp2_set1(a); // Now a == 1
fp2_set0(b); // Now b == 0
assert(fp2_is_zero(a) == 0);
assert(fp2_is_zero(b) == 1);
// testing c - c
fp2_sub(d, c, c);
assert(fp2_is_zero(d) == 1);
// testing c * 0
fp2_mul(d, c, b);
assert(fp2_is_zero(d) == 1);
// testing c * 1 ... recall, in Montgomery domain R mod p plays the role of the 1
fp2_set1(a);
fp2_mul(d, c, a);
assert(fp2_isequal(d, c) == 1);
// fp_set(e, 1); // Now e == 1
// fp2_pow(d, e, c);
// assert(fp2_isequal(d, c) == 1);
// fp_set(e, 0); // Now e == 0
// fp2_pow(d, e, c);
// assert(fp2_isone(d) == 1);
// fp2_set(a, 1); // Now e == R mod p
// fp_random(e);
// fp2_pow(d, e, a);
// assert(fp2_isone(d) == 1);
// Testing 1/a by computing (1/a) x a
fp2_random(a);
fp2_copy(b, a);
fp2_inv(a);
fp2_mul(c, a, b);
assert(fp2_isone(c) == 1);
// A square must be recognised as a square
fp2_random(a);
fp2_sqr(b, a);
assert( fp2_issquare(b) );
};
// i == 1024 here, so this prints 100%
printf("[%2d%%] Tested fp2_t arithmetic:\tNo errors!\n", 100 * i / (int)1024);
printf("-- All tests passed.\n");
return 0;
}

View File

@@ -1,298 +0,0 @@
#include "isog.h"
#include <assert.h>
/* Converts curve coefficients (A : C) into the form A24 = (A + 2C : 4C). */
static inline void AC_to_A24(ec_point_t *A24, ec_curve_t const *E)
{
    fp2_t two_c;

    fp2_add(&two_c, &E->C, &E->C);       // 2C
    fp2_add(&A24->x, &E->A, &two_c);     // A + 2C
    fp2_add(&A24->z, &two_c, &two_c);    // 4C
}
/*
 * Converts A24 = (A + 2C : 4C) back into projective curve coefficients.
 * With x = A + 2C and z = 4C this stores E->A = 2*(2x - z) = 4A and
 * E->C = z = 4C, i.e. the pair (A : C) scaled by 4.
 */
static inline void A24_to_AC(ec_curve_t *E, ec_point_t const *A24)
{
    fp2_t acc;

    fp2_add(&acc, &A24->x, &A24->x);     // 2(A + 2C)
    fp2_sub(&acc, &acc, &A24->z);        // 2A
    fp2_add(&E->A, &acc, &acc);          // 4A
    fp2_copy(&E->C, &A24->z);            // 4C
}
// Evaluates the 2-power isogeny phi (kernel phi->kernel on phi->curve, with
// phi->length doublings) at `length` points, in place, and writes the
// codomain curve to `image`.
//
// The first step is a 4-isogeny with kernel Q4 = [2^(phi->length-2)]kernel.
// When the order-2 point [2]Q4 has x-coordinate 0, the dedicated "singular"
// 4-isogeny routines are used for that step. The remaining phi->length-2
// steps are delegated to ec_eval_even_strategy.
void ec_eval_even(ec_curve_t* image, const ec_isog_even_t* phi,
ec_point_t* points, unsigned short length){
ec_point_t Q4, Q, A24;
copy_point(&Q4, &phi->kernel);
AC_to_A24(&A24, &phi->curve);
// Q4 = [2^(length-2)] kernel
for(int i = 0; i < phi->length - 2; i++)
xDBLv2(&Q4, &Q4, &A24);
// Q = [2] Q4
xDBLv2(&Q, &Q4, &A24);
if(fp2_is_zero(&Q.x)){
xisog_4_singular(&A24, Q4, A24);
xeval_4_singular(points, points, length, Q4);
// Push the kernel generator through the first step; Q now holds its image
xeval_4_singular(&Q, &phi->kernel, 1, Q4);
}
else{
xisog_4(&A24, Q4);
xeval_4(points, points, length);
// Push the kernel generator through the first step; Q now holds its image
xeval_4(&Q, &phi->kernel, 1);
}
ec_eval_even_strategy(image, points, length, &A24, &Q, phi->length-2);
}
// Variant of ec_eval_even that always uses the regular 4-isogeny formulas for
// the first step (there is no test for the x = 0 order-2 point).
// NOTE(review): presumably the caller guarantees that the kernel does not lie
// above the point with x = 0 -- confirm.
void ec_eval_even_nonzero(ec_curve_t* image, const ec_isog_even_t* phi,
ec_point_t* points, unsigned short length){
ec_point_t Q4, A24;
copy_point(&Q4, &phi->kernel);
AC_to_A24(&A24, &phi->curve);
// Q4 = [2^(length-2)] kernel
for(int i = 0; i < phi->length - 2; i++)
xDBLv2(&Q4, &Q4, &A24);
xisog_4(&A24, Q4);
xeval_4(points, points, length);
// Q4 is reused to hold the image of the kernel generator under the first step
xeval_4(&Q4, &phi->kernel, 1);
ec_eval_even_strategy(image, points, length, &A24, &Q4, phi->length-2);
}
// Evaluates a chain of 2-power isogenies of total length isog_len with kernel
// generator `kernel` on the curve given by A24, pushing `points` (points_len
// entries) through in place. The doubling/evaluation schedule is read from the
// STRATEGY4 table. On return A24 holds the final curve and `image` receives it
// in (A:C) form.
// Asserts that isog_len == POWER_OF_2-2.
static void ec_eval_even_strategy(ec_curve_t* image, ec_point_t* points, unsigned short points_len,
ec_point_t* A24, const ec_point_t *kernel, const int isog_len){
assert(isog_len == POWER_OF_2-2);
// NOTE(review): `tmp` is a uint8_t initialised from the digit_t e_half below,
// so the bit-length computation assumes e_half < 256 -- confirm.
uint8_t log2_of_e, tmp;
// NOTE(review): t0 is declared but not used in this function.
fp2_t t0;
// Number of 4-isogeny steps
digit_t e_half = (isog_len)>>1;
// log2_of_e = bit length of e_half
for(tmp = e_half, log2_of_e = 0; tmp > 0; tmp>>=1, ++log2_of_e);
log2_of_e *= 2; // In order to ensure each splits is at most size log2_of_e
// NOTE(review): K2 is declared but not used in this function.
ec_point_t SPLITTING_POINTS[log2_of_e], K2;
copy_point(&SPLITTING_POINTS[0], kernel);
int strategy = 0, // Current element of the strategy to be used
i, j;
int BLOCK = 0, // Keeps track of point order
current = 0; // Number of points being carried
int XDBLs[log2_of_e]; // Number of doubles performed
// If walk length is odd, we start with a 2-isogeny
if(isog_len & 1){
copy_point(&SPLITTING_POINTS[1], &SPLITTING_POINTS[0]);
for(i = 0; i < isog_len-1; i++)
xDBLv2(&SPLITTING_POINTS[1], &SPLITTING_POINTS[1], A24);
xisog_2(A24, SPLITTING_POINTS[1]);
xeval_2(SPLITTING_POINTS, SPLITTING_POINTS, 1);
xeval_2(points, points, points_len);
}
// Chain of 4-isogenies
for(j = 0; j < (e_half - 1); j++)
{
// Get the next point of order 4
while (BLOCK != (e_half - 1 - j) )
{
// A new split will be added
current += 1;
// We set the seed of the new split to be computed and saved
copy_point(&SPLITTING_POINTS[current], &SPLITTING_POINTS[current - 1]);
// Each strategy unit corresponds to two doublings (one multiplication by 4)
for(i = 0; i < 2*STRATEGY4[strategy]; i++)
xDBLv2(&SPLITTING_POINTS[current], &SPLITTING_POINTS[current], A24);
XDBLs[current] = STRATEGY4[strategy]; // The number of doublings performed is saved
BLOCK += STRATEGY4[strategy]; // BLOCK is increased by the number of doublings performed
strategy += 1; // Next, we move to the next element of the strategy
}
// Evaluate 4-isogeny
xisog_4(A24, SPLITTING_POINTS[current]);
// Push the first `current` stored points (indices 0..current-1) and the caller's points through
xeval_4(SPLITTING_POINTS, SPLITTING_POINTS, current);
xeval_4(points, points, points_len);
// Drop the point that was just used as kernel
BLOCK -= XDBLs[current];
XDBLs[current] = 0;
current -= 1;
}
// Final 4-isogeny
xisog_4(A24, SPLITTING_POINTS[current]);
xeval_4(points, points, points_len);
// Output curve in the form (A:C)
A24_to_AC(image, A24);
}
// Evaluates the odd-degree isogeny phi at `length` points, in place, and
// writes the codomain curve to `image`.
//
// The isogeny is processed one prime at a time: indices 0..P_LEN-1 use the
// kernel generator phi->ker_plus, indices P_LEN..P_LEN+M_LEN-1 use
// phi->ker_minus. phi->degree[i] is the exponent of TORSION_ODD_PRIMES[i].
void ec_eval_odd(ec_curve_t* image, const ec_isog_odd_t* phi,
ec_point_t* points, unsigned short length){
ec_point_t ker_plus, ker_minus, P, K, A24, B24;
int i,j,k;
AC_to_A24(&A24, &phi->curve);
// Isogenies with kernel in E[p+1]
copy_point(&ker_plus, &phi->ker_plus);
copy_point(&ker_minus, &phi->ker_minus);
for(i = 0; i < P_LEN; i++){
// P = ker_plus with the contribution of every later prime multiplied out
copy_point(&P, &ker_plus);
for(j = i+1; j < P_LEN; j++){
for(k = 0; k < phi->degree[j]; k++)
xMULv2(&P, &P, &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A24);
}
// One isogeny of degree TORSION_ODD_PRIMES[i] per unit of exponent
for(k = 0; k < phi->degree[i]; k++){
// K = P multiplied by the prime (degree[i]-k-1) times: kernel of this step
copy_point(&K, &P);
for(j = 0; j < phi->degree[i]-k-1; j++)
xMULv2(&K, &K, &(TORSION_ODD_PRIMES[i]), p_plus_minus_bitlength[i], &A24);
kps(i, K, A24);
xisog(&B24, i, A24);
// Push P, both kernel generators and the caller's points through this step
xeval(&P, i, P, A24);
xeval(&ker_plus, i, ker_plus, A24);
xeval(&ker_minus, i, ker_minus, A24);
for(j = 0; j < length; j++)
xeval(&points[j], i, points[j], A24);
// Move to the codomain curve
copy_point(&A24, &B24);
kps_clear(i);
}
}
// Isogenies with kernel in E[p-1]
for(i = P_LEN; i < P_LEN+M_LEN; i++){
copy_point(&P, &ker_minus);
for(j = i+1; j < P_LEN+M_LEN; j++){
for(k = 0; k < phi->degree[j]; k++)
xMULv2(&P, &P, &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A24);
}
for(k = 0; k < phi->degree[i]; k++){
copy_point(&K, &P);
for(j = 0; j < phi->degree[i]-k-1; j++)
xMULv2(&K, &K, &(TORSION_ODD_PRIMES[i]), p_plus_minus_bitlength[i], &A24);
kps(i, K, A24);
xisog(&B24, i, A24);
xeval(&P, i, P, A24);
// ker_plus is no longer needed at this stage, so only ker_minus is updated
xeval(&ker_minus, i, ker_minus, A24);
for(j = 0; j < length; j++)
xeval(&points[j], i, points[j], A24);
copy_point(&A24, &B24);
kps_clear(i);
}
}
A24_to_AC(image, &A24);
}
// Replaces `old` by a canonical representative: among three candidate values
// for A'^2 (the original A^2/C^2 and the two alternatives given by the formula
// below) the smallest according to fp2_cmp is chosen, its square root becomes
// the new A, and C is set to 1. `isom` receives the isomorphism from `old` to
// the new curve as computed by ec_isomorphism.
void ec_curve_normalize(ec_curve_t *new, ec_isom_t *isom, const ec_curve_t *old){
fp2_t t0, t1, t2, t3, t4, t5;
// Compute the other solutions:
// A'^2 = [ sqrt(A^2-4C^2)*(9C^2-A^2) +- (A^3-3AC^2) ] / [ 2C^2*sqrt(A^2-4C^2) ]
fp2_sqr(&t0, &old->C); //C^2
fp2_add(&t1, &t0, &t0); //2C^2
fp2_add(&t2, &t1, &t1); //4C^2
fp2_sqr(&t3, &old->A); //A^2
fp2_sub(&t2, &t3, &t2); //A^2-4C^2
fp2_sqrt(&t2); //sqrt(A^2-4C^2)
fp2_add(&t0, &t0, &t1); //3C^2
fp2_mul(&t1, &t2, &t1); //2C^2*sqrt(A^2-4C^2)
fp2_sub(&t5, &t3, &t0); //A^2-3C^2
fp2_mul(&t5, &t5, &old->A); //A^3-3AC^2
fp2_add(&t4, &t0, &t0); //6C^2
fp2_add(&t0, &t4, &t0); //9C^2
fp2_sub(&t0, &t0, &t3); //9C^2-A^2
fp2_add(&t3, &t3, &t3); //2A^2
fp2_mul(&t3, &t3, &t2); //2A^2*sqrt(A^2-4C^2)
fp2_mul(&t2, &t2, &t0); //sqrt(A^2-4C^2)*(9C^2-A^2)
fp2_add(&t0, &t2, &t5); //sqrt(A^2-4C^2)*(9C^2-A^2) + (A^3-3AC^2)
fp2_sub(&t2, &t2, &t5); //sqrt(A^2-4C^2)*(9C^2-A^2) - (A^3-3AC^2)
fp2_inv(&t1); //1/(2C^2*sqrt(A^2-4C^2))
fp2_mul(&t0, &t0, &t1); // First solution
fp2_mul(&t2, &t2, &t1); // Second solution
fp2_mul(&t1, &t3, &t1); // Original solution, A^2/C^2
// Choose the lexicographically first solution
if(fp2_cmp(&t0, &t1)==1)
fp2_copy(&t0, &t1);
if(fp2_cmp(&t0, &t2)==1)
fp2_copy(&t0, &t2);
// Copy the solution
// t0 holds A'^2; take the square root (in place) to obtain A'
fp2_sqrt(&t0);
ec_curve_t E;
fp2_copy(&E.A, &t0);
fp_mont_setone(E.C.re);
fp_set(E.C.im, 0);
ec_isomorphism(isom, old, &E);
fp2_copy(&new->A, &E.A);
fp2_copy(&new->C, &E.C);
}
// Computes the coefficients (Nx, Nz, D) of an isomorphism between the curves
// `from` and `to`, in the form consumed by ec_iso_eval:
//     (x : z) -> (Nx*x - Nz*z : D*z).
// NOTE(review): the two curves are presumably required to be isomorphic; no
// check is made here -- confirm against callers.
void ec_isomorphism(ec_isom_t* isom, const ec_curve_t* from, const ec_curve_t* to){
fp2_t t0, t1, t2, t3, t4;
fp2_mul(&t0, &from->A, &to->C);
fp2_sqr(&t0, &t0); //fromA^2toC^2
fp2_mul(&t1, &to->A, &from->C);
fp2_sqr(&t1, &t1); //toA^2fromC^2
fp2_mul(&t2, &to->C, &from->C);
fp2_sqr(&t2, &t2); //toC^2fromC^2
fp2_add(&t3, &t2, &t2);
fp2_add(&t2, &t3, &t2); //3toC^2fromC^2
fp2_sub(&t3, &t2, &t0); //3toC^2fromC^2-fromA^2toC^2
fp2_sub(&t4, &t2, &t1); //3toC^2fromC^2-toA^2fromC^2
fp2_inv(&t3);
fp2_mul(&t4, &t4, &t3);
fp2_sqrt(&t4); //lambda^2 constant for SW isomorphism
fp2_sqr(&t3, &t4);
fp2_mul(&t3, &t3, &t4); //lambda^6
// Check sign of lambda^2, such that lambda^6 has the right sign
fp2_sqr(&t0, &from->C);
fp2_add(&t1, &t0, &t0);
fp2_add(&t1, &t1, &t1);
fp2_add(&t1, &t1, &t1);
fp2_add(&t0, &t0, &t1); // 9fromC^2
fp2_sqr(&t2, &from->A);
fp2_add(&t2, &t2, &t2); // 2fromA^2
fp2_sub(&t2, &t2, &t0);
fp2_mul(&t2, &t2, &from->A); // -9fromC^2fromA+2fromA^3
fp2_sqr(&t0, &to->C);
fp2_mul(&t0, &t0, &to->C);
fp2_mul(&t2, &t2, &t0); //toC^3* [-9fromC^2fromA+2fromA^3]
fp2_mul(&t3, &t3, &t2); //lambda^6*(-9fromC^2fromA+2fromA^3)*toC^3
fp2_sqr(&t0, &to->C);
fp2_add(&t1, &t0, &t0);
fp2_add(&t1, &t1, &t1);
fp2_add(&t1, &t1, &t1);
fp2_add(&t0, &t0, &t1); // 9toC^2
fp2_sqr(&t2, &to->A);
fp2_add(&t2, &t2, &t2); // 2toA^2
fp2_sub(&t2, &t2, &t0);
fp2_mul(&t2, &t2, &to->A); // -9toC^2toA+2toA^3
fp2_sqr(&t0, &from->C);
fp2_mul(&t0, &t0, &from->C);
fp2_mul(&t2, &t2, &t0); //fromC^3* [-9toC^2toA+2toA^3]
// If the two sides disagree, the other square root was needed: flip the sign
if(!fp2_is_equal(&t2, &t3))
fp2_neg(&t4, &t4);
// Mont -> SW -> SW -> Mont
// D = 3*fromC*toC
fp_mont_setone(t0.re);
fp_set(t0.im, 0);
fp2_add(&isom->D, &t0, &t0);
fp2_add(&isom->D, &isom->D, &t0);
fp2_mul(&isom->D, &isom->D, &from->C);
fp2_mul(&isom->D, &isom->D, &to->C);
// Nx = D*lambda^2
fp2_mul(&isom->Nx, &isom->D, &t4);
// Nz = toA*fromC - lambda^2*fromA*toC
fp2_mul(&t4, &t4, &from->A);
fp2_mul(&t4, &t4, &to->C);
fp2_mul(&t0, &to->A, &from->C);
fp2_sub(&isom->Nz, &t0, &t4);
}
/*
 * Inverts an isomorphism in place: the roles of Nx and D are exchanged and
 * Nz is negated.
 */
void ec_iso_inv(ec_isom_t* isom){
    fp2_t saved_nx;

    fp2_neg(&isom->Nz, &isom->Nz);

    fp2_copy(&saved_nx, &isom->Nx);
    fp2_copy(&isom->Nx, &isom->D);
    fp2_copy(&isom->D, &saved_nx);
}
/*
 * Applies an isomorphism to P in place:
 *     (x : z) -> (Nx*x - Nz*z : D*z).
 */
void ec_iso_eval(ec_point_t *P, ec_isom_t* isom){
    fp2_t shift;

    fp2_mul(&shift, &P->z, &isom->Nz);   // Nz*z, taken before z is rescaled
    fp2_mul(&P->x, &P->x, &isom->Nx);    // Nx*x
    fp2_sub(&P->x, &P->x, &shift);       // Nx*x - Nz*z
    fp2_mul(&P->z, &P->z, &isom->D);     // D*z
}

View File

@@ -1,228 +0,0 @@
#include "isog.h"
#include "curve_extras.h"
#include <assert.h>
// File-scope working storage shared by the kernel-point (kps), xisog and xeval
// routines.
// NOTE(review): these are plain globals, so the routines using them are
// presumably not reentrant or thread-safe -- confirm.
int sI, sJ, sK; // Sizes of each current I, J, and K
fp2_t I[sI_max][2], // I also serves as the linear factors of the polynomial h_I(X)
EJ_0[sJ_max][3], EJ_1[sJ_max][3]; // To be used in xisog and xeval
ec_point_t J[sJ_max], K[sK_max]; // Finite subsets of the kernel
fp2_t XZJ4[sJ_max], // -4* (Xj * Zj) for each j in J, and x([j]P) = (Xj : Zj)
rtree_A[(1 << (ceil_log_sI_max+2)) - 1], // constant multiple of the reciprocal tree computation
A0; // constant multiple of the reciprocal R0
poly ptree_hI[(1 << (ceil_log_sI_max+2)) - 1], // product tree of h_I(X)
rtree_hI[(1 << (ceil_log_sI_max+2)) - 1], // reciprocal tree of h_I(X)
ptree_EJ[(1 << (ceil_log_sJ_max+2)) - 1]; // product tree of E_J(X)
fp2_t R0[2*sJ_max + 1]; // Reciprocal of h_I(X) required in the scaled remainder tree approach
int deg_ptree_hI[(1 << (ceil_log_sI_max+2)) - 1], // degree of each node in the product tree of h_I(X)
deg_ptree_EJ[(1 << (ceil_log_sJ_max+2)) - 1]; // degree of each node in the product tree of E_J(X)
fp2_t leaves[sI_max]; // leaves of the remainder tree, which are required in the Resultant computation
// -----------------------------------------------------------
// -----------------------------------------------------------
// Traditional Kernel Point computation (KPs)
// Kernel computation required in the degree-4 isogeny evaluation.
// From the kernel point P = (X:Z) it fills the global table K with
//   K[0].x = 4*Z^2,  K[0].z = 2*Z^2,
//   K[1].x = X - Z,  K[2].x = X + Z.
void kps_4(ec_point_t const P)
{
    fp2_sqr(&K[0].x, &P.z);                 // Z^2
    fp2_add(&K[0].z, &K[0].x, &K[0].x);     // 2*Z^2
    fp2_add(&K[0].x, &K[0].z, &K[0].z);     // 4*Z^2

    fp2_add(&K[2].x, &P.x, &P.z);           // X + Z
    fp2_sub(&K[1].x, &P.x, &P.z);           // X - Z
}
// In-place coordinate change (x : z) -> (z + x : z - x).
void eds2mont(ec_point_t* P)
{
    fp2_t diff;

    fp2_sub(&diff, &(P->z), &(P->x));       // z - x
    fp2_add(&(P->x), &(P->z), &(P->x));     // z + x
    fp2_copy(&(P->z), &diff);
}
// Differential doubling in Twisted Edwards model.
// Writes the double of P to Q; A supplies the curve constants (A->x, A->z).
// P is read only before Q is written.
void ydbl(ec_point_t* Q, ec_point_t* const P, ec_point_t const* A)
{
    fp2_t x_sq, z_sq, gap, corr, lhs, rhs;

    fp2_sqr(&x_sq, &(P->x));
    fp2_sqr(&z_sq, &(P->z));

    fp2_mul(&rhs, &(A->z), &x_sq);      // A.z * x^2
    fp2_mul(&lhs, &rhs, &z_sq);         // A.z * x^2 * z^2

    fp2_sub(&gap, &z_sq, &x_sq);        // z^2 - x^2
    fp2_mul(&corr, &(A->x), &gap);      // A.x * (z^2 - x^2)
    fp2_add(&rhs, &rhs, &corr);
    fp2_mul(&rhs, &rhs, &gap);

    fp2_sub(&(Q->x), &lhs, &rhs);
    fp2_add(&(Q->z), &lhs, &rhs);
}
// Differential addition in Twisted Edwards model.
// Given P, Q and their difference PQ (each as (x : z)), writes the sum to R.
// R is only written after all inputs have been read.
void yadd(ec_point_t* R, ec_point_t* const P, ec_point_t* const Q, ec_point_t* const PQ)
{
    fp2_t pzqx, pxqz, plus_sq, minus_sq, d_sum, d_diff, lhs, rhs;

    // Cross products of P and Q
    fp2_mul(&pzqx, &(P->z), &(Q->x));
    fp2_mul(&pxqz, &(P->x), &(Q->z));

    // (Pz*Qx + Px*Qz)^2 and (Pz*Qx - Px*Qz)^2
    fp2_add(&plus_sq, &pzqx, &pxqz);
    fp2_sub(&minus_sq, &pzqx, &pxqz);
    fp2_sqr(&plus_sq, &plus_sq);
    fp2_sqr(&minus_sq, &minus_sq);

    // Sum and difference of the coordinates of the difference point
    fp2_add(&d_sum, &(PQ->z), &(PQ->x));
    fp2_sub(&d_diff, &(PQ->z), &(PQ->x));

    fp2_mul(&lhs, &d_diff, &plus_sq);
    fp2_mul(&rhs, &d_sum, &minus_sq);

    fp2_sub(&(R->x), &lhs, &rhs);
    fp2_add(&(R->z), &lhs, &rhs);
}
// tvelu formulae
// Kernel point computation for the i-th odd prime l = TORSION_ODD_PRIMES[i]:
// fills K[0..d-1], d = (l - 1) / 2, with y([1]P), y([2]P), ..., y([d]P).
// K[0] and K[1] are always written, whatever the value of d.
void kps_t(uint64_t const i, ec_point_t const P, ec_point_t const A)
{
    int const half = ((int)TORSION_ODD_PRIMES[i] - 1) / 2;
    int idx = 2;

    // Map x(P) on the Montgomery model to its Twisted Edwards form y(P):
    // (X : Z) -> (X - Z : X + Z)
    fp2_sub(&K[0].x, &P.x, &P.z);
    fp2_add(&K[0].z, &P.x, &P.z);

    ydbl(&K[1], &K[0], &A); // y([2]P)

    // y([idx+1]P) = y([idx]P) + y(P), using y([idx-1]P) as the difference
    while (idx < half) {
        yadd(&K[idx], &K[idx - 1], &K[0], &K[idx - 2]);
        idx++;
    }
}
// -----------------------------------------------------------
// -----------------------------------------------------------
// Kernel Point computation (KPs) used in velu SQRT
//
// For the i-th odd prime, a kernel generator x(P) and curve constants A, this
// fills the file-scope tables:
//   J[j], XZJ4[j]  (0 <= j < sJ) : x([2j+1]P) and -4*Xj*Zj
//   I[j]           (0 <= j < sI) : (-X, Z) of x([2j+1]Q), where Q = [2*sJ]P
//   K[j]           (0 <= j < sK) : x([2(j+1)]P); K[0] and K[1] are always written
//   ptree_hI / deg_ptree_hI      : product tree built from I
// and then either the reciprocal tree (rtree_hI, rtree_A) when `scaled` is
// false, or R0 / A0 (reciprocal of the reversed root polynomial) otherwise.
// It also sets the globals sI, sJ, sK from sizeI[i], sizeJ[i], sizeK[i].
void kps_s(uint64_t const i, ec_point_t const P, ec_point_t const A)
{
// =================================================================================
assert(TORSION_ODD_PRIMES[i] > gap); // Ensuring velusqrt is used for l_i > gap
// The optimal bounds must correspond to sI, sJ, and sK
sI = sizeI[i]; // Size of I
sJ = sizeJ[i]; // Size of J
sK = sizeK[i]; // Size of K
assert(sI >= sJ); // Ensuring #I >= #J
assert(sK >= 0); // Recall, it must be that #K >= 0
assert(sJ > 1); // ensuring sI >= sJ > 1
// =================================================================================
// Now, we can proceed with the general case
int j;
// --------------------------------------------------
// Computing [j]P for each j in {1, 3, ..., 2*sJ - 1}
ec_point_t P2, P4;
copy_point(&J[0], &P); // x(P)
// Next computations are required for allowing the use of the function get_A()
fp2_mul(&XZJ4[0], &J[0].x, &J[0].z); // Xj*Zj
fp2_add(&XZJ4[0], &XZJ4[0], &XZJ4[0]); // 2Xj*Zj
fp2_add(&XZJ4[0], &XZJ4[0], &XZJ4[0]); // 4Xj*Zj
fp2_neg(&XZJ4[0], &XZJ4[0]); // -4Xj*Zj
xDBLv2(&P2, &P, &A); // x([2]P)
xADD(&J[1], &P2, &J[0], &J[0]); // x([3]P) = [2]P + P, with difference P
// Next computations are required for allowing the use of the function get_A()
fp2_mul(&XZJ4[1], &J[1].x, &J[1].z); // Xj*Zj
fp2_add(&XZJ4[1], &XZJ4[1], &XZJ4[1]); // 2Xj*Zj
fp2_add(&XZJ4[1], &XZJ4[1], &XZJ4[1]); // 4Xj*Zj
fp2_neg(&XZJ4[1], &XZJ4[1]); // -4Xj*Zj
for (j = 2; j < sJ; j++)
{
xADD(&J[j], &J[j - 1], &P2, &J[j - 2]); // x([2*j + 1]P)
// Next computations are required for allowing the use of the function get_A()
fp2_mul(&XZJ4[j], &J[j].x, &J[j].z); // Xj*Zj
fp2_add(&XZJ4[j], &XZJ4[j], &XZJ4[j]); // 2Xj*Zj
fp2_add(&XZJ4[j], &XZJ4[j], &XZJ4[j]); // 4Xj*Zj
fp2_neg(&XZJ4[j], &XZJ4[j]); // -4Xj*Zj
};
// ----------------------------------------------------------
// Computing [i]P for i in { (2*sJ) * (2i + 1) : 0 <= i < sI}
// and the linear factors of h_I(W)
ec_point_t Q, Q2, tmp1, tmp2;
int bhalf_floor= sJ >> 1;
int bhalf_ceil = sJ - bhalf_floor;
xDBLv2(&P4, &P2, &A); // x([4]P)
// J[bhalf_ceil] and J[bhalf_floor - 1] differ by [2]P when sJ is even and by
// [4]P when sJ is odd, so the difference point handed to xADD is selected by a
// mask-driven swap (undone right after) instead of a branch.
swap_points(&P2, &P4, -(uint64_t)(sJ % 2)); // x([4]P) <--- conditional swap ---> x([2]P)
xADD(&Q, &J[bhalf_ceil], &J[bhalf_floor - 1], &P2); // Q := [2b]P
swap_points(&P2, &P4, -(uint64_t)(sJ % 2)); // x([4]P) <--- conditional swap ---> x([2]P)
// .............................................
xDBLv2(&Q2, &Q, &A); // x([2]Q)
xADD(&tmp1, &Q2, &Q, &Q); // x([3]Q)
// Each I[j] holds (-X, Z) of an odd multiple of Q
fp2_neg(&I[0][0], &Q.x);
fp2_copy(&I[0][1], &Q.z);
fp2_neg(&I[1][0], &tmp1.x);
fp2_copy(&I[1][1], &tmp1.z);
copy_point(&tmp2, &Q);
for (j = 2; j < sI; j++){
// tmp1 = x([2j-1]Q), tmp2 = x([2j-3]Q); the result overwrites tmp2
xADD(&tmp2, &tmp1, &Q2, &tmp2); // x([2*j + 1]Q)
fp2_neg(&I[j][0], &tmp2.x);
fp2_copy(&I[j][1], &tmp2.z);
swap_points(&tmp1, &tmp2, -(uint64_t)1); // unconditional swap: tmp1 becomes the newest multiple
}
// ----------------------------------------------------------------
// Computing [k]P for k in { 4*sJ*sI + 1, ..., l - 6, l - 4, l - 2}
// In order to avoid BRANCHES we always copy into K[0] and K[1]
// by assuming that these entries are only used when sK >= 1 and
// sK >= 2, respectively.
//if (sK >= 1)
copy_point(&K[0], &P2); // x([l - 2]P) = x([2]P)
//if (sK >= 2)
copy_point(&K[1], &P4); // x([l - 4]P) = x([4]P)
for (j = 2; j < sK; j++)
xADD(&K[j], &K[j - 1], &P2, &K[j - 2]); // x([l - 2*(j+1)]P) = x([2 * (j+1)]P)
// ----------------------------------------------------------------
// ~~~~~~~~ ~~~~~~~~
// | | | |
// Computing h_I(W) = | | (W - x([i]P)) = | | (Zi * W - Xi) / Zi where x([i]P) = Xi/Zi
// i in I i in I
// In order to avoid costly inverse computations in fp, we work with projective coordinates
product_tree_LENFeq2(ptree_hI, deg_ptree_hI, 0, I, sI); // Product tree of hI
if (!scaled)
{
// (unscaled) remainder tree approach
reciprocal_tree(rtree_hI, rtree_A, 2*sJ + 1, ptree_hI, deg_ptree_hI, 0, sI); // Reciprocal tree of hI
}
else
{
// scaled remainder tree approach
fp2_t f_rev[sI_max + 1];
// f_rev = coefficients of the root polynomial ptree_hI[0] in reverse order
for (j = 0; j < (sI + 1); j++)
fp2_copy(&f_rev[j], &ptree_hI[0][sI - j]);
// Reciprocal modulo x^max(sI, 2*sJ - sI + 1)
if (sI > (2*sJ - sI + 1))
reciprocal(R0, &A0, f_rev, sI + 1, sI);
else
reciprocal(R0, &A0, f_rev, sI + 1, 2*sJ - sI + 1);
};
}
// Releases the trees built by kps_s for the i-th odd prime.
// Nothing is done for primes not larger than `gap`; the reciprocal tree is
// only cleared when the unscaled remainder-tree approach is in use.
void kps_clear(int i){
    if (TORSION_ODD_PRIMES[i] <= gap)
        return;

    if (!scaled) {
        clear_tree(rtree_hI, 0, sizeI[i]);
    }
    clear_tree(ptree_hI, 0, sizeI[i]);
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,349 +0,0 @@
#define _POLY_MUL_REDC_H_
#include "poly.h"
#include <assert.h>
// Writes a polynomial to h and a field element to c such that f*h = c mod x^n.
//
// REQUIRES h to have space for n terms.
// NOT responsible for terms in h beyond h[n-1].
// Every coefficient of f that is needed is read before the corresponding
// coefficient of h is written, in each of the cases below.
void reciprocal(poly h, fp2_t *c, const poly f, const int lenf, const int n){
    int k;

    // f shorter than the requested precision: pad it with zeroes and retry
    if (n > lenf) {
        fp2_t padded[n];
        for (k = 0; k < lenf; k++)
            fp2_copy(&padded[k], &f[k]);
        for (k = lenf; k < n; k++)
            fp2_set(&padded[k], 0);
        reciprocal(h, c, padded, n, n);
        return;
    }

    switch (n) {
    case 0:
        // Trivial case
        fp2_set(c, 0);
        return;

    case 1:
        // h = 1, c = f0
        fp2_copy(c, &f[0]);
        fp_mont_setone(h[0].re);fp_set(h[0].im,0);
        return;

    case 2:
        // h = f0 - f1*x, c = f0^2
        fp2_sqr(c, &f[0]);
        fp2_copy(&h[0], &f[0]);
        fp2_neg(&h[1], &f[1]);
        return;

    case 3: {
        fp2_t f1_sq, top;

        // top = (f0*f2 - f1^2) * f0
        fp2_sqr(&f1_sq, &f[1]);
        fp2_mul(&top, &f[0], &f[2]);
        fp2_sub(&top, &top, &f1_sq);
        fp2_mul(&top, &top, &f[0]);

        // Reciprocal mod x^2 (same as the n = 2 case), then lift to x^3
        fp2_sqr(c, &f[0]);
        fp2_copy(&h[0], &f[0]);
        fp2_neg(&h[1], &f[1]);

        fp2_mul(&h[0], &h[0], c);
        fp2_mul(&h[1], &h[1], c);
        fp2_neg(&h[2], &top);
        fp2_sqr(c, c);
        return;
    }

    case 4: {
        fp2_t g[2], c_low, f1_sq, g0f2, g0f3, g1f2, e2, e3, neg_h2, neg_h3;

        // g = reciprocal of f mod x^2, with constant c_low
        reciprocal(g, &c_low, f, 2, 2);

        fp2_sqr(&f1_sq, &f[1]);
        fp2_mul(&g0f2, &g[0], &f[2]);
        fp2_mul(&g0f3, &g[0], &f[3]);
        fp2_mul(&g1f2, &g[1], &f[2]);

        fp2_sub(&e2, &g0f2, &f1_sq);        // g0*f2 - f1^2
        fp2_add(&e3, &g0f3, &g1f2);         // g0*f3 + g1*f2

        fp2_mul(&neg_h2, &e2, &g[0]);
        fp2_mul(&neg_h3, &e3, &g[0]);
        fp2_mul(&g1f2, &e2, &g[1]);         // scratch reuse: e2 * g1
        fp2_add(&neg_h3, &g1f2, &neg_h3);

        fp2_mul(&h[0], &g[0], &c_low);
        fp2_mul(&h[1], &g[1], &c_low);
        fp2_neg(&h[2], &neg_h2);
        fp2_neg(&h[3], &neg_h3);
        fp2_sqr(c, &c_low);
        return;
    }

    default:
        break;
    }

    // General case
    // Compute the reciprocal `low` mod x^m for m = ceil(n/2).
    // Then f*low - c_low is a multiple of x^m, so only the terms from m to n-1 matter.
    const int m = n - (n>>1);
    fp2_t low[m], mid[m], c_low;

    reciprocal(low, &c_low, f, lenf, m);
    poly_mul_middle(mid, low, m, f, n);
    poly_mul_low(mid, n-m, low, m, &(mid[2*m-n]), n-m);

    for (k = 0; k < m; k++)
        fp2_mul(&h[k], &low[k], &c_low);
    for (k = m; k < n; k++)
        fp2_neg(&h[k], &mid[k-m]);
    fp2_sqr(c, &c_low);
}
// Polynomial reduction up to a scalar factor; see the contract at the top of the body.
void poly_redc(poly h, const poly g, const int leng, const poly f, const int lenf,//
const poly f_rev_inv, const fp2_t c)
{
// Computes h(x) = a * g(x) mod f(x) for some scalar a, writing lenf-1 terms to h.
// REQUIRES an inverse f_rev_inv such that f_rev*f_rev_inv = c mod x^(leng-lenf+1),
// where f_rev is the polynomial with the coefficients of f listed in reverse order.
// The scalar a is equal to c, except for special cases:
// - If leng<lenf (no reduction needed) then a = 1
// - If lenf = leng = 2, then a = f[1]
// - If lenf = leng = 3, then a = f[2]
// - If lenf=2, leng=3 then a = 2*f[1]^2
// f_rev_inv and c are not read in any of these special cases.
//
// REQUIRES h to have space for lenf-1 terms
// NOT responsible for terms in h beyond h[lenf-2]
int i;
// Case without reduction: copy g and zero-pad up to lenf-1 terms
if(leng < lenf)
{
for(i = 0; i < leng; i++)
fp2_copy(&h[i], &g[i]);
for(i = leng; i < lenf-1; i++)
fp2_set(&h[i], 0);
return;
}
// Small cases for f linear
if(lenf == 2)
{
if(leng == 2)
{
// h0 = g0*f1 - g1*f0
fp2_t t0;
fp2_mul(&t0, &g[0], &f[1]);
fp2_mul(&h[0], &g[1], &f[0]);
fp2_sub(&h[0], &t0, &h[0]);
return;
}
if(leng == 3)
{
// h0 = 2*f0^2*g2 + 2*f1^2*g0 - 2*f0*f1*g1,
// where -2*f0*f1 is obtained as (f0 - f1)^2 - f0^2 - f1^2
fp2_t f0f1, f02, f12;
fp2_sqr(&f02, &f[0]);
fp2_sqr(&f12, &f[1]);
fp2_sub(&f0f1, &f[0], &f[1]);
fp2_sqr(&f0f1, &f0f1);
fp2_sub(&f0f1, &f0f1, &f02);
fp2_sub(&f0f1, &f0f1, &f12);
fp2_add(&f02, &f02, &f02);
fp2_add(&f12, &f12, &f12);
fp2_mul(&f02, &f02, &g[2]);
fp2_mul(&f12, &f12, &g[0]);
fp2_mul(&f0f1, &f0f1, &g[1]);
fp2_add(&h[0], &f02, &f12);
fp2_add(&h[0], &h[0], &f0f1);
return;
}
// lenf == 2 with leng > 3 falls through to the general case
}
// Small case for f quadratic
if(lenf == 3 && leng == 3)
{
// h0 = f2*g0 - g2*f0,  h1 = f2*g1 - f1*g2
fp2_t f2g1, f2g0, f1g2;
fp2_mul(&f2g1, &g[1], &f[2]);
fp2_mul(&f2g0, &g[0], &f[2]);
fp2_mul(&f1g2, &g[2], &f[1]);
fp2_mul(&h[0], &g[2], &f[0]);
fp2_sub(&h[0], &f2g0, &h[0]);
fp2_sub(&h[1], &f2g1, &f1g2);
return;
}
// General case
fp2_t g_reversed[leng], Q[leng - lenf + 1], Q_reversed[leng - lenf + 1];
for(i = 0; i < leng; i++)
fp2_copy(&g_reversed[i], &g[leng-1-i]);
// Q = low (leng-lenf+1) terms of f_rev_inv * rev(g)
poly_mul_low(Q, leng-lenf+1, f_rev_inv, leng-lenf+1, g_reversed, leng-lenf+1);
for(i = 0; i < leng - lenf + 1; i++)
fp2_copy(&Q_reversed[i], &Q[leng - lenf - i]);
// g_reversed is reused as scratch for the low lenf-1 terms of rev(Q) * f
poly_mul_low(g_reversed, lenf-1, Q_reversed, leng-lenf+1, f, lenf);
// h = c*g - rev(Q)*f on the low lenf-1 coefficients
for(i = 0; i < lenf-1; i++)
{
fp2_mul(&h[i], &g[i], &c);
fp2_sub(&h[i], &h[i], &g_reversed[i]);
}
return;
}
// Recursively builds the reciprocal tree for a product tree; see the contract at the top of the body.
void reciprocal_tree(poly *R, fp2_t *A, const int leng, const poly H[], const int DEG[],//
const int root, const int n)
{
// Given a product tree H with degrees tree DEG rooted at root and generated
// by n polynomials, writes the reverse-reciprocal polynomials to R and field elements
// to A such that Rev(H[i])*R[i] = A[i] mod x^(N) for all nodes but the leaves.
// The mod is N = deg(parent)-deg(self) for inner nodes, or N = leng - deg(root) for the root.
//
// REQUIRES that leng >= DEG[0] and that R,A have enough space for the tree (see product_tree)
if(n == 0)
return;
// Array-embedded binary tree: children of node k are 2k+1 and 2k+2
const int parent = (root-1) >> 1;
const int brother = root - 1 + 2*(root & 1);
int lenr;
if(root > 0)
lenr = DEG[parent] - DEG[root];
else
lenr = leng - DEG[root];
// R[root] is allocated for every visited node, including leaves and the
// special case below where it is left unfilled.
// NOTE(review): the malloc result is not checked, and the buffer is not freed
// here; presumably it is released by a separate tree-clearing routine -- confirm.
R[root] = malloc(sizeof(fp2_t)*lenr);
// ----------------------------------
// base cases determined by poly_redc
if(n == 1)
return;
// case for computing g mod f when len(f), len(g) = 3
if (DEG[root] == 2 && lenr == 1)
{
reciprocal_tree(R, A, lenr-1, H, DEG, 2*root+1, n-(n>>1));
reciprocal_tree(R, A, lenr-1, H, DEG, 2*root+2, n>>1);
return;
}
// ----------------------------------
int i;
// When the parent's inverse was calculated to a smaller modulus, need to invert from scratch
if(root == 0 || leng < lenr)
{
// R[root] = reversed coefficients of H[root], truncated or zero-padded to lenr terms
for(i = 0; i < lenr && i < DEG[root]+1; i++)
fp2_copy(&R[root][i], &H[root][DEG[root]-i]);
for(i = DEG[root]+1; i < lenr; i++){
fp2_set(&R[root][i], 0);
}
// In-place call: R[root] is both the input polynomial and the output
reciprocal(R[root], &(A[root]), R[root], lenr, lenr);
}
else
{
// When parent's inverse was to a greater/equal modulus, this inverse can be obtained from it
// by multiplying the parent's reciprocal with the reversed brother polynomial
for(i = 0; i < lenr; i++)
fp2_copy(&R[root][i], &H[brother][DEG[brother]-i]);
poly_mul_low(R[root], lenr, R[parent], leng, R[root], lenr);
fp2_copy(&A[root], &A[parent]);
}
// Now move on to the children (left subtree gets ceil(n/2) leaves, right gets floor(n/2))
reciprocal_tree(R, A, lenr-1, H, DEG, 2*root+1, n-(n>>1));
reciprocal_tree(R, A, lenr-1, H, DEG, 2*root+2, n>>1);
return;
}
// Given the product tree H and reciprocal tree R,A generated by f_0, ... , f_{n-1},
// with corresponding degrees tree DEG[] and rooted at root, writes the constant term
// of c_i*g mod f_i to REM[i]. The constants c_i are unspecified, but are a function
// only of leng and f_0,...,f_{n-1}, so they cancel out when taking the ratios of
// remainders of different g's of the same length.
//
// REQUIRES REM to have space for n terms
void multieval_unscaled(fp2_t REM[], const poly g, const int leng, const poly R[], const fp2_t A[],
                        const poly H[], const int DEG[], const int root, const int n)
{
    if (n == 0)
        return;

    // Reduce g modulo this node's polynomial (up to a scalar factor)
    const int deg = DEG[root];
    fp2_t rem[deg];
    poly_redc(rem, g, leng, H[root], deg + 1, R[root], A[root]);

    // Leaf: the constant term is the result
    if (n == 1) {
        fp2_copy(&REM[0], &rem[0]);
        return;
    }

    // Inner node: the left subtree takes ceil(n/2) leaves, the right one floor(n/2)
    const int n_right = n >> 1;
    const int n_left = n - n_right;
    multieval_unscaled(REM, rem, deg, R, A, H, DEG, 2*root+1, n_left);
    multieval_unscaled(&(REM[n_left]), rem, deg, R, A, H, DEG, 2*root+2, n_right);
}
// Given the product tree H generated by LINEAR f_0,...,f_{n-1} rooted at root and with
// corresponding degrees tree DEG, writes the constant term of c_i * g mod f_i(x) to REM[i].
// The constants c_i are unspecified but are only a function of leng and f_0,...,f_{n-1},
// so they cancel out when taking the ratios of remainders of different g's of the same length.
//
// REQUIRES REM to have space for n terms and n > 1
// Also REQUIRES G = rev((rev(g mod F)) * F_rev_inv mod x^deg(F)-1) where F = H[root]
// and F_rev_inv is its reverse's reciprocal mod x^deg(F)
void multieval_scaled(fp2_t REM[], const poly G, const poly H[],
                      const int DEG[], const int root, const int n)
{
    // Leaf counts and node indices of the two subtrees
    const int n_right = n >> 1;
    const int n_left = n - n_right;
    const int left_child = 2*root + 1;
    const int right_child = 2*root + 2;

    if (root == 0) {
        // The root passes G down unchanged
        if (n == 1) {
            fp2_copy(&REM[0], &G[DEG[root]-1]);
            return;
        }
        multieval_scaled(REM, G, H, DEG, left_child, n_left);
        multieval_scaled(&(REM[n_left]), G, H, DEG, right_child, n_right);
        return;
    }

    const int parent = (root-1) >> 1;
    const int brother = root - 1 + 2*(root & 1);
    const int len_brother = DEG[brother] + 1;

    // Length of G: the parent's brother ("uncle") fixes it below depth 1,
    // the root degree fixes it for the root's direct children
    int len_G;
    if (root > 2) {
        const int uncle = parent - 1 + 2*(parent & 1);
        len_G = DEG[uncle] + 1;
    } else {
        len_G = DEG[0];
    }

    fp2_t fg[len_brother];
    poly_mul_middle(fg, H[brother], len_brother, G, len_G);

    if (n == 1) {
        fp2_copy(&REM[0], &fg[DEG[brother]]);
        return;
    }

    multieval_scaled(REM, fg, H, DEG, left_child, n_left);
    multieval_scaled(&(REM[n_left]), fg, H, DEG, right_child, n_right);
}

View File

@@ -1,231 +0,0 @@
#include <tedwards.h>
#include <assert.h>
// a*x^2+y^2=1+d*x^2*y^2
// a = A.x/A.z + 2, d = A.x/A.z - 2
void ted_init(ted_point_t* P)
{ // Initialize point as identity element (X:Y:Z:T) <- (0:1:1:0)
    //
    // The previous code zeroed the point and then stored 1 in X, which yields
    // (1:0:0:0) instead of the documented identity. Y and Z are now the ones set
    // to 1, built exactly as mont_to_ted_point builds the image of the point at
    // infinity: fp2_set followed by a conversion of the real part to Montgomery form.
    fp2_set(&P->x, 0);
    fp2_set(&P->y, 1);
    fp2_set(&P->z, 1);
    fp2_set(&P->t, 0);
    fp_tomont(P->y.re, P->y.re);
    fp_tomont(P->z.re, P->z.re);
}
// Copies all four extended coordinates (X, Y, Z, T) of Q into P.
void copy_ted_point(ted_point_t* P, ted_point_t const* Q)
{
    fp2_copy(&P->t, &Q->t);
    fp2_copy(&P->z, &Q->z);
    fp2_copy(&P->y, &Q->y);
    fp2_copy(&P->x, &Q->x);
}
// Doubling in extended twisted Edwards coordinates: Q = [2]P.
// The curve coefficient a is read from E->A.
//
//   A = X1^2,  B = Y1^2,  C = 2*Z1^2,  D = a*A
//   K = (X1+Y1)^2 - A - B
//   G = D + B,  F = G - C,  H = D - B
//   X3 = K*F,  Y3 = G*H,  T3 = K*H,  Z3 = F*G
//
// Q is only written after every read of P.
// TODO: neutral element
void ted_dbl(ted_point_t *Q, ted_point_t const *P, ec_curve_t const* E)
{
    fp2_t xx, yy, zz2, a_xx, cross, g_sum, f_val, h_diff;

    fp2_sqr(&xx, &P->x);                    // A
    fp2_sqr(&yy, &P->y);                    // B

    fp2_add(&cross, &P->x, &P->y);          // K = (X1 + Y1)^2 - A - B
    fp2_sqr(&cross, &cross);
    fp2_sub(&cross, &cross, &xx);
    fp2_sub(&cross, &cross, &yy);

    fp2_sqr(&zz2, &P->z);                   // C = 2 * Z1^2
    fp2_add(&zz2, &zz2, &zz2);

    fp2_mul(&a_xx, &xx, &E->A);             // D
    fp2_add(&g_sum, &a_xx, &yy);            // G
    fp2_sub(&f_val, &g_sum, &zz2);          // F
    fp2_sub(&h_diff, &a_xx, &yy);           // H

    fp2_mul(&Q->x, &cross, &f_val);
    fp2_mul(&Q->y, &g_sum, &h_diff);
    fp2_mul(&Q->t, &cross, &h_diff);
    fp2_mul(&Q->z, &f_val, &g_sum);
}
// Addition in extended twisted Edwards coordinates: S = P + Q.
// The curve coefficient a is read from E->A.
//
//   A = X1*X2,  B = Y1*Y2,  C = Z1*T2,  D = T1*Z2
//   K = D + C
//   F = (X1-Y1)*(X2+Y2) + B - A
//   G = B + a*A
//   H = D - C
//   X3 = K*F,  Y3 = G*H,  T3 = K*H,  Z3 = F*G
//
// P == Q is routed to ted_dbl and P == -Q to ted_init. If the formula still
// produces X3 = Y3 = Z3 = 0, the doubling of P is returned instead.
// TODO: neutral element
void ted_add(ted_point_t* S, ted_point_t const* P, ted_point_t const* Q, ec_curve_t const* E)
{
    ted_point_t neg_p, out;

    if (is_ted_equal(P, Q)) {
        ted_dbl(S, P, E);
        return;
    }

    ted_neg(&neg_p, P);
    if (is_ted_equal(&neg_p, Q)) {
        ted_init(S);
        return;
    }

    fp2_t xx, yy, zt, tz, k_sum, f_val, g_val, h_diff, x_minus_y;

    fp2_mul(&xx, &P->x, &Q->x);             // A
    fp2_mul(&yy, &P->y, &Q->y);             // B
    fp2_mul(&zt, &P->z, &Q->t);             // C
    fp2_mul(&tz, &P->t, &Q->z);             // D

    fp2_add(&k_sum, &tz, &zt);              // K
    fp2_sub(&h_diff, &tz, &zt);             // H

    fp2_add(&f_val, &Q->x, &Q->y);          // F
    fp2_sub(&x_minus_y, &P->x, &P->y);
    fp2_mul(&f_val, &f_val, &x_minus_y);
    fp2_add(&f_val, &f_val, &yy);
    fp2_sub(&f_val, &f_val, &xx);

    fp2_mul(&g_val, &xx, &E->A);            // G
    fp2_add(&g_val, &g_val, &yy);

    fp2_mul(&out.x, &k_sum, &f_val);
    fp2_mul(&out.y, &g_val, &h_diff);
    fp2_mul(&out.t, &k_sum, &h_diff);
    fp2_mul(&out.z, &f_val, &g_val);

    if (fp2_is_zero(&out.x) && fp2_is_zero(&out.y) && fp2_is_zero(&out.z)) {
        ted_dbl(S, P, E);
        return;
    }
    copy_ted_point(S, &out);
}
// Negation: Q = -P, i.e. (X : Y : Z : T) -> (-X : Y : Z : -T).
void ted_neg(ted_point_t* Q, ted_point_t const* P)
{
    // Y and Z are kept
    fp2_copy(&Q->y, &P->y);
    fp2_copy(&Q->z, &P->z);
    // X and T change sign
    fp2_neg(&Q->x, &P->x);
    fp2_neg(&Q->t, &P->t);
}
// Recovers a y-value for the x-only point P = (X : Z) on `curve` (A : C).
// Returns false if P is on the curve, true if it is on the twist; also returns
// false, leaving y untouched, when Z = 0.
// On the twist, the right-hand side is multiplied by fp2_non_residue() before
// the square root is taken.
static bool xLIFT(fp2_t* y, const ec_point_t* P, const ec_curve_t* curve)
{
    fp2_t xx, zz, num, axz, den, rhs;

    if (fp2_is_zero(&P->z))
        return false;

    // num = C*(X^2 + Z^2)
    fp2_sqr(&xx, &P->x);
    fp2_sqr(&zz, &P->z);
    fp2_add(&num, &xx, &zz);
    fp2_mul(&num, &num, &curve->C);

    // num = C*X^2 + A*X*Z + C*Z^2
    fp2_mul(&axz, &P->x, &P->z);
    fp2_mul(&axz, &axz, &curve->A);
    fp2_add(&num, &num, &axz);

    // num = X * (C*X^2 + A*X*Z + C*Z^2) = Z^3 * (C*x^3 + A*x^2 + C*x), with x = X/Z
    fp2_mul(&num, &num, &P->x);

    // den = (Z*C)^(-1)
    fp2_mul(&den, &curve->C, &P->z);
    assert(!fp2_is_zero(&den));
    fp2_inv(&den);

    fp2_mul(&rhs, &num, &den);
    fp2_copy(y, &rhs);

    const bool on_twist = !fp2_is_square(&rhs);
    if (on_twist) {
        fp2_t non_residue = fp2_non_residue();
        fp2_mul(y, y, &non_residue);
    }
    fp2_sqrt(y);
    return on_twist;
}
// Derives the twisted Edwards coefficients from the Montgomery curve
// y^2 = x^3 + (A/C)x^2 + x:
//   ted_curve->A = A/C + 2
//   ted_curve->C = A/C - 2
// The ratio A/C is computed before ted_curve is written.
void mont_to_ted(ec_curve_t* ted_curve, ec_curve_t const* curve)
{
    fp2_t ratio, two;

    // The constant 2 in Montgomery form
    fp2_set(&two, 2);
    fp2_tomont(&two, &two);

    // ratio = A / C
    fp2_copy(&ratio, &curve->C);
    fp2_inv(&ratio);
    fp2_mul(&ratio, &ratio, &curve->A);

    fp2_add(&ted_curve->A, &ratio, &two);
    fp2_sub(&ted_curve->C, &ratio, &two);
}
// Maps the x-only Montgomery point P = (X : Z) on `curve` to extended twisted
// Edwards coordinates. The point at infinity (Z = 0) maps to (0 : 1 : 1 : 0).
// Otherwise, with y obtained from xLIFT (its on-curve/on-twist result is ignored):
//   X' = X*(X + Z),  Y' = (X - Z)*y,  Z' = (X + Z)*y,  T' = X'*Y'/Z'
void mont_to_ted_point(ted_point_t* Q, ec_point_t const* P, ec_curve_t const* curve)
{
    if (fp2_is_zero(&P->z)) {
        fp2_set(&Q->x, 0);
        fp2_set(&Q->y, 1);
        fp2_set(&Q->z, 1);
        fp2_set(&Q->t, 0);
        // Bring the two ones into Montgomery form
        fp_tomont(Q->y.re, Q->y.re);
        fp_tomont(Q->z.re, Q->z.re);
        return;
    }

    fp2_t x_plus_z, lifted_y;

    xLIFT(&lifted_y, P, curve);
    fp2_add(&x_plus_z, &P->x, &P->z);

    fp2_mul(&Q->x, &P->x, &x_plus_z);
    fp2_sub(&Q->y, &P->x, &P->z);
    fp2_mul(&Q->y, &Q->y, &lifted_y);
    fp2_mul(&Q->z, &x_plus_z, &lifted_y);

    // T' = X' * Y' / Z'
    fp2_copy(&Q->t, &Q->z);
    fp2_inv(&Q->t);
    fp2_mul(&Q->t, &Q->t, &Q->x);
    fp2_mul(&Q->t, &Q->t, &Q->y);
}
// Maps a twisted Edwards point back to x-only Montgomery coordinates:
// (X : Z) = (Z + Y : Z - Y).
void ted_to_mont_point(ec_point_t* Q, ted_point_t const* P)
{
    fp2_sub(&Q->z, &P->z, &P->y);
    fp2_add(&Q->x, &P->z, &P->y);
}
// Projective equality test: true when
//   X1*Z2 == X2*Z1,  Y1*Z2 == Y2*Z1  and  T1*Y2 == T2*Y1.
bool is_ted_equal(ted_point_t const* P1, ted_point_t const* P2)
{
    fp2_t lhs_x, rhs_x;
    fp2_t lhs_y, rhs_y;
    fp2_t lhs_t, rhs_t;

    fp2_mul(&lhs_x, &P1->x, &P2->z);
    fp2_mul(&rhs_x, &P2->x, &P1->z);

    fp2_mul(&lhs_y, &P1->y, &P2->z);
    fp2_mul(&rhs_y, &P2->y, &P1->z);

    fp2_mul(&lhs_t, &P1->t, &P2->y);
    fp2_mul(&rhs_t, &P2->t, &P1->y);

    if (!fp2_is_equal(&lhs_x, &rhs_x))
        return false;
    if (!fp2_is_equal(&lhs_y, &rhs_y))
        return false;
    return fp2_is_equal(&lhs_t, &rhs_t) ? true : false;
}

View File

@@ -1,18 +0,0 @@
#include "ec-tests.h"
// Test/benchmark driver.
// Usage: <prog> test <reps>   or   <prog> bench <reps>
// The exit status is 0 only when both the ec and the dlog routine of the
// selected mode return a value whose bitwise AND is non-zero.
int main(int argc, char* argv[])
{
    if (argc < 3) {
        printf("Please enter an argument: 'test' or 'bench' and <reps>\n");
        exit(1);
    }

    const char *mode = argv[1];
    const char *reps = argv[2];

    if (strcmp(mode, "test") == 0) {
        TEST_LOOPS = atoi(reps);
        return !(ec_test() & dlog_test());
    }

    if (strcmp(mode, "bench") == 0) {
        BENCH_LOOPS = atoi(reps);
        return !(ec_run() & dlog_run());
    }

    // Unknown mode
    exit(1);
}

View File

@@ -1,142 +0,0 @@
#include <assert.h>
#include <time.h>
#include <stdio.h>
#include <fp2.h>
#include <inttypes.h>
// Iteration counts for this test program.
static int BENCH_LOOPS = 1000; // Number of iterations per bench (NOTE(review): not referenced by the code visible in this file -- confirm it is still needed)
static int TEST_LOOPS = 512; // Number of iterations per test (default; main() overrides it with argv[1] when given)
// True when both the real and the imaginary parts of a and b are equal.
bool fp2_isequal(fp2_t a, fp2_t b){
    if (!fp_is_equal(a.re, b.re))
        return false;
    return fp_is_equal(a.im, b.im) ? true : false;
}
// True when a equals 1, i.e. its real part matches the Montgomery form of one
// word by word and its imaginary part is all-zero.
bool fp2_isone(fp2_t a){
    fp_t mont_one;
    fp_mont_setone(mont_one);

    for (int w = 0; w < NWORDS_FIELD; w++) {
        if (a.re[w] != mont_one[w] || a.im[w] != 0)
            return false;
    }
    return true;
}
// Prints "<name> = 0x<re> + i*0x<im>" with the most significant word first.
// Before printing, the value is multiplied by fp2_set(., 1); presumably this
// takes it out of Montgomery form -- confirm against fp2_set / fp2_mul.
void fp2_print(char *name, fp2_t const a){
    fp2_t plain;
    int w;

    fp2_set(&plain, 1);
    fp2_mul(&plain, &plain, &a);

    printf("%s = 0x", name);
    w = NWORDS_FIELD;
    while (w-- > 0)
        printf("%016" PRIx64, plain.re[w]);

    printf(" + i*0x");
    w = NWORDS_FIELD;
    while (w-- > 0)
        printf("%016" PRIx64, plain.im[w]);

    printf("\n");
}
// VERY NOT SECURE (testing only)
// Fills every word of a with rand() output, multiplies the result by the
// Montgomery form of one to normalize it, then reseeds rand() from the
// lowest word of the real part.
void fp2_random(fp2_t *a){
    int w = 0;
    while (w < NWORDS_FIELD) {
        a->re[w] = rand();
        a->im[w] = rand();
        w++;
    }

    // Normalize
    fp2_t mont_one;
    fp_mont_setone(mont_one.re);
    fp_set(mont_one.im, 0);
    fp2_mul(a, a, &mont_one);

    // Update seed
    srand((unsigned) a->re[0]);
}
// Randomized self-test of the fp2_t arithmetic.
// An optional first argument overrides the number of iterations (TEST_LOOPS).
// Every check is an assert, so a failure aborts the program; reaching the end
// prints the success message and returns 0.
int main(int argc, char* argv[])
{
    if (argc > 1) {
        TEST_LOOPS = atoi(argv[1]);
    }

    int i;
    fp2_t a, b, c, d;

    for (i = 0; i < TEST_LOOPS; i++)
    {
        // Progress indicator, erased again right away
        printf("[%3d%%] Testing fp2_t arithmetic", 100 * i / (int)TEST_LOOPS);
        fflush(stdout);
        printf("\r\x1b[K");

        // Random elements of fp2
        fp2_random(&a);
        fp2_random(&b);

        fp2_copy(&c, &a);
        c.re[0] += 1;

        fp2_copy(&d, &b);
        d.re[0] -= 1;

        assert(fp2_isequal(a,b) == 0);   // different values check --> (a != b)
        assert(fp2_isequal(c,c) == 1);   // equal values check --> 1 (c == c)

        // Testing neg: -a must equal 0 - a
        fp2_set(&b, 0);
        fp2_copy(&c, &a);
        fp2_neg(&a, &a);
        fp2_sub(&c, &b, &c);
        assert(fp2_isequal(a,c) == 1);

        fp_mont_setone(a.re);fp_set(a.im,0);   // Now a == 1
        fp2_set(&b, 0);                        // Now b == 0
        assert(fp2_is_zero(&a) == 0);
        assert(fp2_is_zero(&b) == 1);

        // testing c - c
        fp2_sub(&d, &c, &c);
        assert(fp2_is_zero(&d) == 1);

        // testing c * 0
        fp2_mul(&d, &c, &b);
        assert(fp2_is_zero(&d) == 1);

        // testing c * 1 ... recall, in Montgomery domain R mod p plays the role of the 1
        fp_mont_setone(a.re);fp_set(a.im,0);
        fp2_mul(&d, &c, &a);
        assert(fp2_isequal(d, c) == 1);

        // Testing 1/a by computing (1/a) x a
        fp2_random(&a);
        fp2_copy(&b, &a);
        fp2_inv(&a);
        fp2_mul(&c, &a, &b);
        assert(fp2_isone(c) == 1);

        // A square must be recognized as a square
        fp2_random(&a);
        fp2_sqr(&b, &a);
        assert( fp2_is_square(&b) );
    };

    if(TEST_LOOPS){
        printf("[%2d%%] Tested fp2_t arithmetic:\tNo errors!\n", 100 * i /TEST_LOOPS);
    }
    printf("-- All tests passed.\n");
    return 0;
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,386 +0,0 @@
#include <time.h>
#include <assert.h>
#include <stdio.h>
#include "ec.h"
#include "isog.h"
#include "test-basis.h"
#include <bench.h>
// Iteration counts for this test program.
static int BENCH_LOOPS = 1000; // Number of iterations per bench (NOTE(review): not referenced by the code visible in this file -- confirm it is still needed)
static int TEST_LOOPS = 128; // Number of iterations per test; main() overrides it with argv[1] and also uses it to size the benchmark arrays
// void random_scalar(fp_t k, const uint8_t j)
// {
// // To implement a better random function (We must use some of the SHAKE family functions)
// do
// {
// randombytes((void *)k, keyspace_bytes[j]);
// } while (fp_issmaller((uint64_t *)k, keyspace_size[j]));
// }
// VERY NOT SECURE (testing only)
// Fills every word of a with rand() output, multiplies the result by the
// Montgomery form of one to normalize it, then reseeds rand() from the
// lowest word of the real part.
void fp2_random(fp2_t *a){
    int w = 0;
    while (w < NWORDS_FIELD) {
        a->re[w] = rand();
        a->im[w] = rand();
        w++;
    }

    // Normalize
    fp2_t mont_one;
    fp_mont_setone(mont_one.re);
    fp_set(mont_one.im, 0);
    fp2_mul(a, a, &mont_one);

    // Update seed
    srand((unsigned) a->re[0]);
}
// Affine Montgomery coefficient computation (A + 2C : 4C) --> A/C
// With A24 = A.x and C24 = A.z, writes 2*(2*A24 - C24) / C24 to B.
void coeff(fp2_t *B, ec_point_t const A)
{
    fp2_t num;

    // B = 1 / C24
    fp2_copy(B, &A.z);
    fp2_inv(B);

    // num = 2 * (2*A24 - C24)
    fp2_add(&num, &A.x, &A.x);
    fp2_sub(&num, &num, &A.z);
    fp2_add(&num, &num, &num);

    fp2_mul(B, &num, B);
}
// Determines if point is fp2-rational (if not, then it must be a zero trace point).
// With x = X/Z, returns whether x^3 + a*x^2 + x is a square.
uint8_t isrational(ec_point_t const T, fp2_t const a)
{
    fp2_t x, x_sq, x_cu, ax_sq, rhs;

    // Affine x-coordinate
    fp2_copy(&x, &T.z);
    fp2_inv(&x);
    fp2_mul(&x, &x, &T.x);

    fp2_sqr(&x_sq, &x);
    fp2_mul(&x_cu, &x_sq, &x);
    fp2_mul(&ax_sq, &x_sq, &a);

    // Right-hand side of the curve equation
    fp2_add(&rhs, &ax_sq, &x_cu);
    fp2_add(&rhs, &rhs, &x);

    return fp2_is_square(&rhs);
}
// ladder3pt computes x(P + [m]Q)
// All 64 bits of each of the NWORDS_FIELD words of m are processed, least
// significant first. For a zero bit the two accumulators X1 and X2 are swapped
// around the xDBLADD step by means of a mask, and swapped back afterwards.
void ladder3pt(ec_point_t* R, fp_t const m, ec_point_t const* P, ec_point_t const* Q, ec_point_t const* PQ, ec_point_t const* A)
{
    ec_point_t X0, X1, X2;
    int word, bit;

    copy_point(&X0, Q);
    copy_point(&X1, P);
    copy_point(&X2, PQ);

    for (word = 0; word < NWORDS_FIELD; word++) {
        uint64_t probe = 1;
        for (bit = 0; bit < 64; bit++) {
            swap_points(&X1, &X2, -((probe & m[word]) == 0));
            xDBLADD(&X0, &X1, &X0, &X1, &X2, A);
            swap_points(&X1, &X2, -((probe & m[word]) == 0));
            probe <<= 1;
        }
    }

    copy_point(R, &X1);
}
// For computing [(p + 1) / l_i]P, i:=0, ..., (N - 1)
// Divide and conquer over the index range [lower, upper): on entry P[lower]
// holds the input point. The range is split at mid; P[mid] receives the input
// multiplied by the primes of the lower half, and P[lower] is multiplied by
// the primes of the upper half. Both halves are then processed recursively.
void cofactor_multiples(ec_point_t P[], ec_point_t const* A, size_t lower, size_t upper)
{
    assert(lower < upper);
    if (upper - lower == 1)
        return;

    size_t const mid = lower + (upper - lower + 1) / 2;
    int k;

    copy_point(&(P[mid]), &(P[lower]));

    k = (int)lower;
    while (k < (int)mid) {
        xMULv2(&(P[mid]), &(P[mid]), &(TORSION_ODD_PRIMES[k]), p_plus_minus_bitlength[k], A);
        k++;
    }

    k = (int)mid;
    while (k < (int)upper) {
        xMULv2(&(P[lower]), &(P[lower]), &(TORSION_ODD_PRIMES[k]), p_plus_minus_bitlength[k], A);
        k++;
    }

    cofactor_multiples(P, A, lower, mid);
    cofactor_multiples(P, A, mid, upper);
}
// A projective x-only point (X : Z) is the point at infinity exactly when Z == 0.
static inline int isinfinity(ec_point_t const P)
{
    int const z_is_zero = fp2_is_zero(&P.z);
    return z_is_zero;
}
// Self-test of the x-only elliptic-curve arithmetic, followed by two micro-benchmarks.
//
// An optional first argument overrides TEST_LOOPS. Every check is an assert,
// so a failure aborts the program. The benchmarks are skipped when TEST_LOOPS
// is not positive, because they size arrays with it and divide by it.
int main(int argc, char* argv[])
{
    if (argc > 1) {
        TEST_LOOPS = atoi(argv[1]);
    }

    int i, j;

    // Curve constants in the form (A' + 2C : 4C), starting from A' = 0, C = 1
    ec_point_t A;
    fp2_set(&A.x, 0);
    fp_mont_setone(A.z.re);fp_set(A.z.im,0);

    fp2_add(&A.z, &A.z, &A.z);    // 2C
    fp2_add(&A.x, &A.x, &A.z);    // A' + 2C
    fp2_add(&A.z, &A.z, &A.z);    // 4C

    // Just to ensure the projective curve coefficients are different from zero
    // (both of them: the check used to test A.x twice and never A.z)
    assert( !fp2_is_zero(&A.x) && !fp2_is_zero(&A.z) );

    fp2_t a;
    coeff(&a, A);

    ec_point_t PA, QA, PQA, PB, QB, PQB;

    // Writing the public projective x-coordinate points into Montgomery domain
    fp2_tomont(&(PA.x), &(xPA));
    fp_mont_setone(PA.z.re);fp_set(PA.z.im,0);
    fp2_tomont(&(QA.x), &(xQA));
    fp_mont_setone(QA.z.re);fp_set(QA.z.im,0);
    fp2_tomont(&(PQA.x), &(xPQA));
    fp_mont_setone(PQA.z.re);fp_set(PQA.z.im,0);

    assert( isrational(PA, a) );
    assert( isrational(QA, a) );
    assert( isrational(PQA, a) );

    // ======================================================================================================
    // Recall, PA, QA, and PQA are expected to be N-order points, but we require to ensure they are of order N
    for (j = 0; j < P_LEN; j++)
    {
        for (i = 1; i < TORSION_ODD_POWERS[j]; i++)
        {
            xMULv2(&PA, &PA, &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);
            xMULv2(&QA, &QA, &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);
            xMULv2(&PQA, &PQA, &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);

            assert( isrational(PA, a) );
            assert( isrational(QA, a) );
            assert( isrational(PQA, a) );
        };
    };
    assert( !isinfinity(PA) );
    assert( !isinfinity(QA) );
    assert( !isinfinity(PQA) );

    ec_point_t P[P_LEN + M_LEN], Q[P_LEN + M_LEN], PQ[P_LEN + M_LEN];
    copy_point(&(P[0]), &PA);
    cofactor_multiples(P, &A, 0, P_LEN);
    copy_point(&(Q[0]), &QA);
    cofactor_multiples(Q, &A, 0, P_LEN);
    copy_point(&(PQ[0]), &PQA);
    cofactor_multiples(PQ, &A, 0, P_LEN);
    for (j = 0; j < P_LEN; j++)
    {
        // x(PA)
        assert( !isinfinity(P[j]) );    // It must be different from the point at infinity
        assert( isrational(P[j], a) );
        xMULv2(&P[j], &P[j], &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);
        assert( isinfinity(P[j]) );     // It must be now the point at infinity
        // x(QA)
        assert( !isinfinity(Q[j]) );    // It must be different from the point at infinity
        assert( isrational(Q[j], a) );
        xMULv2(&Q[j], &Q[j], &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);
        assert( isinfinity(Q[j]) );     // It must be now the point at infinity
        // x(PQA)
        assert( !isinfinity(PQ[j]) );   // It must be different from the point at infinity
        assert( isrational(PQ[j], a) );
        xMULv2(&PQ[j], &PQ[j], &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);
        assert( isinfinity(PQ[j]) );    // It must be now the point at infinity
    };

    // Writing the public projective x-coordinate points into Montgomery domain
    fp2_tomont(&(PB.x), &(xPB));
    fp_mont_setone(PB.z.re);fp_set(PB.z.im,0);
    fp2_tomont(&(QB.x), &(xQB));
    fp_mont_setone(QB.z.re);fp_set(QB.z.im,0);
    fp2_tomont(&(PQB.x), &(xPQB));
    fp_mont_setone(PQB.z.re);fp_set(PQB.z.im,0);

    assert( !isrational(PB, a) );
    assert( !isrational(QB, a) );
    assert( !isrational(PQB, a) );

    // ======================================================================================================
    // Recall, PB, QB, and PQB are expected to be M-order points, but we require to ensure they are of order M
    for (j = P_LEN; j < (P_LEN + M_LEN); j++)
    {
        for (i = 1; i < TORSION_ODD_POWERS[j]; i++)
        {
            xMULv2(&PB, &PB, &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);
            xMULv2(&QB, &QB, &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);
            xMULv2(&PQB, &PQB, &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);

            assert( !isrational(PB, a) );
            assert( !isrational(QB, a) );
            assert( !isrational(PQB, a) );
        };
    };
    assert( !isinfinity(PB) );
    assert( !isinfinity(QB) );
    assert( !isinfinity(PQB) );

    copy_point(&(P[P_LEN]), &PB);
    cofactor_multiples(P, &A, P_LEN, P_LEN + M_LEN);
    copy_point(&(Q[P_LEN]), &QB);
    cofactor_multiples(Q, &A, P_LEN, P_LEN + M_LEN);
    copy_point(&(PQ[P_LEN]), &PQB);
    cofactor_multiples(PQ, &A, P_LEN, P_LEN + M_LEN);
    for (j = P_LEN; j < (P_LEN+M_LEN); j++)
    {
        // x(PB)
        assert( !isinfinity(P[j]) );    // It must be different from the point at infinity
        assert( !isrational(P[j], a) );
        xMULv2(&P[j], &P[j], &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);
        assert( isinfinity(P[j]) );     // It must be now the point at infinity
        // x(QB)
        assert( !isinfinity(Q[j]) );    // It must be different from the point at infinity
        assert( !isrational(Q[j], a) );
        xMULv2(&Q[j], &Q[j], &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);
        assert( isinfinity(Q[j]) );     // It must be now the point at infinity
        // x(PQB)
        assert( !isinfinity(PQ[j]) );   // It must be different from the point at infinity
        assert( !isrational(PQ[j], a) );
        xMULv2(&PQ[j], &PQ[j], &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);
        assert( isinfinity(PQ[j]) );    // It must be now the point at infinity
    };

    fp2_t m;

    // Reload the public points: the copies above were modified by the order checks
    // Writing the public projective x-coordinate points into Montgomery domain
    fp2_tomont(&(PA.x), &(xPA));
    fp_mont_setone(PA.z.re);fp_set(PA.z.im,0);
    fp2_tomont(&(QA.x), &(xQA));
    fp_mont_setone(QA.z.re);fp_set(QA.z.im,0);
    fp2_tomont(&(PQA.x), &(xPQA));
    fp_mont_setone(PQA.z.re);fp_set(PQA.z.im,0);

    assert( isrational(PA, a) );
    assert( isrational(QA, a) );
    assert( isrational(PQA, a) );

    fp2_tomont(&(PB.x), &(xPB));
    fp_mont_setone(PB.z.re);fp_set(PB.z.im,0);
    fp2_tomont(&(QB.x), &(xQB));
    fp_mont_setone(QB.z.re);fp_set(QB.z.im,0);
    fp2_tomont(&(PQB.x), &(xPQB));
    fp_mont_setone(PQB.z.re);fp_set(PQB.z.im,0);

    assert( !isrational(PB, a) );
    assert( !isrational(QB, a) );
    assert( !isrational(PQB, a) );

    ec_point_t R[P_LEN + M_LEN];
    int k;
    for (j = 0; j < TEST_LOOPS; j++)
    {
        // Progress indicator, erased again right away
        printf("[%3d%%] Testing EC differential arithmetic", 100 * j / TEST_LOOPS);
        fflush(stdout);
        printf("\r\x1b[K");

        // Random combination of the first basis
        fp2_random(&m);
        ladder3pt(&(R[0]), m.re, &PA, &QA, &PQA, &A);
        assert( isrational(R[0], a) );
        for (k = 0; k < P_LEN; k++)
        {
            for (i = 1; i < TORSION_ODD_POWERS[k]; i++)
            {
                xMULv2(&R[0], &R[0], &(TORSION_ODD_PRIMES[k]), p_plus_minus_bitlength[k], &A);
                assert( isrational(R[0], a) );
            };
        };
        cofactor_multiples(R, &A, 0, P_LEN);
        for (i = 0; i < P_LEN; i++)
        {
            assert( !isinfinity(R[i]) );    // It must be different from the point at infinity
            assert( isrational(R[i], a) );
            xMULv2(&R[i], &R[i], &(TORSION_ODD_PRIMES[i]), p_plus_minus_bitlength[i], &A);
            assert( isinfinity(R[i]) );     // It must be now the point at infinity
        };

        // Random combination of the second basis
        fp2_random(&m);
        ladder3pt(&(R[P_LEN]), m.re, &PB, &QB, &PQB, &A);
        assert( !isrational(R[P_LEN], a) );
        for (k = P_LEN; k < (P_LEN+M_LEN); k++)
        {
            for (i = 1; i < TORSION_ODD_POWERS[k]; i++)
            {
                xMULv2(&R[P_LEN], &R[P_LEN], &(TORSION_ODD_PRIMES[k]), p_plus_minus_bitlength[k], &A);
                assert( !isrational(R[P_LEN], a) );
            };
        };
        cofactor_multiples(R, &A, P_LEN, P_LEN + M_LEN);
        for (i = P_LEN; i < (P_LEN+M_LEN); i++)
        {
            assert( !isinfinity(R[i]) );    // It must be different from the point at infinity
            assert( !isrational(R[i], a) );
            xMULv2(&R[i], &R[i], &(TORSION_ODD_PRIMES[i]), p_plus_minus_bitlength[i], &A);
            assert( isinfinity(R[i]) );     // It must be now the point at infinity
        };
    };

    if(TEST_LOOPS)
        printf("[%3d%%] Tested EC differential arithmetic:\tNo errors!\n", 100 * j / TEST_LOOPS);
    printf("-- All tests passed.\n");

    // The benchmarks declare arrays of TEST_LOOPS elements and divide by
    // TEST_LOOPS; both are undefined for a non-positive count, so stop here.
    if (TEST_LOOPS <= 0)
        return 0;

    // BENCHMARK xDBLv2
    unsigned long long cycles, cycles1, cycles2;
    cycles = 0;
    ec_point_t PP[TEST_LOOPS], EE[TEST_LOOPS];
    for(int i = 0; i < TEST_LOOPS; i++){
        fp2_random(&PP[i].x);
        fp2_random(&PP[i].z);
        fp2_random(&EE[i].x);
        fp2_random(&EE[i].z);
    }
    cycles1 = cpucycles();
    for(int i = 0; i < TEST_LOOPS; i++){
        xDBLv2(&PP[i], &PP[i], &EE[i]);
    }
    cycles2 = cpucycles();
    cycles = cycles+(cycles2-cycles1);
    printf("xDBLv2 bench: %7llu cycles\n", cycles/TEST_LOOPS);

    // BENCHMARK xIsog4
    cycles = 0;
    ec_point_t KK0[TEST_LOOPS], KK1[TEST_LOOPS], KK2[TEST_LOOPS];
    for(int i = 0; i < TEST_LOOPS; i++){
        fp2_random(&KK0[i].x);
        fp2_random(&KK0[i].z);
        fp2_random(&KK1[i].x);
        fp2_random(&KK1[i].z);
        fp2_random(&KK2[i].x);
        fp2_random(&KK2[i].z);
    }
    cycles1 = cpucycles();
    for(int i = 0; i < TEST_LOOPS; i++){
        // Inlined arithmetic of the degree-4 evaluation, timed on random inputs
        fp2_t t0, t1;
        fp2_add(&t0, &PP[i].x, &PP[i].z);
        fp2_sub(&t1, &PP[i].x, &PP[i].z);
        fp2_mul(&(EE[i].x), &t0, &KK1[i].x);
        fp2_mul(&(EE[i].z), &t1, &KK2[i].x);
        fp2_mul(&t0, &t0, &t1);
        fp2_mul(&t0, &t0, &KK0[i].x);
        fp2_add(&t1, &(EE[i].x), &(EE[i].z));
        fp2_sub(&(EE[i].z), &(EE[i].x), &(EE[i].z));
        fp2_sqr(&t1, &t1);
        fp2_sqr(&(EE[i].z), &(EE[i].z));
        fp2_add(&(EE[i].x), &t0, &t1);
        fp2_sub(&t0, &(EE[i].z), &t0);
        fp2_mul(&(EE[i].x), &(EE[i].x), &t1);
        fp2_mul(&(EE[i].z), &(EE[i].z), &t0);
    }
    cycles2 = cpucycles();
    cycles = cycles+(cycles2-cycles1);
    printf("xeval_4 bench: %7llu cycles\n", cycles/TEST_LOOPS);

    return 0;
}

View File

@@ -1,445 +0,0 @@
#include <poly.h>
#include <assert.h>
#include <stdio.h>
// Returns true exactly when a and b agree in both the re and the im component.
bool fp2_isequal(fp2_t a, fp2_t b){
    // Stop at the first differing component, as the short-circuit '&&' would.
    if (!fp_is_equal(a.re, b.re)) {
        return false;
    }
    return fp_is_equal(a.im, b.im);
}
// Test-only pseudo-random fp2_t generator built on rand(); NOT secure.
void fp2_random(fp2_t *a){
    // Fill the limbs from rand(), drawing the re limb before the im limb at each index.
    int limb = 0;
    while (limb < NWORDS_FIELD) {
        a->re[limb] = rand();
        a->im[limb] = rand();
        limb++;
    }
    // Normalize: multiply once by the element whose re part is set by
    // fp_mont_setone and whose im part is 0.
    fp2_t unit;
    fp_mont_setone(unit.re);
    fp_set(unit.im, 0);
    fp2_mul(a, a, &unit);
    // Reseed rand() from the lowest re limb of the result.
    srand((unsigned) a->re[0]);
}
// Reference schoolbook product h = f*g, where f has lenf coefficients and g has leng.
// Writes lenf+leng-1 coefficients to h; does nothing when that count is not positive.
// The result is assembled in a scratch buffer first, so h may overlap f or g.
void slow_mul(poly h, poly f, int lenf, poly g, int leng){
    const int out_len = lenf + leng - 1;
    if (out_len <= 0) {
        return;
    }
    if (leng > lenf) {
        // Keep the longer operand first by swapping the roles of f and g.
        slow_mul(h, g, leng, f, lenf);
        return;
    }
    fp2_t scratch[out_len];
    fp2_t acc, term;
    for (int k = 0; k < out_len; k++) {
        // Start at the largest valid index into f for coefficient k and walk
        // i down / j up so that i + j == k throughout.
        int i = (k > lenf - 1) ? lenf - 1 : k;
        int j = k - i;
        fp2_set(&acc, 0);
        for (; j < leng && i >= 0; i--, j++) {
            fp2_mul(&term, &f[i], &g[j]);
            fp2_add(&acc, &acc, &term);
        }
        fp2_copy(&scratch[k], &acc);
    }
    for (int k = 0; k < out_len; k++) {
        fp2_copy(&h[k], &scratch[k]);
    }
}
// Test driver for the polynomial multiplication and product-tree routines.
// Each section compares a routine against slow_mul (or against an all-zero
// result) and aborts through assert() on the first mismatch.
// Returns 0 after all sections have run.
int main(){
fp2_t fp2_0, fp2_1;
// nmax bounds the polynomial lengths and tree sizes exercised below
#define nmax 16
int nf, ng, n, e;
fp2_set(&fp2_0, 0);
fp_mont_setone(fp2_1.re);fp_set(fp2_1.im,0);
//TEST MULTIPLICATION BY 0
// One operand is passed with length 0; the nf-1 entries of h are then checked to be zero
for(nf = 2; nf < nmax; nf++){
fp2_t f[nf], h[nf-1];
printf("[%3d%%] Testing multiplication by 0", 100 * nf / nmax);
fflush(stdout);
// Carriage return + ANSI "erase line", so the next progress message overwrites this one
printf("\r\x1b[K");
for(e = 0; e < nf; e++){
fp2_random(&f[e]);
}
poly_mul(h, f, nf, f, 0);
for(e = 0; e < nf-1; e++){
assert(fp2_is_zero(&h[e])==1);
}
poly_mul(h, f, 0, f, nf);
for(e = 0; e < nf-1; e++){
assert(fp2_is_zero(&h[e])==1);
}
}
printf("[%3d%%] Tested multiplication by 0:\t\tNo errors!\n", 100 * nf / nmax);
//TEST FOR f, g, h DISJOINT MEMORY SPACES
for(nf = 1; nf < nmax; nf++){
printf("[%3d%%] Testing multiplication", 100 * nf / nmax);
fflush(stdout);
printf("\r\x1b[K");
for(ng = 1; ng < nmax; ng++){
fp2_t f[nf]; //Random length nf poly
for(e = 0; e < nf; e++){
fp2_random(&f[e]);
}
fp2_t g[ng]; // Random length ng poly
for(e = 0; e < ng; e++){
fp2_random(&g[e]);
}
fp2_t h[nf+ng-1];// Compute product
poly_mul(h, f, nf, g, ng);
fp2_t fg[nf+ng-1]; // Compute the product by school method
slow_mul(fg, f, nf, g, ng);
for(e = 0; e < nf + ng - 1; e++){ // Verify answer term by term
assert(fp2_isequal(h[e], fg[e])==1);
}
}
}
printf("[%3d%%] Tested multiplication:\t\t\tNo errors!\n", 100 * nf / nmax);
// TEST FOR f, g CONTIGUOUS AND RESULT SAVED OVER THEM
// h holds f in h[0..nf-1] and g in h[nf..nf+ng-1]; poly_mul then writes its output over h
for(nf = 1; nf < nmax; nf++){
printf("[%3d%%] Testing multiplication in place", 100 * nf / nmax);
fflush(stdout);
printf("\r\x1b[K");
for(ng = 1; ng < nmax; ng++){
fp2_t h[nf+ng];
//Random length nf poly
for(e = 0; e < nf; e++){
fp2_random(&h[e]);
}
// Random length ng poly
for(e = 0; e < ng; e++){
fp2_random(&h[e+nf]);
}
// Compute the product
fp2_t fg[nf+ng-1];
// The reference product must be taken first, because the next call overwrites the inputs
slow_mul(fg, h, nf, &(h[nf]), ng); // School method
poly_mul(h, h, nf, &(h[nf]), ng); // Karatsuba method
for(e = 0; e < nf + ng - 1; e++){ // Verify answer term by term
assert(fp2_isequal(h[e], fg[e])==1);
}
}
}
printf("[%3d%%] Tested multiplication in place:\t\tNo errors!\n", 100 * nf / nmax);
//TEST FOR MULTIPLICATION MOD X^N BY 0
// Same zero-length check as above, for poly_mul_low with n output coefficients
for(nf = 2; nf < nmax; nf++){
fp2_t f[nf];
printf("[%3d%%] Testing mul mod x^n by 0", 100 * nf / nmax);
fflush(stdout);
printf("\r\x1b[K");
for(e = 0; e < nf; e++){
fp2_random(&f[e]);
}
for(n = 1; n < nmax; n++){
fp2_t h[n];
poly_mul_low(h, n, f, nf, f, 0);
for(e = 0; e < n; e++){
assert(fp2_is_zero(&h[e])==1);
}
poly_mul_low(h, n, f, 0, f, nf);
for(e = 0; e < n; e++){
assert(fp2_is_zero(&h[e])==1);
}
}
}
printf("[%3d%%] Tested mul mod x^n by 0:\t\t\tNo errors!\n", 100 * nf / nmax);
//TEST FOR MULTIPLICATION MOD X^N
for(nf = 1; nf < nmax; nf++){
printf("[%3d%%] Testing mul mod x^n", 100 * nf / nmax);
fflush(stdout);
printf("\r\x1b[K");
for(ng = 1; ng < nmax; ng++){
fp2_t f[nf], g[ng], fg[nf+ng-1];
poly h;
//Get random polynomials
for(e = 0; e < nf; e++){
fp2_random(&f[e]);
}
for(e = 0; e < ng; e++){
fp2_random(&g[e]);
}
//Save regular result to fg
slow_mul(fg, f, nf, g, ng);
//Compute result mod x^n
for(n = 1; n < 2*nmax; n++){
h = malloc(sizeof(fp2_t)*n);
poly_mul_low(h, n, f, nf, g, ng);
//Compare with expected
// The first min(n, nf+ng-1) entries must match the full product; any further entries must be zero
e = 0;
while(e < nf+ng-1 && e < n){
assert(fp2_isequal(h[e], fg[e]) == 1);
e++;
}
while(e < n){
assert(fp2_is_zero(&h[e]) == 1);
e++;
}
free(h);
}
}
}
printf("[%3d%%] Tested mul mod x^n:\t\t\tNo errors!\n", 100 * nf / nmax);
//TEST FOR POLY_MUL_MIDDLE
for(nf = 1; nf < 2*nmax; nf+=1){
fp2_t f[nf];
printf("[%3d%%] Testing poly_mul_middle", 100 * nf / (2*nmax));
fflush(stdout);
printf("\r\x1b[K");
// NOTE(review): with the strict '<' bound this loop is empty when nf is odd and visits only
// ng = nf/2 when nf is even -- confirm whether the upper end was meant to be included
for(ng = (nf+1)>>1; ng < (nf+1)-((nf+1)>>1); ng++){
// This runs from floor((nf+1)/2) to ceil((nf+1)/2)
fp2_t g[ng];
for(e = 0; e < nf; e++){
fp2_random(&f[e]);
}
for(e = 0; e < ng; e++){
fp2_random(&g[e]);
}
fp2_t h[nf+ng-1];
slow_mul(h, g, ng, f, nf);
poly_mul_middle(g, g, ng, f, nf);
// The ng outputs are compared with coefficients nf-ng .. nf-1 of the full product
for(e = 0; e < ng; e++){
assert(fp2_isequal(h[e+nf-ng], g[e])==1);
}
}
}
printf("[%3d%%] Tested poly_mul_middle:\t\t\tNo errors!\n", 100 * nf / (2*nmax));
// TEST FOR SELF RECIPROCAL MULTIPLICATION
for(nf = 1; nf < nmax; nf++){
printf("[%3d%%] Testing self reciprocal mul", 100 * nf / nmax);
fflush(stdout);
printf("\r\x1b[K");
for(ng = 1; ng < nmax; ng++){
fp2_t f[nf], g[ng], h[nf+ng-1], fg[nf+ng-1];
// Get random palindromes
// Each coefficient is mirrored to the opposite end; an odd length gets a free middle coefficient
for(e = 0; e < (nf>>1); e++){
fp2_random(&f[e]);
fp2_copy(&f[nf-1-e], &f[e]);
}
if(nf & 1){
fp2_random(&f[nf>>1]);
}
for(e = 0; e < (ng>>1); e++){
fp2_random(&g[e]);
fp2_copy(&g[ng-1-e], &g[e]);
}
if(ng & 1){
fp2_random(&g[ng>>1]);
}
// Compute products
poly_mul_selfreciprocal(h, g, ng, f, nf);
slow_mul(fg, g, ng, f, nf);
// Compare
for(e = 0; e < nf+ng-1; e++){
assert(fp2_isequal(fg[e], h[e])==1);
}
}
}
printf("[%3d%%] Tested self reciprocal mul:\t\tNo errors!\n", 100 * nf / nmax);
// TEST FOR PRODUCT TREES
int tree_size, iteration, i;
int len, *DEG, LENF;
poly *H, *F, h;
for(tree_size = 1; tree_size < nmax; tree_size++){
printf("[%3d%%] Testing product tree:\t\t\tSize %d out of %d", 100 * tree_size / nmax, tree_size, nmax-1);
fflush(stdout);
printf("\r\x1b[K");
// i becomes the smallest exponent with 2^i >= tree_size; the tree arrays get 2^(i+2)-1 slots
i = 0;
while((1<<i) < tree_size){
i++;
}
DEG = malloc(sizeof(int)*((1<<(i+2))-1));
H = malloc(sizeof(poly)*((1<<(i+2))-1));
F = malloc(sizeof(poly)*tree_size);
h = malloc(sizeof(fp2_t)*(nmax+1)*tree_size);
for(iteration = 0; iteration < nmax + 1 - tree_size ; iteration++){
// Generate random list of polynomials
// All tree_size leaves share the same random length LENF in 1..nmax
LENF = (rand() % nmax)+1;
for(i = 0; i < tree_size; i++){
F[i] = malloc(sizeof(fp2_t)*LENF);
for(e = 0; e < LENF; e++){
fp2_random(&F[i][e]);
}
}
product_tree(H, DEG, 0, F, LENF, tree_size);
// Build product of all polynomials manually
len = LENF;
//for(e = 0; e < LENF[0]; e++){
for(e = 0; e < LENF; e++){
fp2_copy(&h[e], &F[0][e]);
}
// Multiply the leaves into h one at a time, in place; len tracks the running coefficient count
for(i = 1; i < tree_size; i++){
poly_mul(h, h, len, F[i], LENF);
len += LENF-1;
}
// Compare to root
assert (len == DEG[0]+1);
for(e = 0; e < len; e++){
assert(fp2_isequal(H[0][e], h[e])==1);
}
clear_tree(H, 0, tree_size);
for(i = 0; i < tree_size; i++){
free(F[i]);
}
}
free(DEG);
free(H);
free(F);
free(h);
}
printf("[%3d%%] Tested product tree:\t\t\tNo errors!\n", 100 * tree_size / nmax);
// TEST FOR SELF RECIPROCAL PRODUCT TREES
// Same procedure as the product-tree test, with palindromic leaves and product_tree_selfreciprocal
for(tree_size = 1; tree_size < nmax; tree_size++){
printf("[%3d%%] Testing selfreciprocal product tree:\tSize %d out of %d", 100 * tree_size / nmax, tree_size, nmax-1);
fflush(stdout);
printf("\r\x1b[K");
i = 0;
while((1<<i) < tree_size){
i++;
}
DEG = malloc(sizeof(int)*((1<<(i+2))-1));
H = malloc(sizeof(poly)*((1<<(i+2))-1));
F = malloc(sizeof(poly)*tree_size);
h = malloc(sizeof(fp2_t)*(nmax+1)*tree_size);
for(iteration = 0; iteration < nmax + 1 - tree_size ; iteration++){
// Generate random list of polynomials
LENF = (rand() % nmax)+1;;
for(i = 0; i < tree_size; i++){
F[i] = malloc(sizeof(fp2_t)*LENF);
for(e = 0; e < (LENF>>1); e++){
fp2_random(&F[i][e]);
fp2_copy(&F[i][LENF-1-e], &F[i][e]);
}
if(LENF & 1){
fp2_random(&F[i][(LENF>>1)]);
}
}
product_tree_selfreciprocal(H, DEG, 0, F, LENF, tree_size);
// Build product of all polynomials manually
len = LENF;
for(e = 0; e < LENF; e++){
fp2_copy(&h[e], &F[0][e]);
}
for(i = 1; i < tree_size; i++){
poly_mul(h, h, len, F[i], LENF);
len += LENF-1;
}
// Compare to root
assert (len == DEG[0]+1);
for(e = 0; e < len; e++){
assert(fp2_isequal(H[0][e], h[e])==1);
}
clear_tree(H, 0, tree_size);
for(i = 0; i < tree_size; i++){
free(F[i]);
}
}
free(DEG);
free(H);
free(F);
free(h);
}
printf("[%3d%%] Tested selfreciprocal product tree:\tNo errors!\n", 100 * tree_size / nmax);
printf("-- All tests passed.\n");
return 0;
}

View File

@@ -1,461 +0,0 @@
#include "poly.h"
#include <assert.h>
#include <stdio.h>
#define nmax 32
// Component-wise equality of two fp2_t values (re compared first, then im).
bool fp2_isequal(fp2_t a, fp2_t b){
    bool same = false;
    if (fp_is_equal(a.re, b.re)) {
        // The im parts are only compared once the re parts are known to match.
        same = fp_is_equal(a.im, b.im);
    }
    return same;
}
// Pseudo-random fp2_t for tests only, driven by rand(); provides no security.
void fp2_random(fp2_t *a){
    fp2_t mont_one;
    // Raw limbs: for every index the re limb is drawn first, then the im limb.
    for (int w = 0; w != NWORDS_FIELD; ++w) {
        a->re[w] = rand();
        a->im[w] = rand();
    }
    // Normalize by one multiplication with (fp_mont_setone, 0).
    fp_mont_setone(mont_one.re);
    fp_set(mont_one.im, 0);
    fp2_mul(a, a, &mont_one);
    // Update the rand() seed from the first re limb of the result.
    srand((unsigned) a->re[0]);
}
int main(){
fp2_t fp2_0, fp2_1;
fp2_set(&fp2_0, 0);
fp_mont_setone(fp2_1.re);fp_set(fp2_1.im,0);
int lenf, leng, n, e, iteration, array_size, tree_size, i, root, brother, *DEG, LENF;
poly f, g, h, f_rev, f_rev_inv, *F, *H, *R, g1, g2, REM1, REM2, G1, G2, G1_rev, G2_rev, R0;
fp2_t c, *A, *C, ratio, A0;
f_rev_inv = 0;
// TEST FOR RECIPROCAL
for(lenf = 1; lenf < nmax; lenf++)
{
printf("[%3d%%] Testing reciprocals", 100 * lenf / nmax);
fflush(stdout);
printf("\r\x1b[K");
// Get random poly
f = malloc(sizeof(fp2_t)*lenf);
for(e = 0; e < lenf; e++)
fp2_random(&f[e]);
for(n = 1; n < nmax; n++)
{
// Get the reciprocal and multiply them
h = malloc(sizeof(fp2_t)*n);
memset(h, 0, sizeof(fp2_t)*n);
reciprocal(h, &c, f, lenf, n);
poly_mul_low(h, n, f, lenf, h, n);
// Compare with expected
assert(fp2_isequal(h[0],c));
for(e = 1; e < n; e++)
assert(fp2_is_zero(&h[e]));
free(h);
}
free(f);
}
printf("[%3d%%] Tested reciprocals:\t\tNo errors!\n", 100 * lenf / nmax);
// TEST FOR REDUCTION
for(lenf = 2; lenf < nmax; lenf++)
{
printf("[%3d%%] Testing polynomial reduction", 100 * lenf / nmax);
fflush(stdout);
printf("\r\x1b[K");
// Get random poly for the mod
f = malloc(sizeof(fp2_t)*lenf);
f_rev = malloc(sizeof(fp2_t)*lenf);
for(e = 0; e < lenf; e++)
{
fp2_random(&f[e]);
fp2_copy(&f_rev[lenf-1-e], &f[e]);
}
for(leng = 1; leng < nmax; leng++)
{
// Get random poly to reduce
g = malloc(sizeof(fp2_t)*leng);
for(e = 0; e < leng; e++){
fp2_random(&g[e]);
}
// Get reverse-inverse mod x^(leng-lenf+1)
if(leng >= lenf)
{
f_rev_inv = malloc(sizeof(fp2_t)*(leng-lenf+1));
reciprocal(f_rev_inv, &c, f_rev, lenf, leng-lenf+1);
}
else{
fp_mont_setone(c.re);fp_set(c.im,0);
}
// Compute the reduction
h = malloc(sizeof(fp2_t)*(lenf-1));
poly_redc(h, g, leng, f, lenf, f_rev_inv, c);
// Reduce manually
int leng_red = leng;
fp2_t scale, f_e;
while(leng_red >= lenf)
{
fp2_copy(&scale, &f[lenf-1]);
fp2_inv(&scale);
fp2_mul(&scale, &scale, &g[leng_red-1]);
for(e = 0; e < lenf; e++)
{
fp2_mul(&f_e, &f[e], &scale);
fp2_sub(&g[e+leng_red-lenf], &g[e+leng_red-lenf], &f_e);
}
leng_red--;
}
// Rescale manual result
if( leng < lenf){
fp_mont_setone(scale.re);fp_set(scale.im,0);
}
else
if(lenf == 2 && leng == 3)
{
fp2_sqr(&scale, &f[1]);
fp2_add(&scale, &scale, &scale);
}
else
fp2_copy(&scale, &c);
for(e = 0; e < leng_red; e++)
fp2_mul(&g[e], &g[e], &scale);
// Comapre results
for(e = leng_red-1; e >= 0; e--)
assert(fp2_isequal(h[e], g[e]));
for(e = leng_red; e < lenf-1; e++)
assert(fp2_is_zero(&h[e]));
free(g);
free(h);
if(leng >= lenf)
free(f_rev_inv);
}
free(f);
free(f_rev);
}
printf("[%3d%%] Tested polynomial reduction:\tNo errors!\n", 100 * lenf / nmax);
// TEST FOR RECIPROCAL TREES
for(tree_size = 3; tree_size < nmax; tree_size++)
{
printf("[%3d%%] Testing reciprocal tree:\t\tTree size %d out of %d", 100 * tree_size / nmax, tree_size, nmax);
fflush(stdout);
printf("\r\x1b[K");
// Compute size of arrays
i = 0;
while((1<<i) < tree_size){
i++;
}
array_size = (1<<(i+2))-1;
DEG = malloc(sizeof(int)*array_size);
H = malloc(sizeof(poly)*array_size);
R = malloc(sizeof(poly)*array_size);
F = malloc(sizeof(poly)*tree_size);
A = malloc(sizeof(fp2_t)*array_size);
// Get random polys
LENF = 2;
for(i = 0; i < tree_size; i++)
{
F[i] = malloc(sizeof(fp2_t)*LENF);
for(e = 0; e < LENF; e++){
fp2_random(&F[i][e]);
}
}
// Get product tree then reciprocal tree
product_tree(H, DEG, 0, F, LENF, tree_size);
leng = DEG[0]+1+(rand() % nmax);
reciprocal_tree(R, A, leng, H, DEG, 0, tree_size);
// Check the root
root = 0;
lenf = leng-DEG[root];
f = malloc(sizeof(fp2_t)*lenf);
for(e = 0; e < DEG[root]+1 && e < lenf; e++){
fp2_copy(&f[e], &H[root][DEG[root]-e]);
}
for(e = DEG[root]+1; e < lenf; e++){
fp2_set(&f[e], 0);
}
poly_mul_low(f, lenf, f, lenf, R[root], lenf);
assert(fp2_isequal(f[0], A[root]));
for(e = 1; e < lenf; e++){
assert(fp2_is_zero(&f[e]));
}
free(f);
// Perform random walks
for(iteration = 0; iteration < nmax - tree_size; iteration++)
{
root = 0;
n = tree_size;
while(n > 1)
{
if(rand() & 1)
{
root = 2*root+1;
n = n - (n>>1);
}
else
{
root = 2*root+2;
n = n>>1;
}
brother = root - 1 + 2*(root & 1);
// Check current node
if(DEG[root] > 2)
{
lenf = DEG[brother];
f = malloc(sizeof(fp2_t)*lenf);
for(e = 0; e < DEG[root]+1 && e < lenf; e++){
fp2_copy(&f[e], &H[root][DEG[root]-e]);
}
for(e = DEG[root]+1; e < lenf; e++){
fp2_set(&f[e], 0);
}
poly_mul_low(f, lenf, f, lenf, R[root], lenf);
assert(fp2_isequal(f[0], A[root]));
for(e = 1; e < lenf; e++){
assert(fp2_is_zero(&f[e]));
}
free(f);
}
}
}
// Clean up
for(i = 0; i < tree_size; i++)
free(F[i]);
clear_tree(H, 0, tree_size);
clear_tree(R, 0, tree_size);
free(F);
free(H);
free(R);
free(A);
free(DEG);
}
printf("[%3d%%] Tested reciprocal tree:\t\tNo errors!\n", 100 * tree_size / nmax);
// TEST FOR REMAINDERS
for(tree_size = 2; tree_size < nmax; tree_size++)
{
printf("[%3d%%] Testing batched remainders:\t\tTree size %d out of %d", 100 * tree_size / nmax, tree_size, nmax);
fflush(stdout);
printf("\r\x1b[K");
// Compute size of arrays
i = 0;
while((1<<i) < tree_size)
i++;
array_size = (1<<(i+2))-1;
DEG = malloc(sizeof(int)*array_size);
H = malloc(sizeof(poly)*array_size);
R = malloc(sizeof(poly)*array_size);
F = malloc(sizeof(poly)*tree_size);
A = malloc(sizeof(fp2_t)*array_size);
REM1 = malloc(sizeof(fp2_t)*array_size);
REM2 = malloc(sizeof(fp2_t)*array_size);
C = malloc(sizeof(fp2_t)*tree_size);
// Get random polys
LENF = 2;
for(i = 0; i < tree_size; i++)
{
F[i] = malloc(sizeof(fp2_t)*LENF);
for(e = 0; e < LENF; e++)
fp2_random(&F[i][e]);
}
// Get product tree, reciprocal tree, and remainders
product_tree(H, DEG, 0, F, LENF, tree_size);
leng = DEG[0]+1+(rand() % nmax);
g1 = malloc(sizeof(fp2_t)*leng);
g2 = malloc(sizeof(fp2_t)*leng);
for(e = 0; e < leng; e++)
{
fp2_random(&g1[e]);
fp2_random(&g2[e]);
}
reciprocal_tree(R, A, leng, H, DEG, 0, tree_size);
multieval_unscaled(REM1, g1, leng, R, (const fp2_t*)A, H, DEG, 0, tree_size);
multieval_unscaled(REM2, g2, leng, R, (const fp2_t*)A, H, DEG, 0, tree_size);
for(i = 0; i < tree_size; i++)
{
// Get ratio of the remainder
fp2_inv(&REM1[i]);
fp2_mul(&ratio, &REM1[i], &REM2[i]);
// Compute remainders manually
f_rev = malloc(sizeof(fp2_t)*LENF);
f_rev_inv = malloc(sizeof(fp2_t)*(leng-LENF+1));
h = malloc(sizeof(fp2_t)*(LENF-1));
for(e = 0; e < LENF; e++)
fp2_copy(&f_rev[e], &F[i][LENF-1-e]);
reciprocal(f_rev_inv, &c, f_rev, LENF, leng-LENF+1);
poly_redc(h, g1, leng, F[i], LENF, f_rev_inv, c);
fp2_copy(&REM1[i], &h[0]);
poly_redc(h, g2, leng, F[i], LENF, f_rev_inv, c);
fp2_copy(&REM2[i], &h[0]);
free(f_rev);
free(f_rev_inv);
free(h);
// Compare results
fp2_inv(&REM1[i]);
fp2_mul(&REM1[i], &REM1[i], &REM2[i]);
assert(fp2_isequal(REM1[i], ratio));
}
// Clean up
for(i = 0; i < tree_size; i++)
free(F[i]);
free(g1);
free(g2);
clear_tree(H, 0, tree_size);
clear_tree(R, 0, tree_size);
free(F);
free(H);
free(R);
free(A);
free(DEG);
free(REM1);
free(REM2);
free(C);
}
printf("[%3d%%] Tested batched remainders:\tNo errors!\n", 100 * tree_size / nmax);
// TEST FOR SCALED REMAINDER TREE
for(tree_size = 1; tree_size < nmax; tree_size++)
{
printf("[%3d%%] Testing scaled remainder tree:\tTree size %d out of %d", 100 * tree_size / nmax, tree_size, nmax);
fflush(stdout);
printf("\r\x1b[K");
// Compute size of arrays
i = 0;
while((1<<i) < tree_size)
i++;
array_size = (1<<(i+2))-1;
DEG = malloc(sizeof(int)*array_size);
H = malloc(sizeof(poly)*array_size);
F = malloc(sizeof(poly)*tree_size);
REM1 = malloc(sizeof(fp2_t)*array_size);
REM2 = malloc(sizeof(fp2_t)*array_size);
// Get random polys
LENF = 2;
for(i = 0; i < tree_size; i++)
{
F[i] = malloc(sizeof(fp2_t)*LENF);
for(e = 0; e < LENF; e++)
fp2_random(&F[i][e]);
}
// Get random polys to reduce
product_tree(H, DEG, 0, F, LENF, tree_size);
leng = DEG[0]+1+(rand() % nmax);
g1 = malloc(sizeof(fp2_t)*leng);
g2 = malloc(sizeof(fp2_t)*leng);
for(e = 0; e < leng; e++)
{
fp2_random(&g1[e]);
fp2_random(&g2[e]);
}
// Get the required initial nodes
G1 = malloc(sizeof(fp2_t)*DEG[0]);
G2 = malloc(sizeof(fp2_t)*DEG[0]);
G1_rev = malloc(sizeof(fp2_t)*DEG[0]);
G2_rev = malloc(sizeof(fp2_t)*DEG[0]);
R0 = malloc(sizeof(fp2_t)*(leng));
f_rev = malloc(sizeof(fp2_t)*(DEG[0]+1));
for(e = 0; e < DEG[0]+1; e++)
fp2_copy(&f_rev[e], &H[0][DEG[0]-e]);
if( DEG[0] > leng-DEG[0])
reciprocal(R0, &A0, f_rev, DEG[0]+1, DEG[0]);
else
reciprocal(R0, &A0, f_rev, DEG[0]+1, leng-DEG[0]);
poly_redc(G1, g1, leng, H[0], DEG[0]+1, R0, A0);
poly_redc(G2, g2, leng, H[0], DEG[0]+1, R0, A0);
for(e = 0; e < DEG[0]; e++)
{
fp2_copy(&G1_rev[e], &G1[DEG[0]-1-e]);
fp2_copy(&G2_rev[e], &G2[DEG[0]-1-e]);
}
poly_mul_middle(G1_rev, G1_rev, DEG[0], R0, DEG[0]);
poly_mul_middle(G2_rev, G2_rev, DEG[0], R0, DEG[0]);
for(e = 0; e < DEG[0]; e++)
{
fp2_copy(&G1[e], &G1_rev[DEG[0]-1-e]);
fp2_copy(&G2[e], &G2_rev[DEG[0]-1-e]);
}
free(G1_rev);free(G2_rev);free(R0);free(f_rev);
// Compute the scaled remainder trees
multieval_scaled(REM1, G1, H, DEG, 0, tree_size);
multieval_scaled(REM2, G2, H, DEG, 0, tree_size);
for(i = 0; i < tree_size; i++)
{
// Get ratio of the remainder
fp2_inv(&REM1[i]);
fp2_mul(&ratio, &REM1[i], &REM2[i]);
// Compute remainders manually
f_rev = malloc(sizeof(fp2_t)*LENF);
f_rev_inv = malloc(sizeof(fp2_t)*(leng-LENF+1));
h = malloc(sizeof(fp2_t)*(LENF-1));
for(e = 0; e < LENF; e++)
fp2_copy(&f_rev[e], &F[i][LENF-1-e]);
reciprocal(f_rev_inv, &c, f_rev, LENF, leng-LENF+1);
poly_redc(h, g1, leng, F[i], LENF, f_rev_inv, c);
fp2_copy(&REM1[i], &h[0]);
poly_redc(h, g2, leng, F[i], LENF, f_rev_inv, c);
fp2_copy(&REM2[i], &h[0]);
free(f_rev);free(f_rev_inv);free(h);
// Compare results
fp2_inv(&REM1[i]);
fp2_mul(&REM1[i], &REM1[i], &REM2[i]);
assert(fp2_isequal(REM1[i], ratio));
}
// Clean up
for(i = 0; i < tree_size; i++)
free(F[i]);
free(F);free(g1);free(g2);free(G1);free(G2);
clear_tree(H, 0, tree_size);free(H);free(DEG);
free(REM1);free(REM2);
}
printf("[%3d%%] Tested scaled remainder tree:\tNo errors!\n", 100 * tree_size / nmax);
printf("-- All tests passed.\n");
}

View File

@@ -1,75 +0,0 @@
#include "test_extras.h"
#include <bench.h>
// Global constants
extern const digit_t p[NWORDS_FIELD];
extern const digit_t R2[NWORDS_FIELD];
#if 0
int64_t cpucycles(void)
{ // Access system counter for benchmarking
  // Executes the x86 rdtsc instruction, which leaves the low half of the counter in EAX
  // and the high half in EDX, and returns the two halves combined into one 64-bit value.
    unsigned int hi, lo;

    asm volatile ("rdtsc\n\t" : "=a" (lo), "=d"(hi));
    // Combine in unsigned arithmetic: shifting a signed 64-bit value left by 32 would
    // overflow once hi has its top bit set.
    return (int64_t)((((uint64_t)hi) << 32) | (uint64_t)lo);
}
#endif
int compare_words(digit_t* a, digit_t* b, unsigned int nwords)
{ // Comparing "nwords" elements, most significant word first: (1) a>b, (0) a=b, (-1) a<b
  // Returns 0 when nwords is 0.
  // SECURITY NOTE: this function does not have constant-time execution. TO BE USED FOR TESTING ONLY.
    // Count down with an unsigned index so that neither nwords == 0 nor a very large
    // nwords depends on converting an out-of-range unsigned value to int.
    unsigned int i = nwords;

    while (i-- > 0)
    {
        if (a[i] > b[i]) return 1;
        if (a[i] < b[i]) return -1;
    }
    return 0;
}
void sub_test(digit_t* out, digit_t* a, digit_t* b, unsigned int nwords)
{ // Word-by-word subtraction out = a-b with borrow propagation; the final borrow is dropped,
  // so callers are expected to pass a>b.
  // SECURITY NOTE: this function does not have constant-time execution. It is for TESTING ONLY.
    digit_t borrow_in = 0;

    for (unsigned int w = 0; w < nwords; w++)
    {
        // Both inputs are read before out[w] is written, so out may alias a or b.
        const digit_t x = a[w];
        const digit_t y = b[w];
        const digit_t diff = x - y;

        out[w] = diff - borrow_in;
        // A borrow leaves this word if x < y, or if removing the incoming borrow wrapped around.
        borrow_in = (x < y) || (diff < borrow_in);
    }
}
void fprandom_test(digit_t* a)
{ // Generating a pseudo-random field element in [0, p-1]
  // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
    const unsigned int nwords = NWORDS_FIELD;
    const unsigned int spare_bits = 256-254;    // Number of top bits cleared in the last word
    unsigned char* bytes = (unsigned char*)a;
    unsigned int k = 0;

    // Fill all sizeof(digit_t)*nwords bytes of the element from rand()
    while (k < sizeof(digit_t)*nwords) {
        bytes[k] = (unsigned char)rand();
        k++;
    }
    a[nwords-1] &= (((digit_t)(-1) << spare_bits) >> spare_bits);
    // Subtract p for as long as p <= a, which forces the value into [0, p-1]
    while (compare_words((digit_t*)p, a, nwords) < 1) {
        sub_test(a, a, (digit_t*)p, nwords);
    }
}
void fp2random_test(fp2_t* a)
{ // Generating a pseudo-random element in GF(p^2): re is drawn first, then im
  // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
    digit_t* parts[2] = { a->re, a->im };

    for (int c = 0; c < 2; c++) {
        fprandom_test(parts[c]);
    }
}

View File

@@ -1,29 +0,0 @@
#ifndef TEST_EXTRAS_H
#define TEST_EXTRAS_H
#include <time.h>
#include <stdlib.h>
#include <fp.h>
#include <fp2.h>
#include <curve_extras.h>
#define PASSED 0
#define FAILED 1
// Access system counter for benchmarking
//int64_t cpucycles(void);
// Comparing "nwords" elements, most significant word first: (1) a>b, (0) a=b, (-1) a<b
int compare_words(digit_t* a, digit_t* b, unsigned int nwords);
// Multiprecision subtraction for testing, assumes a > b
void sub_test(digit_t* out, digit_t* a, digit_t* b, unsigned int nwords);
// Generating a pseudo-random field element in [0, p-1]
void fprandom_test(digit_t* a);
// Generating a pseudo-random element in GF(p^2)
void fp2random_test(fp2_t* a);
#endif

View File

@@ -1,298 +0,0 @@
#include<time.h>
#include <stdio.h>
#include <assert.h>
#include <inttypes.h>
#include "isog.h"
#include "sdacs.h"
#include "ec.h"
#include "test-basis.h"
// Fills all NWORDS_FIELD words of k with successive rand() outputs (testing only).
// The parameter j is accepted but not used.
void random_scalar(fp_t k, const uint8_t j)
{
    (void)j;
    int word = 0;
    while (word < NWORDS_FIELD) {
        k[word] = rand();
        word++;
    }
}
// Affine Montgomery coefficient: from A = (A24 : C24) = (A + 2C : 4C), stored in
// (A.x : A.z), computes *B = A/C.
void coeff(fp2_t *B, ec_point_t const A)
{
    fp2_t num;
    // Numerator: 2*(2*A24 - C24), which equals 4*A
    fp2_add(&num, &A.x, &A.x);
    fp2_sub(&num, &num, &A.z);
    fp2_add(&num, &num, &num);
    // Denominator: 1/C24, inverted in place inside *B
    fp2_copy(B, &A.z);
    fp2_inv(B);
    // A/C = 4*A / C24
    fp2_mul(B, &num, B);
}
// Determines if point is fp2-rational (if not, then it must be a zero trace point).
// With x = T.x / T.z, evaluates x^3 + a*x^2 + x and returns fp2_is_square() of that value.
uint8_t isrational(ec_point_t const T, fp2_t const a)
{
    fp2_t x, x2, x3, rhs;

    // Affine x-coordinate
    fp2_copy(&x, &T.z);
    fp2_inv(&x);
    fp2_mul(&x, &x, &T.x);

    fp2_sqr(&x2, &x);           // x^2
    fp2_mul(&x3, &x2, &x);      // x^3
    fp2_mul(&x2, &x2, &a);      // a*x^2

    fp2_add(&rhs, &x2, &x3);
    fp2_add(&rhs, &rhs, &x);
    return fp2_is_square(&rhs);
}
// ladder3pt computes x(P + [m]Q)
// Scans all 64 bits of each of the NWORDS_FIELD words of m, least significant bit first.
// Every bit costs one xDBLADD, wrapped in a pair of swap_points calls whose mask is
// all-ones when the bit is 0 and zero when the bit is 1.
void ladder3pt(ec_point_t *R, fp_t const m, ec_point_t const *P, ec_point_t const *Q, ec_point_t const *PQ, ec_point_t const *A)
{
    ec_point_t S0, S1, S2;

    copy_point(&S0, Q);
    copy_point(&S1, P);
    copy_point(&S2, PQ);

    for (int word = 0; word < NWORDS_FIELD; word++)
    {
        uint64_t bit = 1;
        for (int pos = 0; pos < 64; pos++, bit <<= 1)
        {
            swap_points(&S1, &S2, -((bit & m[word]) == 0));
            xDBLADD(&S0, &S1, &S0, &S1, &S2, A);
            swap_points(&S1, &S2, -((bit & m[word]) == 0));
        }
    }
    copy_point(R, &S1);
}
// A projective x-only point (X : Z) is the point at infinity exactly when Z == 0;
// returns the result of fp2_is_zero() on P.z.
static inline int isinfinity(ec_point_t const P)
{
    const fp2_t *z = &P.z;
    return fp2_is_zero(z);
}
// Test driver for the odd-degree isogeny routines kps / xisog / xeval.
// Phase 1 walks a chain of isogenies over the first P_LEN torsion primes using a kernel
// derived from (PA, QA, PQA); phase 2 does the same over primes P_LEN .. P_LEN+M_LEN-1
// using (PB, QB, PQB). After each step the images are checked with assert(): expected
// rationality, not being the point at infinity too early, and loss of the corresponding
// prime factor from the order. Returns 0 when every check passed; aborts otherwise.
int main()
{
    int i, j;
    ec_point_t A, B, T;

    // Starting curve in (A + 2C : 4C) form with A = 0 and C = 1
    fp2_set(&A.x, 0);
    fp_mont_setone(A.z.re);fp_set(A.z.im,0);
    // fp2_add(&A.x, &A.z, &A.x); // 1
    // fp2_add(&A.x, &A.x, &A.x); // 2
    // fp2_add(&A.x, &A.z, &A.x); // 3
    // fp2_add(&A.x, &A.x, &A.x); // 6
    fp2_add(&A.z, &A.z, &A.z); // 2C
    fp2_add(&A.x, &A.x, &A.z); // A' + 2C
    fp2_add(&A.z, &A.z, &A.z); // 4C
    // Just to ensure the projective curve coefficients are different from zero
    // (both coordinates are checked: A.x and A.z)
    assert( !fp2_is_zero(&A.x) && !fp2_is_zero(&A.z) );

    fp2_t a;
    coeff(&a, A);

    ec_point_t PA, QA, PQA, PB, QB, PQB, RA, RB;
    // Writing the public projective x-coordinate points into Montgomery domain
    fp2_tomont(&(PA.x), &(xPA));
    fp_mont_setone(PA.z.re);fp_set(PA.z.im,0);
    fp2_tomont(&(QA.x), &(xQA));
    fp_mont_setone(QA.z.re);fp_set(QA.z.im,0);
    fp2_tomont(&(PQA.x), &(xPQA));
    fp_mont_setone(PQA.z.re);fp_set(PQA.z.im,0);
    assert( isrational(PA, a) );
    assert( isrational(QA, a) );
    assert( isrational(PQA, a) );
    fp2_tomont(&(PB.x), &(xPB));
    fp_mont_setone(PB.z.re);fp_set(PB.z.im,0);
    fp2_tomont(&(QB.x), &(xQB));
    fp_mont_setone(QB.z.re);fp_set(QB.z.im,0);
    fp2_tomont(&(PQB.x), &(xPQB));
    fp_mont_setone(PQB.z.re);fp_set(PQB.z.im,0);
    assert( !isrational(PB, a) );
    assert( !isrational(QB, a) );
    assert( !isrational(PQB, a) );

    // ======================================================================================================
    // PA, QA and PQA are multiplied by each prime TORSION_ODD_POWERS[j]-1 times, so that each
    // prime is left with exponent one in their order
    for (j = 0; j < P_LEN; j++)
    {
        for (i = 1; i < TORSION_ODD_POWERS[j]; i++)
        {
            xMULv2(&PA, &PA, &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);
            xMULv2(&QA, &QA, &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);
            xMULv2(&PQA, &PQA, &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);
            assert( isrational(PA, a) );
            assert( isrational(QA, a) );
            assert( isrational(PQA, a) );
        };
    };
    assert( !isinfinity(PA) );
    assert( !isinfinity(QA) );
    assert( !isinfinity(PQA) );

    // --------------------------------------------------------------
    fp_t m;
    random_scalar(m, 0);
    ladder3pt(&RA, m, &PA, &QA, &PQA, &A);

    for (i = 0; i < P_LEN; i++)
    {
        printf("// Processing the %d-th prime:\t", i + 1);
        printf("%2d%%", 100 * i / (int)P_LEN);
        fflush(stdout);
        // Carriage return + ANSI "erase line", so the next line printed overwrites the progress text
        printf("\r\x1b[K");

        // Kernel generator for the i-th prime: clear every remaining prime j > i from RA
        copy_point(&T, &RA);
        for (j = (i+1); j < P_LEN; j++)
            xMULv2(&T, &T, &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);

        assert( !isinfinity(T) );
        kps(i, T, A);
        if (TORSION_ODD_PRIMES[i] > gap)
            printf("[\033[0;31m%7" PRId64 "\033[0m] (#I: %3d, #J: %3d, #K: %3d) \n", TORSION_ODD_PRIMES[i], sI, sJ, sK);
        else
            printf("[\033[0;31m%7" PRId64 "\033[0m] --------------------------- \n", TORSION_ODD_PRIMES[i]);

        xisog(&B, i, A);
        xeval(&PB, i, PB, A);
        coeff(&a, B);
        assert( !isinfinity(PB) );
        assert( !isrational(PB, a) );

        // RA must survive every step except the last, where it is mapped to infinity
        xeval(&RA, i, RA, A);
        assert( (!isinfinity(RA) && (i < (P_LEN - 1))) || (isinfinity(RA) && (i == (P_LEN - 1))) );
        assert( (isrational(RA, a) && (i < (P_LEN - 1))) || (isinfinity(RA) && (i == (P_LEN - 1))) );

        copy_point(&A, &B);
        // Verifying the order of the image point of RA has been reduced
        copy_point(&T, &RA);
        for (j = (i+1); j < P_LEN; j++)
            xMULv2(&T, &T, &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);
        assert( isinfinity(T) );
        kps_clear(i);
    };

    // Second phase: restart from the initial curve and the original public points
    fp2_set(&A.x, 0);
    fp_mont_setone(A.z.re);fp_set(A.z.im,0);
    // fp2_add(&A.x, &A.z, &A.x); // 1
    // fp2_add(&A.x, &A.x, &A.x); // 2
    // fp2_add(&A.x, &A.z, &A.x); // 3
    // fp2_add(&A.x, &A.x, &A.x); // 6
    fp2_add(&A.z, &A.z, &A.z); // 2C
    fp2_add(&A.x, &A.x, &A.z); // A' + 2C
    fp2_add(&A.z, &A.z, &A.z); // 4C
    // Just to ensure the projective curve coefficients are different from zero
    assert( !fp2_is_zero(&A.x) && !fp2_is_zero(&A.z) );

    coeff(&a, A);

    // Writing the public projective x-coordinate points into Montgomery domain
    fp2_tomont(&(PA.x), &(xPA));
    fp_mont_setone(PA.z.re);fp_set(PA.z.im,0);
    fp2_tomont(&(QA.x), &(xQA));
    fp_mont_setone(QA.z.re);fp_set(QA.z.im,0);
    fp2_tomont(&(PQA.x), &(xPQA));
    fp_mont_setone(PQA.z.re);fp_set(PQA.z.im,0);
    assert( isrational(PA, a) );
    assert( isrational(QA, a) );
    assert( isrational(PQA, a) );
    fp2_tomont(&(PB.x), &(xPB));
    fp_mont_setone(PB.z.re);fp_set(PB.z.im,0);
    fp2_tomont(&(QB.x), &(xQB));
    fp_mont_setone(QB.z.re);fp_set(QB.z.im,0);
    fp2_tomont(&(PQB.x), &(xPQB));
    fp_mont_setone(PQB.z.re);fp_set(PQB.z.im,0);
    assert( !isrational(PB, a) );
    assert( !isrational(QB, a) );
    assert( !isrational(PQB, a) );

    // ======================================================================================================
    // PB, QB and PQB are multiplied by each prime TORSION_ODD_POWERS[j]-1 times, so that each
    // prime is left with exponent one in their order
    for (j = P_LEN; j < (P_LEN+M_LEN); j++)
    {
        for (i = 1; i < TORSION_ODD_POWERS[j]; i++)
        {
            xMULv2(&PB, &PB, &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);
            xMULv2(&QB, &QB, &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);
            xMULv2(&PQB, &PQB, &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);
            assert( !isrational(PB, a) );
            assert( !isrational(QB, a) );
            assert( !isrational(PQB, a) );
        };
    };
    assert( !isinfinity(PB) );
    assert( !isinfinity(QB) );
    assert( !isinfinity(PQB) );

    random_scalar(m, 1);
    ladder3pt(&RB, m, &PB, &QB, &PQB, &A);

    for (i = P_LEN; i < (P_LEN+M_LEN); i++)
    {
        printf("// Processing the %d-th prime:\t", i + 1);
        printf("%2d%%", 100 * i / (int)(P_LEN+M_LEN));
        fflush(stdout);
        printf("\r\x1b[K");

        // Kernel generator for the i-th prime: clear every remaining prime j > i from RB
        copy_point(&T, &RB);
        for (j = (i+1); j < (P_LEN+M_LEN); j++)
            xMULv2(&T, &T, &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);

        assert( !isinfinity(T) );
        kps(i, T, A);
        if (TORSION_ODD_PRIMES[i] > gap)
            printf("[\033[0;31m%7" PRId64 "\033[0m] (#I: %3d, #J: %3d, #K: %3d) \n", TORSION_ODD_PRIMES[i], sI, sJ, sK);
        else
            printf("[\033[0;31m%7" PRId64 "\033[0m] --------------------------- \n", TORSION_ODD_PRIMES[i]);

        xisog(&B, i, A);
        xeval(&PA, i, PA, A);
        coeff(&a, B);
        assert( !isinfinity(PA) );
        assert( isrational(PA, a) );

        // RB must survive every step except the last, where it is mapped to infinity
        xeval(&RB, i, RB, A);
        assert( (!isinfinity(RB) && (i < (P_LEN + M_LEN - 1))) || (isinfinity(RB) && (i == (P_LEN + M_LEN - 1))) );
        assert( (!isrational(RB, a) && (i < (P_LEN + M_LEN - 1))) || (isinfinity(RB) && (i == (P_LEN + M_LEN - 1))) );

        copy_point(&A, &B);
        // Verifying the order of the image point of RB has been reduced
        copy_point(&T, &RB);
        for (j = (i+1); j < (P_LEN+M_LEN); j++)
            xMULv2(&T, &T, &(TORSION_ODD_PRIMES[j]), p_plus_minus_bitlength[j], &A);
        assert( isinfinity(T) );
        kps_clear(i);
    };

    printf("-- All tests passed!\n");
    return 0;
}

View File

@@ -1,299 +0,0 @@
#include "isog.h"
#include "ec.h"
#include <assert.h>
// -----------------------------------------------------------------------------------------
// -----------------------------------------------------------------------------------------
// Traditional isogeny evaluation (xEVAL)
// CrissCross procedure as described in Hisil and Costello paper:
//   *r0 = alpha*delta + beta*gamma
//   *r1 = alpha*delta - beta*gamma
void CrissCross(fp2_t *r0, fp2_t *r1, fp2_t const alpha, fp2_t const beta, fp2_t const gamma, fp2_t const delta)
{
    fp2_t ad, bg;

    fp2_mul(&bg, &beta, &gamma);
    fp2_mul(&ad, &alpha, &delta);
    // r0 is written before r1, as in the original statement order
    fp2_add(r0, &ad, &bg);
    fp2_sub(r1, &ad, &bg);
}
// Degree-2 isogeny evaluation with kernel generated by P != (0, 0)
// Maps each of the lenQ points Q[j] to R[j], using the values stored in K[0].
void xeval_2(ec_point_t* R, ec_point_t* const Q, const int lenQ)
{
    fp2_t plus, minus, kx_minus, kz_plus;

    for (int j = 0; j < lenQ; j++) {
        ec_point_t *in = &Q[j];
        ec_point_t *out = &R[j];

        fp2_add(&plus, &in->x, &in->z);             // X + Z
        fp2_sub(&minus, &in->x, &in->z);            // X - Z
        fp2_mul(&kx_minus, &K[0].x, &minus);
        fp2_mul(&kz_plus, &K[0].z, &plus);
        fp2_add(&plus, &kx_minus, &kz_plus);
        fp2_sub(&minus, &kx_minus, &kz_plus);
        // The x output is written before in->z is read again, as in the original order
        fp2_mul(&out->x, &in->x, &plus);
        fp2_mul(&out->z, &in->z, &minus);
    }
}
// Degree-4 isogeny evaluation with kernel generated by P such that [2]P != (0, 0)
// Maps each of the lenQ points of Q to the corresponding entry of R.
// Reads the global constants K[0].x, K[1].x and K[2].x.
void xeval_4(ec_point_t* R, const ec_point_t* Q, const int lenQ)
{
    fp2_t sum, diff, lhs, rhs, mid;
    int idx;

    for (idx = 0; idx < lenQ; ++idx) {
        fp2_add(&sum, &Q[idx].x, &Q[idx].z);   // X + Z
        fp2_sub(&diff, &Q[idx].x, &Q[idx].z);  // X - Z
        fp2_mul(&lhs, &sum, &K[1].x);          // (X + Z) * K[1].x
        fp2_mul(&rhs, &diff, &K[2].x);         // (X - Z) * K[2].x
        fp2_mul(&mid, &sum, &diff);            // (X + Z) * (X - Z)
        fp2_mul(&mid, &mid, &K[0].x);          // ... * K[0].x
        fp2_add(&sum, &lhs, &rhs);
        fp2_sub(&diff, &lhs, &rhs);
        fp2_sqr(&sum, &sum);                   // (lhs + rhs)^2
        fp2_sqr(&diff, &diff);                 // (lhs - rhs)^2
        fp2_add(&lhs, &mid, &sum);
        fp2_sub(&mid, &mid, &diff);
        fp2_mul(&R[idx].x, &lhs, &sum);
        fp2_mul(&R[idx].z, &diff, &mid);
    }
}
// Degree-4 isogeny evaluation with kernel generated by P such that [2]P = (0, 0)
// Must call after xisog_4_singular (reads the global constants K[0].x, K[0].z, K[1].x).
// Maps each of the lenQ points of Q to the corresponding entry of R.
void xeval_4_singular(ec_point_t* R, const ec_point_t* Q, const int lenQ, const ec_point_t P)
{
    fp2_t t0, t1, t2;
    // P is fixed for the whole call, so the branch selector is computed once
    // instead of once per evaluated point.
    const int p_is_plus_one = (fp2_is_equal(&P.x, &P.z) != 0);

    for (int i = 0; i < lenQ; i++) {
        fp2_add(&t0, &Q[i].x, &Q[i].z);
        fp2_sub(&t2, &Q[i].x, &Q[i].z);
        fp2_sqr(&t0, &t0);              // (X + Z)^2
        fp2_sqr(&t2, &t2);              // (X - Z)^2
        fp2_sub(&R[i].z, &t0, &t2);     // (X + Z)^2 - (X - Z)^2
        if (p_is_plus_one) {
            // Branch for P = (+1,_)
            fp2_copy(&t1, &t2);
        }
        else {
            // Branch for P = (-1,_): the roles of the two squares are swapped
            fp2_copy(&t1, &t0);
            fp2_copy(&t0, &t2);
        }
        fp2_mul(&R[i].x, &R[i].z, &K[0].x);
        fp2_mul(&R[i].z, &R[i].z, &K[1].x);
        fp2_mul(&R[i].z, &R[i].z, &t1);
        fp2_mul(&t1, &t1, &K[0].z);
        fp2_add(&R[i].x, &R[i].x, &t1);
        fp2_mul(&R[i].x, &R[i].x, &t0);
    }
}
// Isogeny evaluation on Montgomery curves (traditional Velu formulae)
// Recall: K has been computed in the Twisted Edwards model, so no extra additions are required.
// Evaluates the isogeny of degree TORSION_ODD_PRIMES[i] at P and writes the image into Q.
void xeval_t(ec_point_t* Q, uint64_t const i, ec_point_t const P)
{
    const int half = ((int)TORSION_ODD_PRIMES[i] - 1) / 2; // Here, l = 2*half + 1
    fp2_t acc_x, acc_z, sum, diff, fac_x, fac_z;

    fp2_add(&sum, &P.x, &P.z);   // X + Z
    fp2_sub(&diff, &P.x, &P.z);  // X - Z

    // Running products over the kernel points K[0..half-1]
    CrissCross(&acc_x, &acc_z, K[0].z, K[0].x, sum, diff);
    for (int k = 1; k < half; ++k) {
        CrissCross(&fac_x, &fac_z, K[k].z, K[k].x, sum, diff);
        fp2_mul(&acc_x, &fac_x, &acc_x);
        fp2_mul(&acc_z, &fac_z, &acc_z);
    }

    fp2_sqr(&acc_x, &acc_x);
    fp2_sqr(&acc_z, &acc_z);
    fp2_mul(&(Q->x), &P.x, &acc_x);
    fp2_mul(&(Q->z), &P.z, &acc_z);
}
// -----------------------------------------------------------------------------------------
// -----------------------------------------------------------------------------------------
// Isogeny evaluation (xEVAL) used in velu SQRT
void xeval_s(ec_point_t* Q, uint64_t const i, ec_point_t const P, ec_point_t const A)
{
// =================================================================================
assert(TORSION_ODD_PRIMES[i] > gap); // Ensuring velusqrt is used for l_i > gap
sI = sizeI[i]; // size of I
sJ = sizeJ[i]; // size of J
sK = sizeK[i]; // size of K
assert(sI >= sJ); // Ensuring #I >= #J
assert(sK >= 0); // Recall, it must be that #K >= 0
assert(sJ > 1); // ensuring sI >= sJ > 1
// =================================================================================
// We require the curve coefficient A = A'/C ... well, a multiple of these ones
fp2_t Ap;
fp2_add(&Ap, &A.x, &A.x); // 2A' + 4C
fp2_sub(&Ap, &Ap, &A.z); // 2A'
fp2_add(&Ap, &Ap, &Ap); // 4A'
// --------------------------------------------------------------------------------------------------
// ~~~~~~~~
// | |
// Computing E_J(W) = | | [ F0(W, x([j]P)) * alpha^2 + F1(W, x([j]P)) * alpha + F2(W, x([j]P)) ]
// j in J
// In order to avoid costly inverse computations in fp, we are gonna work with projective coordinates
// In particular, for a degree-l isogeny construction, we need alpha = X/Z and alpha = Z/X (i.e., 1/alpha)
//fp2_t EJ_0[sJ][3]; // EJ_0[j][2] factors of one polynomial to be used in a resultant
fp2_t XZ_add, XZj_add,
XZ_sub, XZj_sub,
AXZ2,
CXZ2,
CX2Z2,
t1, t2;
fp2_add(&XZ_add, &P.x, &P.z); // X + Z
fp2_sub(&XZ_sub, &P.x, &P.z); // X - Z
fp2_mul(&AXZ2, &P.x, &P.z); // X * Z
fp2_sqr(&t1, &P.x); // X ^ 2
fp2_sqr(&t2, &P.z); // Z ^ 2
fp2_add(&CX2Z2, &t1, &t2); // X^2 + Z^2
fp2_mul(&CX2Z2, &CX2Z2, &A.z); // C * (X^2 + Z^2)
fp2_add(&AXZ2, &AXZ2, &AXZ2); // 2 * (X * Z)
fp2_mul(&CXZ2, &AXZ2, &A.z); // C * [2 * (X * Z)]
fp2_mul(&AXZ2, &AXZ2, &Ap); // A' * [2 * (X * Z)]
int j;
for (j = 0; j < sJ; j++)
{
fp2_add(&XZj_add, &J[j].x, &J[j].z); // Xj + Zj
fp2_sub(&XZj_sub, &J[j].x, &J[j].z); // Xj - Zj
fp2_mul(&t1, &XZ_sub, &XZj_add); // (X - Z) * (Xj + Zj)
fp2_mul(&t2, &XZ_add, &XZj_sub); // (X + Z) * (Xj - Zj)
// ...................................
// Computing the quadratic coefficient
fp2_sub(&EJ_0[j][2], &t1, &t2); // 2 * [(X*Zj) - (Z*Xj)]
fp2_sqr(&EJ_0[j][2], &EJ_0[j][2]); // ( 2 * [(X*Zj) - (Z*Xj)] )^2
fp2_mul(&EJ_0[j][2], &A.z, &EJ_0[j][2]); // C * ( 2 * [(X*Zj) - (Z*Xj)] )^2
// ..................................
// Computing the constant coefficient
fp2_add(&EJ_0[j][0], &t1, &t2); // 2 * [(X*Xj) - (Z*Zj)]
fp2_sqr(&EJ_0[j][0], &EJ_0[j][0]); // ( 2 * [(X*Xj) - (Z*Zj)] )^2
fp2_mul(&EJ_0[j][0], &A.z, &EJ_0[j][0]); // C * ( 2 * [(X*Xj) - (Z*Zj)] )^2
// ................................
// Computing the linear coefficient
// C * [ (-2*Xj*Zj)*(alpha^2 + 1) + (-2*alpha)*(Xj^2 + Zj^2)] + [A' * (-2*Xj*Zj) * (2*X*Z)] where alpha = X/Z
fp2_add(&t1, &J[j].x, &J[j].z); // (Xj + Zj)
fp2_sqr(&t1, &t1); // (Xj + Zj)^2
fp2_add(&t1, &t1, &t1); // 2 * (Xj + Zj)^2
fp2_add(&t1, &t1, &XZJ4[j]); // 2 * (Xj + Zj)^2 - (4*Xj*Zj) := 2 * (Xj^2 + Zj^2)
fp2_mul(&t1, &t1, &CXZ2); // [2 * (Xj^2 + Zj^2)] * (2 * [ C * (X * Z)])
fp2_mul(&t2, &CX2Z2, &XZJ4[j]); // [C * (X^2 + Z^2)] * (-4 * Xj * Zj)
fp2_sub(&t1, &t2, &t1); // [C * (X^2 + Z^2)] * (-4 * Xj * Zj) - [2 * (Xj^2 + Zj^2)] * (2 * [ C * (X * Z)])
fp2_mul(&t2, &AXZ2, &XZJ4[j]); // (2 * [A' * (X * Z)]) * (-4 * Xj * Zj)
fp2_add(&EJ_0[j][1], &t1, &t2); // This is our desired equation but multiplied by 2
fp2_add(&EJ_0[j][1], &EJ_0[j][1], &EJ_0[j][1]); // This is our desired equation but multiplied by 4
};
// ---------------------------------------------------------------------
// The faster way for multiplying is using a divide-and-conquer approach
// product tree of EJ_0 (we only require the root)
product_tree_LENFeq3(ptree_EJ, deg_ptree_EJ, 0, EJ_0, sJ);
assert( deg_ptree_EJ[0] == (2*sJ) );
if (!scaled)
{
// unscaled remainder tree approach
multieval_unscaled(leaves, ptree_EJ[0], 2*sJ + 1, rtree_hI, (const fp2_t*)rtree_A, ptree_hI, deg_ptree_hI, 0, sI);
}
else
{
// scaled remainder tree approach
fp2_t G[sI_max], G_rev[sI_max];
poly_redc(G, ptree_EJ[0], 2*sJ + 1, ptree_hI[0], sI + 1, R0, A0);
for (j = 0; j < sI; j++)
fp2_copy(&G_rev[j], &G[sI - 1 - j]);
poly_mul_middle(G_rev, G_rev, sI, R0, sI);
for (j = 0; j < sI; j++)
fp2_copy(&G[j], &G_rev[sI - 1 - j]);
multieval_scaled(leaves, G, ptree_hI, deg_ptree_hI, 0, sI);
};
// Finally, we must multiply the leaves of the outpur of remainders
fp2_t r0;
product(&r0, (const fp2_t*)leaves, sI);
// EJ_1 is just reverting the ordering in the coefficients of EJ_0
for (j = 0; j < sJ; j++){
fp2_copy(&t1, &ptree_EJ[0][j]);
fp2_copy(&ptree_EJ[0][j], &ptree_EJ[0][2*sJ - j]);
fp2_copy(&ptree_EJ[0][2*sJ - j], &t1);
}
if (!scaled)
{
// unscaled remainder tree approach
multieval_unscaled(leaves, ptree_EJ[0], 2*sJ + 1, rtree_hI, (const fp2_t*)rtree_A, ptree_hI, deg_ptree_hI, 0, sI);
}
else
{
// scaled remainder tree approach
fp2_t G[sI_max], G_rev[sI_max];
poly_redc(G, ptree_EJ[0], 2*sJ + 1, ptree_hI[0], sI + 1, R0, A0);
for (j = 0; j < sI; j++)
fp2_copy(&G_rev[j], &G[sI - 1 - j]);
poly_mul_middle(G_rev, G_rev, sI, R0, sI);
for (j = 0; j < sI; j++)
fp2_copy(&G[j], &G_rev[sI - 1 - j]);
multieval_scaled(leaves, G, ptree_hI, deg_ptree_hI, 0, sI);
};
clear_tree(ptree_EJ, 0, sJ);
// Finally, we must multiply the leaves of the outpur of remainders
fp2_t r1;
product(&r1, (const fp2_t*)leaves, sI);
// -------------------------------
// Sometimes the public value sK is equal to zero,
// Thus for avoing runtime error we add one when sK =0
fp2_t hK_0[sK_max + 1], hK_1[sK_max + 1], hk_0, hk_1;
for (j = 0; j < sK; j++)
{
fp2_add(&XZj_add, &K[j].x, &K[j].z); // Xk + Zk
fp2_sub(&XZj_sub, &K[j].x, &K[j].z); // Xk - Zk
fp2_mul(&t1, &XZ_sub, &XZj_add); // (X - Z) * (Xk + Zk)
fp2_mul(&t2, &XZ_add, &XZj_sub); // (X + Z) * (Xk - Zk)
// Case alpha = X/Z
fp2_sub(&hK_0[j], &t1, &t2); // 2 * [(X*Zk) - (Z*Xk)]
// Case 1/alpha = Z/X
fp2_add(&hK_1[j], &t1, &t2); // 2 * [(X*Xk) - (Z*Zk)]
};
// hk_0 <- use product to mulitiply all the elements in hK_0
product(&hk_0, (const fp2_t*)hK_0, sK);
// hk_1 <- use product to mulitiply all the elements in hK_1
product(&hk_1, (const fp2_t*)hK_1, sK);
// ---------------------------------------------------------------------------------
// Now, unifying all the computations
fp2_mul(&t1, &hk_1, &r1); // output of algorithm 2 with 1/alpha = Z/X and without the demoninator
fp2_sqr(&t1, &t1);
fp2_mul(&(Q->x), &t1, &P.x);
fp2_mul(&t2, &hk_0, &r0); // output of algorithm 2 with alpha = X/Z and without the demoninator
fp2_sqr(&t2, &t2);
fp2_mul(&(Q->z), &t2, &P.z);
}

View File

@@ -1,295 +0,0 @@
#include "isog.h"
#include "ec.h"
#include <assert.h>
// -------------------------------------------------------------------------
// -------------------------------------------------------------------------
// Degree-2 isogeny with kernel generated by P != (0 ,0)
// Outputs the curve coefficient in the form A24=(A+2C:4C)
// Also stores (P.x + P.z, P.x - P.z) in the global K[0].
void xisog_2(ec_point_t* B, ec_point_t const P)
{
    fp2_t x_sq;

    fp2_sqr(&x_sq, &P.x);           // X^2
    fp2_sqr(&B->z, &P.z);           // Z^2
    fp2_sub(&B->x, &B->z, &x_sq);   // Z^2 - X^2

    ec_point_t *kernel = &K[0];
    fp2_add(&kernel->x, &P.x, &P.z);  // X + Z
    fp2_sub(&kernel->z, &P.x, &P.z);  // X - Z
}
// Degree-4 isogeny with kernel generated by P such that [2]P != (0 ,0)
// Outputs the curve coefficient in the form A24=(A+2C:4C)
// On return the global K holds: K[0].x = 4*Z^2, K[0].z = Z^2,
// K[1].x = X - Z, K[1].z = Z^2 - X^2 and K[2].x = X + Z, where P = (X : Z).
void xisog_4(ec_point_t* B, ec_point_t const P)
{
    fp2_t x_sq, sq_sum;

    fp2_sqr(&x_sq, &P.x);                   // X^2
    fp2_sqr(&K[0].z, &P.z);                 // Z^2
    fp2_add(&sq_sum, &K[0].z, &x_sq);       // Z^2 + X^2
    fp2_sub(&K[1].z, &K[0].z, &x_sq);       // Z^2 - X^2
    fp2_mul(&B->x, &sq_sum, &K[1].z);       // Z^4 - X^4
    fp2_sqr(&B->z, &K[0].z);                // Z^4

    // Constants for xeval_4
    fp2_add(&K[2].x, &P.x, &P.z);           // X + Z
    fp2_sub(&K[1].x, &P.x, &P.z);           // X - Z
    fp2_add(&K[0].x, &K[0].z, &K[0].z);     // 2 * Z^2
    fp2_add(&K[0].x, &K[0].x, &K[0].x);     // 4 * Z^2
}
// Degree-4 isogeny with kernel generated by P such that [2]P = (0 ,0)
// Takes the domain coefficient A24 and writes the image coefficient into B24.
// Stores in the global K (K[0].x, K[0].z, K[1].x) the constants later read by
// xeval_4_singular; which of K[0].x / K[1].x receives A24.x depends on whether
// P.x equals P.z.
void xisog_4_singular(ec_point_t* B24, ec_point_t const P, ec_point_t A24)
{
    fp2_copy(&K[0].z, &A24.z);
    if (fp2_is_equal(&P.x, &P.z)) {
        // Case for P=(1,_)
        fp2_copy(&K[0].x, &A24.x);
        fp2_sub(&K[1].x, &A24.x, &A24.z);
        fp2_neg(&B24->z, &K[1].x);          // A24.z - A24.x
    }
    else {
        // Case for P=(-1,_)
        fp2_copy(&K[1].x, &A24.x);
        fp2_sub(&K[0].x, &A24.x, &A24.z);
        // NOTE(review): a store of -K[0].x into B24->z used to precede this
        // copy and was overwritten by it, so only this value was ever
        // observable. The dead store was removed; confirm that A24.x (and not
        // A24.z - A24.x) is the intended B24->z for this branch.
        fp2_copy(&B24->z, &K[1].x);
    }
    fp2_copy(&B24->x, &K[0].z);
}
// xISOG procedure, which is a hybrid between Montgomery and Twisted Edwards
// These traditional formulae correspond to the Twisted Edwards formulae, with
// the output mapped back into Montgomery form.
// Writes the image-curve coefficient of the degree-TORSION_ODD_PRIMES[i]
// isogeny into B, using the kernel points stored in the global K.
void xisog_t(ec_point_t* B, uint64_t const i, ec_point_t const A)
{
    const int half = ((int)TORSION_ODD_PRIMES[i] - 1) / 2; // Here, l = 2*half + 1
    const int nbits = (int)p_plus_minus_bitlength[i];
    fp2_t prod_x, prod_z, ed_d, pow_a, pow_d;
    int k, bit;

    // Products of the x- and z-coordinates of K[0..half-1]
    fp2_copy(&prod_x, &K[0].x);
    fp2_copy(&prod_z, &K[0].z);
    for (k = 1; k < half; ++k) {
        fp2_mul(&prod_x, &prod_x, &K[k].x);
        fp2_mul(&prod_z, &prod_z, &K[k].z);
    }

    // Mapping Montgomery curve coefficients into Twisted Edwards form
    fp2_sub(&ed_d, &A.x, &A.z);
    fp2_copy(&pow_a, &A.x);
    fp2_copy(&pow_d, &ed_d);

    // left-to-right square-and-multiply for a^l and d^l: the top bit is
    // consumed by the initial copies, the remaining bits are scanned downwards
    for (bit = nbits - 2; bit >= 0; --bit) {
        fp2_sqr(&pow_a, &pow_a);
        fp2_sqr(&pow_d, &pow_d);
        if ((((int)TORSION_ODD_PRIMES[i] >> bit) & 1) != 0) {
            fp2_mul(&pow_a, &pow_a, &A.x);
            fp2_mul(&pow_d, &pow_d, &ed_d);
        }
    }

    // raising to 8-th power
    for (k = 0; k < 3; ++k) {
        fp2_sqr(&prod_x, &prod_x);
        fp2_sqr(&prod_z, &prod_z);
    }

    // Mapping Twisted Edwards curve coefficients into Montgomery form
    fp2_mul(&(B->x), &pow_a, &prod_z);
    fp2_mul(&(B->z), &pow_d, &prod_x);
    fp2_sub(&(B->z), &(B->x), &(B->z));
}
// -------------------------------------------------------------------------
// -------------------------------------------------------------------------
// Isogeny construction (xISOG) used in velu SQRT
void xisog_s(ec_point_t* B, uint64_t const i, ec_point_t const A)
{
// =================================================================================
assert(TORSION_ODD_PRIMES[i] > gap); // Ensuring velusqrt is used for l_i > gap
sI = sizeI[i]; // size of I
sJ = sizeJ[i]; // size of J
sK = sizeK[i]; // size of K
assert(sI >= sJ); // Ensuring #I >= #J
assert(sK >= 0); // Recall, L is a prime and therefore it must be that #K > 0
assert(sJ > 1); // ensuring sI >= sJ > 1
// =================================================================================
// We require the curve coefficient A = A'/C ... well, a multiple of these ones
fp2_t Ap;
fp2_add(&Ap, &A.x, &A.x); // 2A' + 4C
fp2_sub(&Ap, &Ap, &A.z); // 2A'
fp2_add(&Ap, &Ap, &Ap); // 4A'
fp2_t ADD_SQUARED[sJ_max], // (Xj + Zj)^2
SUB_SQUARED[sJ_max]; // (Xj - Zj)^2
int j;
// Next loop precompute some variables to be used in the reaminder of xisog
for (j = 0; j < sJ; j++)
{
fp2_sub(&SUB_SQUARED[j], &J[j].x, &J[j].z); // (Xj - Zj)
fp2_sqr(&SUB_SQUARED[j], &SUB_SQUARED[j]); // (Xj - Zj)^2
fp2_sub(&ADD_SQUARED[j], &SUB_SQUARED[j], &XZJ4[j]); // (Xj + Zj)^2
};
// --------------------------------------------------------------------------------------------------
// ~~~~~~~~
// | |
// Computing E_J(W) = | | [ F0(W, x([j]P)) * alpha^2 + F1(W, x([j]P)) * alpha + F2(W, x([j]P)) ]
// j in J
// In order to avoid costly inverse computations in fp, we are gonna work with projective coordinates
// In particular, for a degree-l isogeny construction, we need alpha = 1 and alpha = -1
//fp2_t EJ_0[sJ][3], // quadratic factors of one polynomial to be used in a resultant
// EJ_1[sJ][3]; // quadratic factors of one polynomial to be used in a resultant
// Next loop computes all the quadratic factors of EJ_0 and EJ_1
fp2_t t1;
for (j = 0; j < sJ; j++)
{
// Each SUB_SQUARED[j] and ADD_SQUARED[j] should be multiplied by C
fp2_mul(&EJ_1[j][0], &ADD_SQUARED[j], &A.z);
fp2_mul(&EJ_0[j][0], &SUB_SQUARED[j], &A.z);
// We require the double of tadd and tsub
fp2_add(&EJ_0[j][1], &EJ_1[j][0], &EJ_1[j][0]);
fp2_add(&EJ_1[j][1], &EJ_0[j][0], &EJ_0[j][0]);
fp2_mul(&t1, &XZJ4[j], &Ap); // A' *(-4*Xj*Zj)
// Case alpha = 1
fp2_sub(&EJ_0[j][1], &t1, &EJ_0[j][1]);
fp2_copy(&EJ_0[j][2], &EJ_0[j][0]); // E_[0,j} is a palindrome
// Case alpha = -1
fp2_sub(&EJ_1[j][1], &EJ_1[j][1], &t1);
fp2_copy(&EJ_1[j][2], &EJ_1[j][0]); // E_{1,j} is a palindrome
};
// ---------------------------------------------------------------------
// The faster way for multiplying is using a divide-and-conquer approach
// selfreciprocal product tree of EJ_0 (we only require the root)
product_tree_selfreciprocal_LENFeq3(ptree_EJ, deg_ptree_EJ, 0, EJ_0, sJ);
assert( deg_ptree_EJ[0] == (2*sJ) );
if (!scaled)
{
// (unscaled) remainder tree approach
multieval_unscaled(leaves, ptree_EJ[0], 2*sJ + 1, rtree_hI, (const fp2_t*)rtree_A, ptree_hI, deg_ptree_hI, 0, sI);
}
else
{
// scaled remainder tree approach
fp2_t G[sI_max], G_rev[sI_max];
poly_redc(G, ptree_EJ[0], 2*sJ + 1, ptree_hI[0], sI + 1, R0, A0);
for (j = 0; j < sI; j++)
fp2_copy(&G_rev[j], &G[sI - 1 - j]);
poly_mul_middle(G_rev, G_rev, sI, R0, sI);
for (j = 0; j < sI; j++)
fp2_copy(&G[j], &G_rev[sI - 1 - j]);
multieval_scaled(leaves, G, ptree_hI, deg_ptree_hI, 0, sI);
};
clear_tree(ptree_EJ, 0, sJ);
// Finally, we must multiply the leaves of the outpur of remainders
fp2_t r0;
product(&r0, (const fp2_t*)leaves, sI);
// selfreciprocal product tree of EJ_1 (we only require the root)
product_tree_selfreciprocal_LENFeq3(ptree_EJ, deg_ptree_EJ, 0, EJ_1, sJ);
assert( deg_ptree_EJ[0] == (2*sJ) );
if (!scaled)
{
// (unscaled) remainder tree approach
multieval_unscaled(leaves, ptree_EJ[0], 2*sJ + 1, rtree_hI, (const fp2_t*)rtree_A, ptree_hI, deg_ptree_hI, 0, sI);
}
else
{
// scaled remainder tree approach
fp2_t G[sI_max], G_rev[sI_max];
poly_redc(G, ptree_EJ[0], 2*sJ + 1, ptree_hI[0], sI + 1, R0, A0);
for (j = 0; j < sI; j++)
fp2_copy(&G_rev[j], &G[sI - 1 - j]);
poly_mul_middle(G_rev, G_rev, sI, R0, sI);
for (j = 0; j < sI; j++)
fp2_copy(&G[j], &G_rev[sI - 1 - j]);
multieval_scaled(leaves, G, ptree_hI, deg_ptree_hI, 0, sI);
};
clear_tree(ptree_EJ, 0, sJ);
// Finally, we must multiply the leaves of the outpur of remainders
fp2_t r1;
product(&r1, (const fp2_t*)leaves, sI);
// -------------------------------
// Sometimes the public value sK is equal to zero,
// Thus for avoing runtime error we add one when sK =0
fp2_t hK_0[sK_max + 1], hK_1[sK_max + 1], hk_0, hk_1;
for (j = 0; j < sK; j++)
{
fp2_sub(&hK_0[j], &K[j].z, &K[j].x);
fp2_add(&hK_1[j], &K[j].z, &K[j].x);
};
// hk_0 <- use product to mulitiply all the elements in hK_0
product(&hk_0, (const fp2_t*)hK_0, sK);
// hk_1 <- use product to mulitiply all the elements in hK_1
product(&hk_1, (const fp2_t*)hK_1, sK);
// --------------------------------------------------------------
// Now, we have all the ingredients for computing the image curve
fp2_t A24, A24m,
t24, t24m; // <---- JORGE creo que podemos omitir estas variables, se usan cuando ya no se requiren los valores de la entrada A (podemos cambiar estos t's por B[0] y B[1]
fp2_copy(&A24, &A.x); // A' + 2C
fp2_sub(&A24m, &A.x, &A.z); // A' - 2C
fp2_copy(&Ap, &A24m);
// left-to-right method for computing (A' + 2C)^l and (A' - 2C)^l
for (j = 1; j < (int)p_plus_minus_bitlength[i]; j++)
{
fp2_sqr(&A24, &A24);
fp2_sqr(&A24m, &A24m);
if( ( ((int)TORSION_ODD_PRIMES[i] >> ((int)p_plus_minus_bitlength[i] - j - 1)) & 1 ) != 0 )
{
fp2_mul(&A24, &A24, &A.x);
fp2_mul(&A24m, &A24m, &Ap);
};
};
fp2_mul(&t24m, &hk_1, &r1); // output of algorithm 2 with alpha =-1 and without the demoninator
fp2_sqr(&t24m, &t24m); // raised at 2
fp2_sqr(&t24m, &t24m); // raised at 4
fp2_sqr(&t24m, &t24m); // raised at 8
fp2_mul(&t24, &hk_0, &r0); // output of algorithm 2 with alpha = 1 and without the demoninator
fp2_sqr(&t24, &t24); // raised at 2
fp2_sqr(&t24, &t24); // raised at 4
fp2_sqr(&t24, &t24); // raised at 8
fp2_mul(&A24, &A24, &t24m);
fp2_mul(&A24m, &A24m, &t24);
// Now, we have d = (A24m / A24) where the image Montgomery cuve coefficient is
// B' 2*(1 + d) 2*(A24 + A24m)
// B = ---- = --------- = --------------
// C (1 - d) (A24 - A24m)
// However, we required B' + 2C = 4*A24 and 4C = 4 * (A24 - A24m)
fp2_sub(&t24m, &A24, &A24m); // (A24 - A24m)
fp2_add(&t24m, &t24m, &t24m); // 2*(A24 - A24m)
fp2_add(&t24m, &t24m, &t24m); // 4*(A24 - A24m)
fp2_add(&t24, &A24, &A24); // 2 * A24
fp2_add(&t24, &t24, &t24); // 4 * A24
fp2_copy(&(B->x), &t24);
fp2_copy(&(B->z), &t24m);
}

View File

@@ -0,0 +1,82 @@
#ifndef _BIEXT_H_
#define _BIEXT_H_
#include <sqisign_namespace.h>
#include <ec.h>
// Inputs of a pairing computation on two points P and Q of order 2^e:
// their x-coordinates, the x-coordinate of their difference, the inverted
// x-coordinates of P and Q, and the curve constant A24.
typedef struct pairing_params
{
uint32_t e; // Points have order 2^e
ec_point_t P; // x(P)
ec_point_t Q; // x(Q)
ec_point_t PQ; // x(P-Q) = (PQX/PQZ : 1)
fp2_t ixP; // PZ/PX
fp2_t ixQ; // QZ/QX
ec_point_t A24; // ((A+2)/4 : 1)
} pairing_params_t;
// For two bases <P, Q> and <R, S> store the x-coordinates of the four
// cross differences between them:
// x(P - R), x(P - S), x(R - Q), x(S - Q)
typedef struct pairing_dlog_diff_points
{
ec_point_t PmR; // x(P - R)
ec_point_t PmS; // x(P - S)
ec_point_t RmQ; // x(R - Q)
ec_point_t SmQ; // x(S - Q)
} pairing_dlog_diff_points_t;
// Inputs of a pairing-based discrete logarithm between two bases <P, Q> and
// <R, S>: both bases, their cross differences, the inverted x-coordinates of
// the four basis points, and the curve constant A24.
typedef struct pairing_dlog_params
{
uint32_t e; // Points have order 2^e
ec_basis_t PQ; // x(P), x(Q), x(P-Q)
ec_basis_t RS; // x(R), x(S), x(R-S)
pairing_dlog_diff_points_t diff; // x(P - R), x(P - S), x(R - Q), x(S - Q)
fp2_t ixP; // PZ/PX
fp2_t ixQ; // QZ/QX
fp2_t ixR; // RZ/RX
fp2_t ixS; // SZ/SX
ec_point_t A24; // ((A+2)/4 : 1)
} pairing_dlog_params_t;
// Computes the Weil pairing *r = e_{2^e}(P, Q) using biextension ladder
// (here the parameter e is the exponent of the order 2^e of P and Q;
// PQ is presumably x(P - Q), as in pairing_params_t - confirm)
void weil(fp2_t *r, uint32_t e, const ec_point_t *P, const ec_point_t *Q, const ec_point_t *PQ, ec_curve_t *E);
// Computes the (reduced) Tate pairing *r = t_{2^e}(P, Q) using biextension ladder
void reduced_tate(fp2_t *r, uint32_t e, const ec_point_t *P, const ec_point_t *Q, const ec_point_t *PQ, ec_curve_t *E);
// Given two bases <P, Q> and <R, S> computes scalars
// such that R = [r1]P + [r2]Q, S = [s1]P + [s2]Q
// NOTE(review): PQ is taken by non-const pointer here, unlike in ec_dlog_2_tate
void ec_dlog_2_weil(digit_t *r1,
digit_t *r2,
digit_t *s1,
digit_t *s2,
ec_basis_t *PQ,
const ec_basis_t *RS,
ec_curve_t *curve,
int e);
// Given two bases <P, Q> and <R, S>
// where <P, Q> is a basis for E[2^f],
// the full 2^f-torsion, and <R, S> a basis
// for the smaller torsion E[2^e],
// computes scalars r1, r2, s1, s2
// such that R = [r1]P + [r2]Q, S = [s1]P + [s2]Q
void ec_dlog_2_tate(digit_t *r1,
digit_t *r2,
digit_t *s1,
digit_t *s2,
const ec_basis_t *PQ,
const ec_basis_t *RS,
ec_curve_t *curve,
int e);
// NOTE(review): undocumented here. Same parameter list as ec_dlog_2_tate except
// that both bases are taken by non-const pointer; the exact relation between the
// two functions is not visible from this header - document once confirmed.
void ec_dlog_2_tate_to_full(digit_t *r1,
digit_t *r2,
digit_t *s1,
digit_t *s2,
ec_basis_t *PQ,
ec_basis_t *RS,
ec_curve_t *curve,
int e);
#endif

View File

@@ -1,28 +0,0 @@
#ifndef CURVE_EXTRAS_H
#define CURVE_EXTRAS_H

#include "ec.h"
#include "torsion_constants.h"

// Point with three fp2 coordinates (x, y, z)
typedef struct jac_point_t {
    fp2_t x;
    fp2_t y;
    fp2_t z;
} jac_point_t;

// Basic point utilities
bool ec_is_zero(ec_point_t const* P);
void copy_point(ec_point_t* P, ec_point_t const* Q);
void swap_points(ec_point_t* P, ec_point_t* Q, const digit_t option);
void ec_init(ec_point_t* P);

// x-only arithmetic (doubling, differential double-and-add, scalar multiplication).
// xDBLv2 and xDBLADD take the curve constant as A24, xDBL takes it as AC,
// and xMUL / xDBLMUL take the full curve structure.
void xDBLv2(ec_point_t* Q, ec_point_t const* P, ec_point_t const* A24);
void xDBLADD(ec_point_t* R, ec_point_t* S, ec_point_t const* P, ec_point_t const* Q, ec_point_t const* PQ, ec_point_t const* A24);
void xDBL(ec_point_t* Q, ec_point_t const* P, ec_point_t const* AC);
void xMUL(ec_point_t* Q, ec_point_t const* P, digit_t const* k, ec_curve_t const* curve);
// Declared once (the header previously repeated this prototype)
void xDBLMUL(ec_point_t* S, ec_point_t const* P, digit_t const* k, ec_point_t const* Q, digit_t const* l, ec_point_t const* PQ, ec_curve_t const* curve);

// Legacy names mapped onto the ec.h API
#define is_point_equal ec_is_equal
#define xADD ec_add

#endif

File diff suppressed because it is too large Load Diff

View File

@@ -1,84 +1,28 @@
#ifndef _ISOG_H_
#define _ISOG_H_
#include <sqisign_namespace.h>
#include <ec.h>
#include "curve_extras.h"
#include "poly.h"
// Global working state shared by the kps / xisog / xeval routines
extern int sI, sJ, sK; // Sizes of each current I, J, and K
extern fp2_t I[sI_max][2], // I plays also as the linear factors of the polynomial h_I(X)
EJ_0[sJ_max][3], EJ_1[sJ_max][3]; // To be used in xisog and xeval
extern ec_point_t J[sJ_max], K[sK_max]; // Finite subsets of the kernel
extern fp2_t XZJ4[sJ_max], // -4* (Xj * Zj) for each j in J, and x([j]P) = (Xj : Zj)
rtree_A[(1 << (ceil_log_sI_max+2)) - 1], // constant multiple of the reciprocal tree computation
A0; // constant multiple of the reciprocal R0
extern poly ptree_hI[(1 << (ceil_log_sI_max+2)) - 1], // product tree of h_I(X)
rtree_hI[(1 << (ceil_log_sI_max+2)) - 1], // reciprocal tree of h_I(X)
ptree_EJ[(1 << (ceil_log_sJ_max+2)) - 1]; // product tree of E_J(X)
extern fp2_t R0[2*sJ_max + 1]; // Reciprocal of h_I(X) required in the scaled remainder tree approach
extern int deg_ptree_hI[(1 << (ceil_log_sI_max+2)) - 1], // degree of each node in the product tree of h_I(X)
deg_ptree_EJ[(1 << (ceil_log_sJ_max+2)) - 1]; // degree of each node in the product tree of E_J(X)
extern fp2_t leaves[sI_max]; // leaves of the remainder tree, which are required in the Resultant computation
void eds2mont(ec_point_t* P); // mapping from Twisted Edwards into Montgomery
void yadd(ec_point_t* R, ec_point_t* const P, ec_point_t* const Q, ec_point_t* const PQ); // differential addition on Twisted Edwards model
void CrissCross(fp2_t *r0, fp2_t *r1, fp2_t const alpha, fp2_t const beta, fp2_t const gamma, fp2_t const delta);
// Kernel point computation (KPS)
void kps_t(uint64_t const i, ec_point_t const P, ec_point_t const A); // tvelu formulae
void kps_s(uint64_t const i, ec_point_t const P, ec_point_t const A); // svelu formulae
// Isogeny construction (xISOG)
void xisog_4(ec_point_t* B, ec_point_t const P); // degree-4 isogeny construction
void xisog_4_singular(ec_point_t* B24, ec_point_t const P, ec_point_t A24); // degree-4 isogeny construction when [2]P = (0, 0)
void xisog_2(ec_point_t* B, ec_point_t const P); // degree-2 isogeny construction
void xisog_t(ec_point_t* B, uint64_t const i, ec_point_t const A); // tvelu formulae
void xisog_s(ec_point_t* B, uint64_t const i, ec_point_t const A); // svelu formulae
// Isogeny evaluation (xEVAL)
void xeval_4(ec_point_t* R, const ec_point_t* Q, const int lenQ); // degree-4 isogeny evaluation
void xeval_4_singular(ec_point_t* R, const ec_point_t* Q, const int lenQ, const ec_point_t P); // degree-4 isogeny evaluation; must be called after xisog_4_singular
void xeval_2(ec_point_t* R, ec_point_t* const Q, const int lenQ); // degree-2 isogeny evaluation
void xeval_t(ec_point_t* Q, uint64_t const i, ec_point_t const P); // tvelu formulae
void xeval_s(ec_point_t* Q, uint64_t const i, ec_point_t const P, ec_point_t const A); // svelu formulae
// Strategy-based 4-isogeny chain
// NOTE(review): a `static` prototype in a header declares a separate function in
// every translation unit that includes it; units that do not define it may get an
// "declared but not defined" warning - consider moving it next to its definition.
static void ec_eval_even_strategy(ec_curve_t* image, ec_point_t* points, unsigned short points_len,
ec_point_t* A24, const ec_point_t *kernel, const int isog_len);
void kps_clear(int i); // Clear memory assigned by KPS
// hybrid velu formulae
static inline void kps(uint64_t const i, ec_point_t const P, ec_point_t const A)
/* KPS structure for isogenies of degree 2 or 4 */
typedef struct
{
// Next branch only depends on a fixed public bound (named gap)
if (TORSION_ODD_PRIMES[i] <= gap)
kps_t(i, P, A);
else
kps_s(i, P, A);
}
static inline void xisog(ec_point_t* B, uint64_t const i, ec_point_t const A)
ec_point_t K;
} ec_kps2_t;
typedef struct
{
// Next branch only depends on a fixed public bound (named gap)
if (TORSION_ODD_PRIMES[i] <= gap)
xisog_t(B, i, A);
else
xisog_s(B, i, A);
}
ec_point_t K[3];
} ec_kps4_t;
static inline void xeval(ec_point_t* Q, uint64_t const i, ec_point_t const P, ec_point_t const A)
{
// Next branch only depends on a fixed public bound (named gap)
if (TORSION_ODD_PRIMES[i] <= gap)
xeval_t(Q, i, P);
else
xeval_s(Q, i, P, A);
}
// Isogeny construction. Each function takes the kps structure as its first,
// non-const argument; presumably it is filled with the constants the matching
// xeval_* function reads - confirm against the implementation.
void xisog_2(ec_kps2_t *kps, ec_point_t *B, const ec_point_t P); // degree-2 isogeny construction
void xisog_2_singular(ec_kps2_t *kps, ec_point_t *B24, ec_point_t A24); // NOTE(review): presumably the degree-2 case whose kernel is (0, 0) - confirm
void xisog_4(ec_kps4_t *kps, ec_point_t *B, const ec_point_t P); // degree-4 isogeny construction
void xisog_4_singular(ec_kps4_t *kps, ec_point_t *B24, const ec_point_t P, ec_point_t A24);
// Isogeny evaluation: maps the lenQ points of Q into R using the constants in kps
void xeval_2(ec_point_t *R, ec_point_t *const Q, const int lenQ, const ec_kps2_t *kps);
void xeval_2_singular(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_kps2_t *kps);
void xeval_4(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_kps4_t *kps);
void xeval_4_singular(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_point_t P, const ec_kps4_t *kps);
#endif

View File

@@ -1,28 +0,0 @@
#ifndef _POLY_H_
#define _POLY_H_
#include <fp2.h>
typedef fp2_t *poly; // Polynomials are arrays of coeffs over Fq, lowest degree first
// Polynomial multiplication variants; lenf / leng are the numbers of coefficients of f / g
void poly_mul(poly h, const poly f, const int lenf, const poly g, const int leng);
void poly_mul_low(poly h, const int n, const poly f, const int lenf, const poly g, const int leng);
void poly_mul_middle(poly h, const poly g, const int leng, const poly f, const int lenf);
void poly_mul_selfreciprocal(poly h, const poly g, const int leng, const poly f, const int lenf);
// Product trees: H receives the tree nodes and DEG their degrees, starting at index root.
// The LENFeq2 / LENFeq3 variants take the n input factors as fixed-size rows of 2 / 3 coefficients.
void product_tree(poly H[], int DEG[], const int root, const poly F[], const int LENF, const int n);
void product_tree_LENFeq2(poly H[], int DEG[], const int root, const fp2_t F[][2], const int n);
void product_tree_LENFeq3(poly H[], int DEG[], const int root, const fp2_t F[][3], const int n);
void product_tree_selfreciprocal(poly H[], int DEG[], const int root, const poly F[], const int LENF, const int n);
void product_tree_selfreciprocal_LENFeq3(poly H[], int DEG[], const int root, const fp2_t F[][3], const int n);
void clear_tree(poly H[], const int root, const int n);
// Product of the n field elements F[0..n-1]
// NOTE(review): the result for n == 0 is not visible from this header - confirm before relying on it
void product(fp2_t *c, const fp2_t F[], const int n);
// Reciprocals, reduction and multi-point evaluation (remainder trees)
void reciprocal(poly h, fp2_t *c, const poly f, const int lenf, const int n);
void poly_redc(poly h, const poly g, const int leng, const poly f, const int lenf,const poly f_inv, const fp2_t c);
void reciprocal_tree(poly *R, fp2_t *A, const int leng, const poly H[], const int DEG[], const int root, const int n);
void multieval_unscaled(fp2_t REM[], const poly g, const int leng, const poly R[], const fp2_t A[], const poly H[], const int DEG[], const int root, const int n);
void multieval_scaled(fp2_t REM[], const poly G, const poly H[], const int DEG[], const int root, const int n);
#endif /* _POLY_H_ */

View File

@@ -1,50 +0,0 @@
#ifndef _SDACS_H_
#define _SDACS_H_
// Bit strings (as '0'/'1' characters), one per entry.
// SDAC_P_0..SDAC_P_12 form the first group (13 entries) and
// SDAC_M_0..SDAC_M_17 the second group (18 entries).
// NOTE(review): presumably one chain per odd prime factor of p+1 (P) and p-1 (M) - confirm.
static char SDAC_P_0[] = "0";
static char SDAC_P_1[] = "10";
static char SDAC_P_2[] = "100";
static char SDAC_P_3[] = "0100";
static char SDAC_P_4[] = "10000";
static char SDAC_P_5[] = "110000";
static char SDAC_P_6[] = "100000";
static char SDAC_P_7[] = "1100010001";
static char SDAC_P_8[] = "1001010000";
static char SDAC_P_9[] = "0101001000";
static char SDAC_P_10[] = "110110010000";
static char SDAC_P_11[] = "10000000000";
static char SDAC_P_12[] = "1010100001001000";
// SDAC_M_0 is the empty string (length 0)
static char SDAC_M_0[] = "";
static char SDAC_M_1[] = "000";
static char SDAC_M_2[] = "1010";
static char SDAC_M_3[] = "100010";
static char SDAC_M_4[] = "0010000";
static char SDAC_M_5[] = "110000000";
static char SDAC_M_6[] = "1010101010";
static char SDAC_M_7[] = "1010001000";
static char SDAC_M_8[] = "1001000000";
static char SDAC_M_9[] = "0100001000";
static char SDAC_M_10[] ="101101010000";
static char SDAC_M_11[] = "100100010010";
static char SDAC_M_12[] = "010100011000";
static char SDAC_M_13[] = "101010000001";
static char SDAC_M_14[] = "010100001000";
static char SDAC_M_15[] = "1101010010000";
static char SDAC_M_16[] = "1001010001010";
static char SDAC_M_17[] = "101001000000101";
// All 31 strings in one table: indices 0..12 are SDAC_P_*, indices 13..30 are SDAC_M_*
static char *SDACs[31] = {
SDAC_P_0, SDAC_P_1, SDAC_P_2, SDAC_P_3, SDAC_P_4,
SDAC_P_5, SDAC_P_6, SDAC_P_7, SDAC_P_8, SDAC_P_9,
SDAC_P_10, SDAC_P_11, SDAC_P_12,
SDAC_M_0, SDAC_M_1, SDAC_M_2, SDAC_M_3, SDAC_M_4,
SDAC_M_5, SDAC_M_6, SDAC_M_7, SDAC_M_8, SDAC_M_9,
SDAC_M_10, SDAC_M_11, SDAC_M_12, SDAC_M_13, SDAC_M_14,
SDAC_M_15, SDAC_M_16, SDAC_M_17
};
// LENGTHS[k] is the number of characters of SDACs[k] (same order, 31 entries);
// it must be kept in sync by hand whenever a string above changes
static int LENGTHS[] = {
1, 2, 3, 4, 5, 6, 6, 10, 10, 10, 12, 11, 16, 0, 3, 4, 6, 7, 9, 10, 10, 10, 10, 12, 12, 12, 12, 12, 13, 13, 15
};
#endif

View File

@@ -1,28 +0,0 @@
#ifndef TEDWARDS_H
#define TEDWARDS_H
#include <fp2.h>
#include "ec.h"
// Point on the twisted Edwards curve
// a*x^2+y^2=1+d*x^2*y^2
// stored with four fp2 coordinates (x, y, z, t)
typedef struct ted_point_t {
fp2_t x;
fp2_t y;
fp2_t z;
fp2_t t; // t = x*y/z
} ted_point_t;
// Basic point utilities
void ted_init(ted_point_t* P);
bool is_ted_equal(ted_point_t const* P1, ted_point_t const* P2);
void copy_ted_point(ted_point_t* P, ted_point_t const* Q);
// Group operations; the first argument receives the result
void ted_neg(ted_point_t* Q, ted_point_t const* P);
void ted_dbl(ted_point_t* Q, ted_point_t const* P, ec_curve_t const* E);
void ted_add(ted_point_t* S, ted_point_t const* P, ted_point_t const* Q, ec_curve_t const* E);
// Conversions between the Montgomery and twisted Edwards models
// (in mont_to_ted, E is the output and A the input curve)
void mont_to_ted(ec_curve_t* E, ec_curve_t const* A);
void mont_to_ted_point(ted_point_t* Q, ec_point_t const* P, ec_curve_t const* A);
void ted_to_mont_point(ec_point_t* Q, ted_point_t const* P);
#endif

View File

@@ -1,17 +1 @@
# Sources of the EC library for the current variant (all taken from ${ECX_DIR}).
set(SOURCE_FILES_EC_${SVARIANT_UPPER}_REF
${ECX_DIR}/poly-mul.c
${ECX_DIR}/poly-redc.c
${ECX_DIR}/ec.c
${ECX_DIR}/tedwards.c
${ECX_DIR}/kps.c
${ECX_DIR}/xisog.c
${ECX_DIR}/xeval.c
${ECX_DIR}/isog_chains.c
${ECX_DIR}/basis.c
)
# The library target name comes from LIB_EC_<VARIANT>.
add_library(${LIB_EC_${SVARIANT_UPPER}} ${SOURCE_FILES_EC_${SVARIANT_UPPER}_REF})
# Include paths and optimisation flags are PRIVATE: they apply when building
# this library and are not propagated to targets that link against it.
target_include_directories(${LIB_EC_${SVARIANT_UPPER}} PRIVATE ${INC_INTBIG} ${INC_PRECOMP_${SVARIANT_UPPER}} ${INC_PUBLIC} ${INC_GF_${SVARIANT_UPPER}} ${INC_COMMON} ${INC_EC})
target_compile_options(${LIB_EC_${SVARIANT_UPPER}} PRIVATE ${C_OPT_FLAGS})
# Test executables for this module live in the test/ subdirectory.
add_subdirectory(test)
include(../lvlx.cmake)

View File

@@ -1,36 +1 @@
# Unit-test executables of the EC module for the current variant, followed by
# their CTest registration. SVARIANT_LOWER/SVARIANT_UPPER, ECX_DIR, the INC_* and
# LIB_* variables and SQISIGN_TEST_REPS are expected to be set by the including
# scope.
#
# Changes relative to the previous revision of this script:
#  * tests are registered with add_test(NAME ... COMMAND ...), the only signature
#    that resolves an executable *target* name to the built binary; the legacy
#    positional form only worked because ctest looks the name up relative to its
#    working directory;
#  * the duplicated ${INC_GF_...} entry in the fp2 test include list is dropped
#    (search order is unchanged because the first occurrence is kept).

# --- fp2 arithmetic -----------------------------------------------------------
add_executable(fp2.test_${SVARIANT_LOWER} ${ECX_DIR}/test/fp2-test.c)
target_include_directories(fp2.test_${SVARIANT_LOWER} PUBLIC
    ${INC_GF_${SVARIANT_UPPER}}
    ${INC_PRECOMP_${SVARIANT_UPPER}}
    ${PROJECT_SOURCE_DIR}/include
    ../include
    ${INC_EC}
    ${INC_COMMON}
)
target_link_libraries(fp2.test_${SVARIANT_LOWER} ${LIB_GF_${SVARIANT_UPPER}})

# --- polynomial multiplication ------------------------------------------------
add_executable(poly-mul.test_${SVARIANT_LOWER} ${ECX_DIR}/test/poly-mul-test.c)
target_include_directories(poly-mul.test_${SVARIANT_LOWER} PUBLIC
    ${INC_GF_${SVARIANT_UPPER}}
    ${INC_PRECOMP_${SVARIANT_UPPER}}
    ${PROJECT_SOURCE_DIR}/include
    ../include
    ${INC_EC}
    ${INC_COMMON}
)
target_link_libraries(poly-mul.test_${SVARIANT_LOWER}
    ${LIB_GF_${SVARIANT_UPPER}}
    ${LIB_EC_${SVARIANT_UPPER}}
)

# --- polynomial reduction -----------------------------------------------------
add_executable(poly-redc.test_${SVARIANT_LOWER} ${ECX_DIR}/test/poly-redc-test.c)
target_include_directories(poly-redc.test_${SVARIANT_LOWER} PUBLIC
    ${INC_GF_${SVARIANT_UPPER}}
    ${INC_PRECOMP_${SVARIANT_UPPER}}
    ${PROJECT_SOURCE_DIR}/include
    ../include
    ${INC_EC}
    ${INC_COMMON}
)
target_link_libraries(poly-redc.test_${SVARIANT_LOWER}
    ${LIB_GF_${SVARIANT_UPPER}}
    ${LIB_EC_${SVARIANT_UPPER}}
)

# --- Montgomery curve arithmetic ----------------------------------------------
add_executable(mont.test_${SVARIANT_LOWER} ${ECX_DIR}/test/mont-test.c)
target_include_directories(mont.test_${SVARIANT_LOWER} PUBLIC
    ${INC_GF_${SVARIANT_UPPER}}
    ${INC_INTBIG}
    ${INC_PRECOMP_${SVARIANT_UPPER}}
    ${PROJECT_SOURCE_DIR}/include
    ../include
    ${INC_EC}
    ${INC_COMMON}
    .
)
target_link_libraries(mont.test_${SVARIANT_LOWER}
    ${LIB_PRECOMP_${SVARIANT_UPPER}}
    ${LIB_INTBIG}
    ${LIB_GF_${SVARIANT_UPPER}}
    ${LIB_EC_${SVARIANT_UPPER}}
)

# --- EC tests (also compiles the shared test helpers) -------------------------
add_executable(ec.test_${SVARIANT_LOWER}
    ${ECX_DIR}/test/ec-test.c
    ${ECX_DIR}/test/test_extras.c
)
target_include_directories(ec.test_${SVARIANT_LOWER} PUBLIC
    ${ECX_DIR}/test
    ${INC_GF_${SVARIANT_UPPER}}
    ${INC_INTBIG}
    ${INC_PRECOMP_${SVARIANT_UPPER}}
    ${PROJECT_SOURCE_DIR}/include
    ../include
    ${INC_EC}
    ${INC_COMMON}
    .
)
target_link_libraries(ec.test_${SVARIANT_LOWER}
    ${LIB_PRECOMP_${SVARIANT_UPPER}}
    ${LIB_INTBIG}
    ${LIB_GF_${SVARIANT_UPPER}}
    ${LIB_EC_${SVARIANT_UPPER}}
)

# --- Velu formulas ------------------------------------------------------------
add_executable(velu.test_${SVARIANT_LOWER} ${ECX_DIR}/test/velu-test.c)
target_include_directories(velu.test_${SVARIANT_LOWER} PUBLIC
    ${INC_GF_${SVARIANT_UPPER}}
    ${INC_INTBIG}
    ${INC_PRECOMP_${SVARIANT_UPPER}}
    ${PROJECT_SOURCE_DIR}/include
    ../include
    ${INC_EC}
    ${INC_COMMON}
    .
)
target_link_libraries(velu.test_${SVARIANT_LOWER}
    ${LIB_PRECOMP_${SVARIANT_UPPER}}
    ${LIB_INTBIG}
    ${LIB_GF_${SVARIANT_UPPER}}
    ${LIB_EC_${SVARIANT_UPPER}}
)

# --- isogeny chains -----------------------------------------------------------
add_executable(isog.test_${SVARIANT_LOWER} ${ECX_DIR}/test/isog-test.c)
target_include_directories(isog.test_${SVARIANT_LOWER} PUBLIC
    ${INC_GF_${SVARIANT_UPPER}}
    ${INC_INTBIG}
    ${INC_PRECOMP_${SVARIANT_UPPER}}
    ${PROJECT_SOURCE_DIR}/include
    ../include
    ${INC_EC}
    ${INC_COMMON}
    .
)
target_link_libraries(isog.test_${SVARIANT_LOWER}
    ${LIB_PRECOMP_${SVARIANT_UPPER}}
    ${LIB_INTBIG}
    ${LIB_GF_${SVARIANT_UPPER}}
    ${LIB_EC_${SVARIANT_UPPER}}
)

# --- CTest registration -------------------------------------------------------
# Test names and command-line arguments are unchanged; only the signature differs.
add_test(NAME ec_fp2.test_${SVARIANT_LOWER} COMMAND fp2.test_${SVARIANT_LOWER} ${SQISIGN_TEST_REPS})
add_test(NAME ec_poly-mul.test_${SVARIANT_LOWER} COMMAND poly-mul.test_${SVARIANT_LOWER} ${SQISIGN_TEST_REPS})
add_test(NAME ec_poly-redc.test_${SVARIANT_LOWER} COMMAND poly-redc.test_${SVARIANT_LOWER} ${SQISIGN_TEST_REPS})
add_test(NAME ec_mont.test_${SVARIANT_LOWER} COMMAND mont.test_${SVARIANT_LOWER} ${SQISIGN_TEST_REPS})
# The EC test binary takes an extra leading "test" argument.
add_test(NAME ec_ec.test_${SVARIANT_LOWER} COMMAND ec.test_${SVARIANT_LOWER} test ${SQISIGN_TEST_REPS})
add_test(NAME ec_velu.test_${SVARIANT_LOWER} COMMAND velu.test_${SVARIANT_LOWER} ${SQISIGN_TEST_REPS})
add_test(NAME ec_isog.test_${SVARIANT_LOWER} COMMAND isog.test_${SVARIANT_LOWER} ${SQISIGN_TEST_REPS})

# NOTE(review): this text comes from a "-1,36 +1" diff hunk, so the explicit
# definitions above and this include are presumably the old and the new version
# of the same file — confirm that they are not meant to be active together.
include(../../lvlx_test.cmake)

View File

@@ -1,400 +0,0 @@
#ifndef EC_TESTS_H
#define EC_TESTS_H
#include "test_extras.h"
#include <stdio.h>
#include <string.h>
#include <bench.h> //////// NOTE: enable later
#include "test-basis.h"
#include "ec_params.h"
// Global constants
extern const digit_t p[NWORDS_FIELD];
// Benchmark and test parameters
static int BENCH_LOOPS = 1000; // Number of iterations per bench
static int TEST_LOOPS = 512; // Number of iterations per test
// Known-answer tests for the x-only curve arithmetic. xDBL, xADD, xMUL (with a
// one-word and a four-word scalar) and xDBLMUL are run on hard-coded inputs; the
// result is normalized to x = X/Z, converted back with fp2_frommont and compared
// word-by-word with a hard-coded expected value.
// Returns true when every comparison matches; on the first mismatch it jumps to
// out0, prints FAILED and returns false.
// NOTE(review): the vectors fill limbs [0..3] only, so this presumably assumes a
// four-word field/scalar representation — confirm against NWORDS_FIELD/NWORDS_ORDER.
bool ec_test()
{ // Tests for ecc arithmetic
bool OK = true;
int passed;
ec_point_t P = {0}, Q = {0}, R = {0}, S = {0}, SS = {0}, PQ = {0};
// AC carries the curve data as an ec_point_t (x = 0, z = 1). It is passed to xDBL
// as-is and cast to ec_curve_t* for xMUL/xDBLMUL below.
// NOTE(review): the cast presumably relies on ec_point_t and ec_curve_t sharing
// the same layout — confirm.
ec_point_t AC = {0};
digit_t k[NWORDS_ORDER] = {0}, l[NWORDS_ORDER] = {0};
printf("\n--------------------------------------------------------------------------------------------------------\n\n");
printf("Testing ecc functions: \n\n");
// Point doubling
passed = 1;
P.x.re[0] = 0xDFD70ED0861BD329; P.x.re[1] = 0x20ACD3758C7F5540; P.x.re[2] = 0x3DCCDC007277F80A; P.x.re[3] = 0x18D6D2A22981DCE1;
P.x.im[0] = 0x3C23730A3F08F38C; P.x.im[1] = 0x98BB973AFD3D954D; P.x.im[2] = 0x8D98ADFC2829AE8A; P.x.im[3] = 0x21A2464D6369AFBA;
P.z.re[0] = 0x01;
AC.z.re[0] = 0x01;
fp2_tomont(&AC.z, &AC.z);
// R <- P converted with fp2_tomont; P itself keeps the raw input values.
fp2_tomont(&R.x, &P.x);
fp2_tomont(&R.z, &P.z);
xDBL(&S, &R, &AC);
fp2_copy(&SS.x, &S.x); // Copy of S = SS <- 2P
fp2_copy(&SS.z, &S.z);
// Normalize: S.x <- S.x / S.z, then convert back before comparing.
fp2_inv(&S.z);
fp2_mul(&S.x, &S.x, &S.z);
fp2_frommont(&S.x, &S.x);
// Expected x-coordinate of the doubling result (R is reused as scratch).
R.x.re[0] = 0x5950EE0A4AF90FC8; R.x.re[1] = 0x16488065A0A98B08; R.x.re[2] = 0xCE65322229DA0FD1; R.x.re[3] = 0x270A35FF781EE204;
R.x.im[0] = 0x564447FD9EC57F6B; R.x.im[1] = 0x2EE24E984294F729; R.x.im[2] = 0x53A6C7360E972C71; R.x.im[3] = 0x4FCF4B9928A7C7E;
if (compare_words((digit_t*)&R.x, (digit_t*)&S.x, NWORDS_FIELD*2)!=0) { passed=0; goto out0; }
// Point addition: xADD is called with SS (= 2P), S (= Q) and PQ; the result
// overwrites S.
Q.x.re[0] = 0xC46076A670C70053; Q.x.re[1] = 0x97517AFA3AB9ED13; Q.x.re[2] = 0x349644C942EDF993; Q.x.re[3] = 0xBB4A4DB6F29AF9E;
Q.x.im[0] = 0x8B47629FB5A15BB0; Q.x.im[1] = 0x4EC6E809953C1A10; Q.x.im[2] = 0x1F83F0EC6CBB84D6; Q.x.im[3] = 0x1D8417C1D33265D3;
Q.z.re[0] = 0x01;
PQ.x.re[0] = 0x853F66D11BE5534F; PQ.x.re[1] = 0x27C8FD4E52D03D4A; PQ.x.re[2] = 0xF88EA78D0A0C29D2; PQ.x.re[3] = 0x2F6DFB07D397A067;
PQ.x.im[0] = 0xE8DBC4AA34434BA1; PQ.x.im[1] = 0x7A73AE182636F8A0; PQ.x.im[2] = 0x419EC260137868EB; PQ.x.im[3] = 0x129B3E301703D43F;
PQ.z.re[0] = 0x01;
fp2_tomont(&S.x, &Q.x);
fp2_tomont(&S.z, &Q.z);
// PQ is converted in place and stays in that form for the xDBLMUL call below.
fp2_tomont(&PQ.x, &PQ.x);
fp2_tomont(&PQ.z, &PQ.z);
xADD(&S, &SS, &S, &PQ);
fp2_inv(&S.z);
fp2_mul(&S.x, &S.x, &S.z);
fp2_frommont(&S.x, &S.x);
R.x.re[0] = 0xED0BEB8F93AB4FF9; R.x.re[1] = 0x27CF508B80CD49BF; R.x.re[2] = 0x38A6134DFA04B2BA; R.x.re[3] = 0x27B4CB15E109EF1F;
R.x.im[0] = 0x6F731BA6FD227BDE; R.x.im[1] = 0x14C12335341167F8; R.x.im[2] = 0xECA7B60F7866E27A; R.x.im[3] = 0x2A7A79A152880457;
if (compare_words((digit_t*)&R.x, (digit_t*)&S.x, NWORDS_FIELD*2) != 0) { passed = 0; goto out0; }
// Scalar multiplication of P by the one-word scalar 126.
fp2_tomont(&R.x, &P.x);
fp2_tomont(&R.z, &P.z);
k[0] = 126;
xMUL(&S, &R, k, (ec_curve_t*)&AC);
fp2_inv(&S.z);
fp2_mul(&S.x, &S.x, &S.z);
fp2_frommont(&S.x, &S.x);
R.x.re[0] = 0xDE80F87A1203A147; R.x.re[1] = 0xD59E1215928A3B2D; R.x.re[2] = 0xD5A67F83A5A8CE46; R.x.re[3] = 0xA11E162488C9CDF;
R.x.im[0] = 0x9417D0D79A26741B; R.x.im[1] = 0x8B1F47D6F0FE5EEC; R.x.im[2] = 0xE52188DCB054CE36; R.x.im[3] = 0x1A8075A6C3148AB3;
if (compare_words((digit_t*)&R.x, (digit_t*)&S.x, NWORDS_FIELD*2) != 0) { passed = 0; goto out0; }
// Scalar multiplication of P by a four-word scalar.
fp2_tomont(&R.x, &P.x);
fp2_tomont(&R.z, &P.z);
k[0] = 0xE77AD6B6C6B2D8CD;
k[1] = 0xDE43A0B600F38D12;
k[2] = 0xA35F4A7897E17CE2;
k[3] = 0x10ACB62E614D1237;
xMUL(&S, &R, k, (ec_curve_t*)&AC);
fp2_inv(&S.z);
fp2_mul(&S.x, &S.x, &S.z);
fp2_frommont(&S.x, &S.x);
R.x.re[0] = 0xD3938B0A68A3E7C0; R.x.re[1] = 0xE0667113208A0595; R.x.re[2] = 0x258F314C84E9CB60; R.x.re[3] = 0x14984BA7CA59AB71;
R.x.im[0] = 0xFE728423EE3BFEF4; R.x.im[1] = 0xBF68C42FE21AE0E4; R.x.im[2] = 0xA8FAF9C9528609CA; R.x.im[3] = 0x1225EC77A1DC0285;
if (compare_words((digit_t*)&R.x, (digit_t*)&S.x, NWORDS_FIELD*2) != 0) { passed = 0; goto out0; }
// Double-scalar multiplication: R holds Q, and SS (= 2P) is normalized to
// x = X/Z with its z-coordinate replaced by R.z before the call.
fp2_tomont(&R.x, &Q.x);
fp2_tomont(&R.z, &Q.z);
k[0] = 0xE77AD6B6C6B2D8CD;
k[1] = 0xDE43A0B600F38D12;
k[2] = 0xA35F4A7897E17CE2;
k[3] = 0x10ACB62E614D1237;
l[0] = 0x34AB78B6C6B2D8C0;
l[1] = 0xDE6B2D8CD00F38D1;
l[2] = 0xA35F4A7897E17CE2;
l[3] = 0x20ACF4A789614D13;
fp2_inv(&SS.z);
fp2_mul(&SS.x, &SS.x, &SS.z);
fp2_copy(&SS.z, &R.z);
xDBLMUL(&S, &R, k, &SS, l, &PQ, (ec_curve_t*)&AC);
fp2_inv(&S.z);
fp2_mul(&S.x, &S.x, &S.z);
fp2_frommont(&S.x, &S.x);
R.x.re[0] = 0x554E1ADC609B992F; R.x.re[1] = 0xE407D961F8CC4C42; R.x.re[2] = 0x1CF626AFED5A68CE; R.x.re[3] = 0x6D02692EE110483;
R.x.im[0] = 0x16FB094E831C8997; R.x.im[1] = 0xFDE4ECF31DC5F702; R.x.im[2] = 0x89303D868DFAD7B4; R.x.im[3] = 0xC91ACE81346F22D;
if (compare_words((digit_t*)&R.x, (digit_t*)&S.x, NWORDS_FIELD*2) != 0) { passed = 0; goto out0; }
// Common exit: report the outcome of the whole sequence.
out0:
if (passed==1) printf(" ECC arithmetic tests ............................................ PASSED");
else { printf(" ECC arithmetic tests... FAILED"); printf("\n"); return false; }
printf("\n");
return OK;
}
// Round-trip tests for ec_dlog_2 and ec_dlog_3. For TEST_LOOPS scalar pairs
// (k, l) the point R is computed with xDBLMUL from the basis (P, Q, PQ); the
// scalars recovered by the dlog routine are then compared with the expected pair.
// The basis x-coordinates (xP2/xQ2/xPQ2, xP3/xQ3/xPQ3) come from test-basis.h.
// Returns true when both parts pass; prints FAILED and returns false as soon as
// one part fails.
bool dlog_test()
{ // Tests for dlog
bool OK = true;
int passed;
// NOTE(review): S and SS are declared but never used in this function.
ec_point_t P = {0}, Q = {0}, R = {0}, S = {0}, SS = {0}, PQ = {0};
ec_curve_t AC = {0};
ec_basis_t PQ2;
digit_t scalarP[NWORDS_ORDER], scalarQ[NWORDS_ORDER], k[NWORDS_ORDER] = {0}, l[NWORDS_ORDER] = {0};
digit_t kt[NWORDS_ORDER], lt[NWORDS_ORDER], f1[NWORDS_ORDER] = {0}, f2[NWORDS_ORDER] = {0}, zero[NWORDS_ORDER] = {0}, tpFdiv2[NWORDS_ORDER] = {0}, tpF[NWORDS_ORDER] = {0};
printf("\n--------------------------------------------------------------------------------------------------------\n\n");
printf("Testing dlog functions: \n\n");
// dlog2 testing
passed = 1;
// Load the basis: x from the table (converted with fp2_tomont), z = 1.
fp2_tomont(&P.x, &xP2);
fp_mont_setone(P.z.re);
fp_set(P.z.im, 0);
fp2_tomont(&Q.x, &xQ2);
fp_mont_setone(Q.z.re);
fp_set(Q.z.im, 0);
fp2_tomont(&PQ.x, &xPQ2);
fp_mont_setone(PQ.z.re);
fp_set(PQ.z.im, 0);
// Curve data: A = 0 (from the zero initializer), C = 1.
AC.C.re[0] = 0x01;
// f1 and f2 are local copies of the constants TWOpFm1 and TWOpF.
// NOTE(review): fp_copy is used on NWORDS_ORDER-sized buffers — confirm that the
// number of words it copies fits.
fp_copy(f1, TWOpFm1);
fp_copy(f2, TWOpF);
fp2_tomont(&AC.C, &AC.C);
copy_point(&PQ2.P, &P);
copy_point(&PQ2.Q, &Q);
copy_point(&PQ2.PmQ, &PQ);
// Starting scalars; each iteration decrements k by 1 and l by 2 before use.
k[0] = 0xFFFFFFFFFFFFFFFF;
k[1] = 0x00000000000007FF;
l[0] = 0xFFFFFFFFFFFFFFFE;
l[1] = 0x00000000000007FF;
for (int n = 0; n < TEST_LOOPS; n++)
{
k[0] -= 1;
l[0] -= 2;
xDBLMUL(&R, &P, k, &Q, l, &PQ, &AC);
ec_dlog_2(scalarP, scalarQ, &PQ2, &R, &AC);
// kt/lt hold the expected output; they start as copies of k and l.
memcpy(kt, k, NWORDS_ORDER*RADIX/8);
memcpy(lt, l, NWORDS_ORDER*RADIX/8);
// When compare_words(k, f1) == 1, or compare_words(l, f1) == 1 while k is zero or
// equal to f1, every non-zero expected scalar is replaced via sub_test with f2.
// NOTE(review): presumably compare_words returns 1 for "greater than" and
// sub_test(kt, f2, kt, ...) computes f2 - kt, matching a normalized representative
// returned by ec_dlog_2 — confirm against those helpers.
if (compare_words(k, f1, NWORDS_ORDER) == 1 ||
(compare_words(l, f1, NWORDS_ORDER) == 1 && (compare_words(k, zero, NWORDS_ORDER) == 0 || compare_words(k, f1, NWORDS_ORDER) == 0))) {
if (compare_words(k, zero, NWORDS_ORDER) != 0) {
sub_test(kt, f2, kt, NWORDS_ORDER);
}
if (compare_words(l, zero, NWORDS_ORDER) != 0) {
sub_test(lt, f2, lt, NWORDS_ORDER);
}
}
if (compare_words((digit_t*)scalarP, (digit_t*)kt, NWORDS_ORDER) != 0 || compare_words((digit_t*)scalarQ, (digit_t*)lt, NWORDS_ORDER) != 0) { passed = 0; break; }
}
if (passed == 1) printf(" dlog2 tests ..................................................... PASSED");
else { printf(" dlog2 tests... FAILED"); printf("\n"); return false; }
printf("\n");
// dlog3 testing
passed = 1;
// Same setup as above with the xP3/xQ3/xPQ3 basis.
fp2_tomont(&P.x, &xP3);
fp_mont_setone(P.z.re);
fp_set(P.z.im, 0);
fp2_tomont(&Q.x, &xQ3);
fp_mont_setone(Q.z.re);
fp_set(Q.z.im, 0);
fp2_tomont(&PQ.x, &xPQ3);
fp_mont_setone(PQ.z.re);
fp_set(PQ.z.im, 0);
// NOTE(review): AC.C still holds the converted value from the dlog2 part at this
// point; only limb re[0] is overwritten before fp2_tomont is applied again —
// confirm that the remaining limbs are as intended.
AC.C.re[0] = 0x01;
fp_copy(tpFdiv2, THREEpFdiv2);
fp_copy(tpF, THREEpF);
fp2_tomont(&AC.C, &AC.C);
copy_point(&PQ2.P, &P);
copy_point(&PQ2.Q, &Q);
copy_point(&PQ2.PmQ, &PQ);
// Reset the second scalar word left over from the dlog2 part.
k[1] = 0;
l[1] = 0;
k[0] = 0x02153E468B91C6D1;
l[0] = 0x02153E468B91C6D0;
for (int n = 0; n < TEST_LOOPS; n++)
{
k[0] -= 1;
l[0] -= 2;
xDBLMUL(&R, &P, k, &Q, l, &PQ, &AC);
ec_dlog_3(scalarP, scalarQ, &PQ2, &R, &AC);
memcpy(kt, k, NWORDS_ORDER*RADIX/8);
memcpy(lt, l, NWORDS_ORDER*RADIX/8);
// Same adjustment as in the dlog2 loop, with tpFdiv2 as threshold and tpF as the
// sub_test operand; here the second clause only covers k == 0.
if (compare_words(k, tpFdiv2, NWORDS_ORDER) == 1 ||
(compare_words(l, tpFdiv2, NWORDS_ORDER) == 1 && compare_words(k, zero, NWORDS_ORDER) == 0)) {
if (compare_words(k, zero, NWORDS_ORDER) != 0) {
sub_test(kt, tpF, kt, NWORDS_ORDER);
}
if (compare_words(l, zero, NWORDS_ORDER) != 0) {
sub_test(lt, tpF, lt, NWORDS_ORDER);
}
}
if (compare_words((digit_t*)scalarP, (digit_t*)kt, NWORDS_ORDER) != 0 || compare_words((digit_t*)scalarQ, (digit_t*)lt, NWORDS_ORDER) != 0) { passed = 0; break; }
}
if (passed == 1) printf(" dlog3 tests ..................................................... PASSED");
else { printf(" dlog3 tests... FAILED"); printf("\n"); return false; }
printf("\n");
return OK;
}
// Benchmarks for the x-only curve arithmetic: xDBL, xADD, xMUL and xDBLMUL are
// each timed over BENCH_LOOPS calls with cpucycles() and the average cycle count
// per call is printed. Always returns true.
//
// All operands are zero-initialized so that the timed routines never read
// indeterminate stack memory (previously every input was uninitialized).
// NOTE(review): zero points/scalars are well-defined but may not be
// representative inputs — consider loading a real basis and random scalars.
bool ec_run()
{
    bool OK = true;
    int n;
    unsigned long long cycles, cycles1, cycles2;
    ec_point_t P = {0}, Q = {0}, R = {0}, PQ = {0}, AC = {0};
    digit_t k[NWORDS_ORDER] = {0}, l[NWORDS_ORDER] = {0};

    printf("\n--------------------------------------------------------------------------------------------------------\n\n");
    printf("Benchmarking ecc arithmetic: \n\n");

    // Point doubling
    cycles = 0;
    for (n = 0; n < BENCH_LOOPS; n++)
    {
        cycles1 = cpucycles();
        xDBL(&Q, &P, &AC);
        cycles2 = cpucycles();
        cycles = cycles + (cycles2 - cycles1);
    }
    // cycles is unsigned long long, hence %llu rather than %lld.
    printf(" Montgomery x-only doubling runs in .............................. %7llu cycles", cycles/BENCH_LOOPS);
    printf("\n");

    // Point addition
    cycles = 0;
    for (n = 0; n < BENCH_LOOPS; n++)
    {
        cycles1 = cpucycles();
        xADD(&R, &Q, &P, &PQ);
        cycles2 = cpucycles();
        cycles = cycles + (cycles2 - cycles1);
    }
    printf(" Montgomery x-only addition runs in .............................. %7llu cycles", cycles/BENCH_LOOPS);
    printf("\n");

    // Point multiplication (AC is an ec_point_t, cast as in the other tests)
    cycles = 0;
    for (n = 0; n < BENCH_LOOPS; n++)
    {
        cycles1 = cpucycles();
        xMUL(&Q, &P, k, (ec_curve_t*)&AC);
        cycles2 = cpucycles();
        cycles = cycles + (cycles2 - cycles1);
    }
    printf(" Montgomery x-only scalar multiplication runs in ................. %7llu cycles", cycles/BENCH_LOOPS);
    printf("\n");

    // Double-scalar point multiplication
    cycles = 0;
    for (n = 0; n < BENCH_LOOPS; n++)
    {
        cycles1 = cpucycles();
        xDBLMUL(&R, &P, k, &Q, l, &PQ, (ec_curve_t*)&AC);
        cycles2 = cpucycles();
        cycles = cycles + (cycles2 - cycles1);
    }
    printf(" Montgomery x-only double-scalar multiplication runs in .......... %7llu cycles", cycles/BENCH_LOOPS);
    printf("\n");

    return OK;
}
// Benchmarks for ec_dlog_2 and ec_dlog_3. For each of BENCH_LOOPS iterations a
// fresh pair of scalars is drawn with fprandom_test, R is computed with xDBLMUL
// (not timed) and only the dlog call is timed with cpucycles(). The average cycle
// count per call is printed. Always returns true.
bool dlog_run()
{
    bool OK = true;
    int n;
    unsigned long long cycles, cycles1, cycles2;
    ec_point_t P = {0}, Q = {0}, R = {0}, PQ = {0};
    ec_curve_t AC = {0};
    ec_basis_t PQ2;
    digit_t scalarP[NWORDS_ORDER], scalarQ[NWORDS_ORDER], k[NWORDS_ORDER] = {0}, l[NWORDS_ORDER] = {0};

    printf("\n--------------------------------------------------------------------------------------------------------\n\n");
    printf("Benchmarking dlog2: \n\n");

    // dlog2 computation: load the xP2/xQ2/xPQ2 basis with z = 1
    fp2_tomont(&P.x, &xP2);
    fp_mont_setone(P.z.re);
    fp_set(P.z.im, 0);
    fp2_tomont(&Q.x, &xQ2);
    fp_mont_setone(Q.z.re);
    fp_set(Q.z.im, 0);
    fp2_tomont(&PQ.x, &xPQ2);
    fp_mont_setone(PQ.z.re);
    fp_set(PQ.z.im, 0);

    // Curve data: A = 0 (zero initializer), C = 1
    AC.C.re[0] = 0x01;
    fp2_tomont(&AC.C, &AC.C);

    copy_point(&PQ2.P, &P);
    copy_point(&PQ2.Q, &Q);
    copy_point(&PQ2.PmQ, &PQ);

    cycles = 0;
    for (n = 0; n < BENCH_LOOPS; n++)
    {
        fprandom_test(k); fprandom_test(l);
        xDBLMUL(&R, &P, k, &Q, l, &PQ, &AC);
        cycles1 = cpucycles();
        ec_dlog_2(scalarP, scalarQ, &PQ2, &R, &AC);
        cycles2 = cpucycles();
        cycles = cycles + (cycles2 - cycles1);
    }
    // cycles is unsigned long long, hence %llu rather than %lld.
    printf(" dlog2 runs in ................................................... %7llu cycles", cycles/BENCH_LOOPS);
    printf("\n");

    // dlog3 computation: same procedure with the xP3/xQ3/xPQ3 basis; the curve
    // data in AC is reused unchanged.
    fp2_tomont(&P.x, &xP3);
    fp_mont_setone(P.z.re);
    fp_set(P.z.im, 0);
    fp2_tomont(&Q.x, &xQ3);
    fp_mont_setone(Q.z.re);
    fp_set(Q.z.im, 0);
    fp2_tomont(&PQ.x, &xPQ3);
    fp_mont_setone(PQ.z.re);
    fp_set(PQ.z.im, 0);

    copy_point(&PQ2.P, &P);
    copy_point(&PQ2.Q, &Q);
    copy_point(&PQ2.PmQ, &PQ);

    cycles = 0;
    for (n = 0; n < BENCH_LOOPS; n++)
    {
        fprandom_test(k); fprandom_test(l);
        xDBLMUL(&R, &P, k, &Q, l, &PQ, &AC);
        cycles1 = cpucycles();
        ec_dlog_3(scalarP, scalarQ, &PQ2, &R, &AC);
        cycles2 = cpucycles();
        cycles = cycles + (cycles2 - cycles1);
    }
    printf(" dlog3 runs in ................................................... %7llu cycles", cycles/BENCH_LOOPS);
    printf("\n");

    return OK;
}
#endif

View File

@@ -1,24 +0,0 @@
#ifndef TEST_BASIS_H
#define TEST_BASIS_H
#include "fp2.h"
// Hard-coded x-coordinates of torsion bases used by the EC tests. Each constant
// is an fp2_t given as two groups of four 64-bit words. The tests pass these
// values through fp2_tomont before use, so they are stored here in plain
// (pre-conversion) form.
// NOTE(review): these are non-static definitions in a header, so including this
// file from more than one translation unit will presumably produce duplicate
// symbols — confirm it is only included once per executable.
// Full-torsion basis for A=0 (excluding 2^f and huge prime factors)
const fp2_t xPA = {{0x7505815fb30f099e,0x89e78dbb4294c8df,0x7db9b4b1f7716d7b,0x13fcd4c87af65308},{0x93533c1017088fd4,0x6df9e398a1bb4cb1,0xc928f082be2e2b4c,0x17aa7e2906bef0af}};
const fp2_t xQA = {{0xe96336b75eb5a505,0x5640cecad0ad7b5a,0x1394f0771bc58ac1,0x18d92124656d68d9},{0xa54e8e24605754f0,0xe52de9790bbe4bb9,0x3bf9b7833f62e255,0x277a07644ec4f0e2}};
const fp2_t xPQA = {{0xc8fcceb408e3444c,0x9f8ca4d2c05c3287,0x259e496f17c0f529,0x0eb18a51c2a3dd1a},{0x1014dbe2534b8310,0x6b035ee3c371ea12,0x8354ecb4c111db6d,0x178259b78fe08093}};
const fp2_t xPB = {{0xbd0a2f0c9a5378ca,0x74af17405042203d,0x0ccdcb4b7f0b8c15,0x314c70951a92d8bf},{0xe889e6bc5f9842af,0xefb0edbb5e266ab3,0x7bfb9d05f1ba6962,0x0a5f3f4fe6f16514}};
const fp2_t xQB = {{0x137e215438caaf3b,0xc4403ee1b69f1382,0x2b5783edcefa7246,0x3015572698262f66},{0x8e88e4293f84536e,0x8d6dbc277f85ff77,0xb3f17b53b01da916,0x08dd3f4976c5dad1}};
const fp2_t xPQB = {{0xf0c2701a7050d9b9,0xc8fdb069c0234d3a,0x9ec25780f2b101a8,0x221a0565053e8ff4},{0xd8513bf6a05910ae,0x47ff2422258dfb3a,0xb98ccceae31ac407,0x21bcc8e659aaa1b3}};
// 2^f-torsion basis for A=0
const fp2_t xP2 = {{0xfc93bac7df77fd30,0xa8d37e10783215bd,0x4bd2ece4f148039b,0x2bd5b83f5f8c09fb},{0x444112970b59f12f,0x557b8b9beb55c276,0x633f97cd9464df6c,0x00a1b21b593a2dfd}};
const fp2_t xQ2 = {{0x6b4289960273222c,0xa290d8eb8e343a04,0x0c0a333f80a0ed68,0x31a58910e276aff0},{0xb7ca615ad7473865,0xeb6f72f20794f050,0x2941c3fe3203b94f,0x32ad5cbe915e467b}};
const fp2_t xPQ2 = {{0xac9f90005e47b095,0x47eafdafd5168836,0xb88aac8334acdad0,0x1a5cf52a20f665b4},{0x4baa70fb1f5fa99c,0xffb7ddb12c87f1a3,0xdd3a229d370a8484,0x1e992ad0a14baf03}};
// 3^g-torsion basis for A=0
const fp2_t xP3 = {{0x8cf496c2722f340d,0x3e329c5a507ad39c,0xa0c7caa3e4537e25,0x1371d43cf97de48e},{0xa4b94c97b8149e7d,0xd290853fa14704c7,0x158b854173c1b289,0x04c6dcda7872c23f}};
const fp2_t xQ3 = {{0x0f6380fd4c963950,0x101a22a245c4f563,0x601d3e30b21a5f43,0x0becd5f73b067949},{0xd364123c6806057e,0x8ff24fca9e060260,0x3b52df5bfb817901,0x30950462489b838f}};
const fp2_t xPQ3 = {{0xe04cab7169e64a82,0x56df573ea9295c19,0x06cbb6af8e341990,0x0f1046ca03017ca1},{0x2dac3457c35be728,0x2f59af21113f25f9,0xa0dc4f54eec2715d,0x102ecf9a7ff2f2ff}};
#endif

View File

@@ -1,17 +1 @@
# Reference-implementation sources of the EC module for the current variant.
# SVARIANT_UPPER and ECX_DIR are expected to be set by the including scope.
set(SOURCE_FILES_EC_${SVARIANT_UPPER}_REF
    ${ECX_DIR}/poly-mul.c
    ${ECX_DIR}/poly-redc.c
    ${ECX_DIR}/ec.c
    ${ECX_DIR}/tedwards.c
    ${ECX_DIR}/kps.c
    ${ECX_DIR}/xisog.c
    ${ECX_DIR}/xeval.c
    ${ECX_DIR}/isog_chains.c
    ${ECX_DIR}/basis.c
)

# Per-variant EC library built from the list above.
add_library(
    ${LIB_EC_${SVARIANT_UPPER}}
    ${SOURCE_FILES_EC_${SVARIANT_UPPER}_REF}
)

# Header search path used only while compiling the library itself.
target_include_directories(
    ${LIB_EC_${SVARIANT_UPPER}}
    PRIVATE
        ${INC_INTBIG}
        ${INC_PRECOMP_${SVARIANT_UPPER}}
        ${INC_PUBLIC}
        ${INC_GF_${SVARIANT_UPPER}}
        ${INC_COMMON}
        ${INC_EC}
)

target_compile_options(
    ${LIB_EC_${SVARIANT_UPPER}}
    PRIVATE
        ${C_OPT_FLAGS}
)

add_subdirectory(test)

# NOTE(review): this text comes from a "-1,17 +1" diff hunk, so the explicit
# target definition above and this include are presumably the old and the new
# version of the same file — confirm that they are not meant to be active together.
include(../lvlx.cmake)

View File

@@ -1,36 +1 @@
# Unit-test executables of the EC module for the current variant, followed by
# their CTest registration. SVARIANT_LOWER/SVARIANT_UPPER, ECX_DIR, the INC_* and
# LIB_* variables and SQISIGN_TEST_REPS are expected to be set by the including
# scope.
#
# Changes relative to the previous revision of this script:
#  * tests are registered with add_test(NAME ... COMMAND ...), the only signature
#    that resolves an executable *target* name to the built binary; the legacy
#    positional form only worked because ctest looks the name up relative to its
#    working directory;
#  * the duplicated ${INC_GF_...} entry in the fp2 test include list is dropped
#    (search order is unchanged because the first occurrence is kept).

# --- fp2 arithmetic -----------------------------------------------------------
add_executable(fp2.test_${SVARIANT_LOWER} ${ECX_DIR}/test/fp2-test.c)
target_include_directories(fp2.test_${SVARIANT_LOWER} PUBLIC
    ${INC_GF_${SVARIANT_UPPER}}
    ${INC_PRECOMP_${SVARIANT_UPPER}}
    ${PROJECT_SOURCE_DIR}/include
    ../include
    ${INC_EC}
    ${INC_COMMON}
)
target_link_libraries(fp2.test_${SVARIANT_LOWER} ${LIB_GF_${SVARIANT_UPPER}})

# --- polynomial multiplication ------------------------------------------------
add_executable(poly-mul.test_${SVARIANT_LOWER} ${ECX_DIR}/test/poly-mul-test.c)
target_include_directories(poly-mul.test_${SVARIANT_LOWER} PUBLIC
    ${INC_GF_${SVARIANT_UPPER}}
    ${INC_PRECOMP_${SVARIANT_UPPER}}
    ${PROJECT_SOURCE_DIR}/include
    ../include
    ${INC_EC}
    ${INC_COMMON}
)
target_link_libraries(poly-mul.test_${SVARIANT_LOWER}
    ${LIB_GF_${SVARIANT_UPPER}}
    ${LIB_EC_${SVARIANT_UPPER}}
)

# --- polynomial reduction -----------------------------------------------------
add_executable(poly-redc.test_${SVARIANT_LOWER} ${ECX_DIR}/test/poly-redc-test.c)
target_include_directories(poly-redc.test_${SVARIANT_LOWER} PUBLIC
    ${INC_GF_${SVARIANT_UPPER}}
    ${INC_PRECOMP_${SVARIANT_UPPER}}
    ${PROJECT_SOURCE_DIR}/include
    ../include
    ${INC_EC}
    ${INC_COMMON}
)
target_link_libraries(poly-redc.test_${SVARIANT_LOWER}
    ${LIB_GF_${SVARIANT_UPPER}}
    ${LIB_EC_${SVARIANT_UPPER}}
)

# --- Montgomery curve arithmetic ----------------------------------------------
add_executable(mont.test_${SVARIANT_LOWER} ${ECX_DIR}/test/mont-test.c)
target_include_directories(mont.test_${SVARIANT_LOWER} PUBLIC
    ${INC_GF_${SVARIANT_UPPER}}
    ${INC_INTBIG}
    ${INC_PRECOMP_${SVARIANT_UPPER}}
    ${PROJECT_SOURCE_DIR}/include
    ../include
    ${INC_EC}
    ${INC_COMMON}
    .
)
target_link_libraries(mont.test_${SVARIANT_LOWER}
    ${LIB_PRECOMP_${SVARIANT_UPPER}}
    ${LIB_INTBIG}
    ${LIB_GF_${SVARIANT_UPPER}}
    ${LIB_EC_${SVARIANT_UPPER}}
)

# --- EC tests (also compiles the shared test helpers) -------------------------
add_executable(ec.test_${SVARIANT_LOWER}
    ${ECX_DIR}/test/ec-test.c
    ${ECX_DIR}/test/test_extras.c
)
target_include_directories(ec.test_${SVARIANT_LOWER} PUBLIC
    ${ECX_DIR}/test
    ${INC_GF_${SVARIANT_UPPER}}
    ${INC_INTBIG}
    ${INC_PRECOMP_${SVARIANT_UPPER}}
    ${PROJECT_SOURCE_DIR}/include
    ../include
    ${INC_EC}
    ${INC_COMMON}
    .
)
target_link_libraries(ec.test_${SVARIANT_LOWER}
    ${LIB_PRECOMP_${SVARIANT_UPPER}}
    ${LIB_INTBIG}
    ${LIB_GF_${SVARIANT_UPPER}}
    ${LIB_EC_${SVARIANT_UPPER}}
)

# --- Velu formulas ------------------------------------------------------------
add_executable(velu.test_${SVARIANT_LOWER} ${ECX_DIR}/test/velu-test.c)
target_include_directories(velu.test_${SVARIANT_LOWER} PUBLIC
    ${INC_GF_${SVARIANT_UPPER}}
    ${INC_INTBIG}
    ${INC_PRECOMP_${SVARIANT_UPPER}}
    ${PROJECT_SOURCE_DIR}/include
    ../include
    ${INC_EC}
    ${INC_COMMON}
    .
)
target_link_libraries(velu.test_${SVARIANT_LOWER}
    ${LIB_PRECOMP_${SVARIANT_UPPER}}
    ${LIB_INTBIG}
    ${LIB_GF_${SVARIANT_UPPER}}
    ${LIB_EC_${SVARIANT_UPPER}}
)

# --- isogeny chains -----------------------------------------------------------
add_executable(isog.test_${SVARIANT_LOWER} ${ECX_DIR}/test/isog-test.c)
target_include_directories(isog.test_${SVARIANT_LOWER} PUBLIC
    ${INC_GF_${SVARIANT_UPPER}}
    ${INC_INTBIG}
    ${INC_PRECOMP_${SVARIANT_UPPER}}
    ${PROJECT_SOURCE_DIR}/include
    ../include
    ${INC_EC}
    ${INC_COMMON}
    .
)
target_link_libraries(isog.test_${SVARIANT_LOWER}
    ${LIB_PRECOMP_${SVARIANT_UPPER}}
    ${LIB_INTBIG}
    ${LIB_GF_${SVARIANT_UPPER}}
    ${LIB_EC_${SVARIANT_UPPER}}
)

# --- CTest registration -------------------------------------------------------
# Test names and command-line arguments are unchanged; only the signature differs.
add_test(NAME ec_fp2.test_${SVARIANT_LOWER} COMMAND fp2.test_${SVARIANT_LOWER} ${SQISIGN_TEST_REPS})
add_test(NAME ec_poly-mul.test_${SVARIANT_LOWER} COMMAND poly-mul.test_${SVARIANT_LOWER} ${SQISIGN_TEST_REPS})
add_test(NAME ec_poly-redc.test_${SVARIANT_LOWER} COMMAND poly-redc.test_${SVARIANT_LOWER} ${SQISIGN_TEST_REPS})
add_test(NAME ec_mont.test_${SVARIANT_LOWER} COMMAND mont.test_${SVARIANT_LOWER} ${SQISIGN_TEST_REPS})
# The EC test binary takes an extra leading "test" argument.
add_test(NAME ec_ec.test_${SVARIANT_LOWER} COMMAND ec.test_${SVARIANT_LOWER} test ${SQISIGN_TEST_REPS})
add_test(NAME ec_velu.test_${SVARIANT_LOWER} COMMAND velu.test_${SVARIANT_LOWER} ${SQISIGN_TEST_REPS})
add_test(NAME ec_isog.test_${SVARIANT_LOWER} COMMAND isog.test_${SVARIANT_LOWER} ${SQISIGN_TEST_REPS})

# NOTE(review): this text comes from a "-1,36 +1" diff hunk, so the explicit
# definitions above and this include are presumably the old and the new version
# of the same file — confirm that they are not meant to be active together.
include(../../lvlx_test.cmake)

View File

@@ -1,400 +0,0 @@
#ifndef EC_TESTS_H
#define EC_TESTS_H
#include "test_extras.h"
#include <stdio.h>
#include <string.h>
#include <bench.h> //////// NOTE: enable later
#include "test-basis.h"
#include "ec_params.h"
// Global constants
extern const digit_t p[NWORDS_FIELD];
// Benchmark and test parameters
static int BENCH_LOOPS = 1000; // Number of iterations per bench
static int TEST_LOOPS = 512; // Number of iterations per test
// Placeholder for the x-only curve arithmetic tests of this variant. The
// function currently prints a banner marked "(NOT IMPLEMENTED)" and returns
// true without running any check; the known-answer sequence (xDBL, xADD, xMUL,
// xDBLMUL) is kept inside the block comment below.
// NOTE(review): while the body is disabled, `passed` and all point/scalar locals
// are unused, and callers cannot distinguish "skipped" from "passed".
bool ec_test()
{ // Tests for ecc arithmetic
bool OK = true;
int passed;
ec_point_t P = {0}, Q = {0}, R = {0}, S = {0}, SS = {0}, PQ = {0};
ec_point_t AC = {0};
digit_t k[NWORDS_ORDER] = {0}, l[NWORDS_ORDER] = {0};
printf("\n--------------------------------------------------------------------------------------------------------\n\n");
printf("Testing ecc functions: (NOT IMPLEMENTED) \n\n");
// Disabled test body starts here.
/*
// Point doubling
passed = 1;
P.x.re[0] = 0xDFD70ED0861BD329; P.x.re[1] = 0x20ACD3758C7F5540; P.x.re[2] = 0x3DCCDC007277F80A; P.x.re[3] = 0x18D6D2A22981DCE1;
P.x.im[0] = 0x3C23730A3F08F38C; P.x.im[1] = 0x98BB973AFD3D954D; P.x.im[2] = 0x8D98ADFC2829AE8A; P.x.im[3] = 0x21A2464D6369AFBA;
P.z.re[0] = 0x01;
AC.z.re[0] = 0x01;
fp2_tomont(&AC.z, &AC.z);
fp2_tomont(&R.x, &P.x);
fp2_tomont(&R.z, &P.z);
xDBL(&S, &R, &AC);
fp2_copy(&SS.x, &S.x); // Copy of S = SS <- 2P
fp2_copy(&SS.z, &S.z);
fp2_inv(&S.z);
fp2_mul(&S.x, &S.x, &S.z);
fp2_frommont(&S.x, &S.x);
R.x.re[0] = 0x5950EE0A4AF90FC8; R.x.re[1] = 0x16488065A0A98B08; R.x.re[2] = 0xCE65322229DA0FD1; R.x.re[3] = 0x270A35FF781EE204;
R.x.im[0] = 0x564447FD9EC57F6B; R.x.im[1] = 0x2EE24E984294F729; R.x.im[2] = 0x53A6C7360E972C71; R.x.im[3] = 0x4FCF4B9928A7C7E;
if (compare_words((digit_t*)&R.x, (digit_t*)&S.x, NWORDS_FIELD*2)!=0) { passed=0; goto out0; }
Q.x.re[0] = 0xC46076A670C70053; Q.x.re[1] = 0x97517AFA3AB9ED13; Q.x.re[2] = 0x349644C942EDF993; Q.x.re[3] = 0xBB4A4DB6F29AF9E;
Q.x.im[0] = 0x8B47629FB5A15BB0; Q.x.im[1] = 0x4EC6E809953C1A10; Q.x.im[2] = 0x1F83F0EC6CBB84D6; Q.x.im[3] = 0x1D8417C1D33265D3;
Q.z.re[0] = 0x01;
PQ.x.re[0] = 0x853F66D11BE5534F; PQ.x.re[1] = 0x27C8FD4E52D03D4A; PQ.x.re[2] = 0xF88EA78D0A0C29D2; PQ.x.re[3] = 0x2F6DFB07D397A067;
PQ.x.im[0] = 0xE8DBC4AA34434BA1; PQ.x.im[1] = 0x7A73AE182636F8A0; PQ.x.im[2] = 0x419EC260137868EB; PQ.x.im[3] = 0x129B3E301703D43F;
PQ.z.re[0] = 0x01;
fp2_tomont(&S.x, &Q.x);
fp2_tomont(&S.z, &Q.z);
fp2_tomont(&PQ.x, &PQ.x);
fp2_tomont(&PQ.z, &PQ.z);
xADD(&S, &SS, &S, &PQ);
fp2_inv(&S.z);
fp2_mul(&S.x, &S.x, &S.z);
fp2_frommont(&S.x, &S.x);
R.x.re[0] = 0xED0BEB8F93AB4FF9; R.x.re[1] = 0x27CF508B80CD49BF; R.x.re[2] = 0x38A6134DFA04B2BA; R.x.re[3] = 0x27B4CB15E109EF1F;
R.x.im[0] = 0x6F731BA6FD227BDE; R.x.im[1] = 0x14C12335341167F8; R.x.im[2] = 0xECA7B60F7866E27A; R.x.im[3] = 0x2A7A79A152880457;
if (compare_words((digit_t*)&R.x, (digit_t*)&S.x, NWORDS_FIELD*2) != 0) { passed = 0; goto out0; }
fp2_tomont(&R.x, &P.x);
fp2_tomont(&R.z, &P.z);
k[0] = 126;
xMUL(&S, &R, k, (ec_curve_t*)&AC);
fp2_inv(&S.z);
fp2_mul(&S.x, &S.x, &S.z);
fp2_frommont(&S.x, &S.x);
R.x.re[0] = 0xDE80F87A1203A147; R.x.re[1] = 0xD59E1215928A3B2D; R.x.re[2] = 0xD5A67F83A5A8CE46; R.x.re[3] = 0xA11E162488C9CDF;
R.x.im[0] = 0x9417D0D79A26741B; R.x.im[1] = 0x8B1F47D6F0FE5EEC; R.x.im[2] = 0xE52188DCB054CE36; R.x.im[3] = 0x1A8075A6C3148AB3;
if (compare_words((digit_t*)&R.x, (digit_t*)&S.x, NWORDS_FIELD*2) != 0) { passed = 0; goto out0; }
fp2_tomont(&R.x, &P.x);
fp2_tomont(&R.z, &P.z);
k[0] = 0xE77AD6B6C6B2D8CD;
k[1] = 0xDE43A0B600F38D12;
k[2] = 0xA35F4A7897E17CE2;
k[3] = 0x10ACB62E614D1237;
xMUL(&S, &R, k, (ec_curve_t*)&AC);
fp2_inv(&S.z);
fp2_mul(&S.x, &S.x, &S.z);
fp2_frommont(&S.x, &S.x);
R.x.re[0] = 0xD3938B0A68A3E7C0; R.x.re[1] = 0xE0667113208A0595; R.x.re[2] = 0x258F314C84E9CB60; R.x.re[3] = 0x14984BA7CA59AB71;
R.x.im[0] = 0xFE728423EE3BFEF4; R.x.im[1] = 0xBF68C42FE21AE0E4; R.x.im[2] = 0xA8FAF9C9528609CA; R.x.im[3] = 0x1225EC77A1DC0285;
if (compare_words((digit_t*)&R.x, (digit_t*)&S.x, NWORDS_FIELD*2) != 0) { passed = 0; goto out0; }
fp2_tomont(&R.x, &Q.x);
fp2_tomont(&R.z, &Q.z);
k[0] = 0xE77AD6B6C6B2D8CD;
k[1] = 0xDE43A0B600F38D12;
k[2] = 0xA35F4A7897E17CE2;
k[3] = 0x10ACB62E614D1237;
l[0] = 0x34AB78B6C6B2D8C0;
l[1] = 0xDE6B2D8CD00F38D1;
l[2] = 0xA35F4A7897E17CE2;
l[3] = 0x20ACF4A789614D13;
fp2_inv(&SS.z);
fp2_mul(&SS.x, &SS.x, &SS.z);
fp2_copy(&SS.z, &R.z);
xDBLMUL(&S, &R, k, &SS, l, &PQ, (ec_curve_t*)&AC);
fp2_inv(&S.z);
fp2_mul(&S.x, &S.x, &S.z);
fp2_frommont(&S.x, &S.x);
R.x.re[0] = 0x554E1ADC609B992F; R.x.re[1] = 0xE407D961F8CC4C42; R.x.re[2] = 0x1CF626AFED5A68CE; R.x.re[3] = 0x6D02692EE110483;
R.x.im[0] = 0x16FB094E831C8997; R.x.im[1] = 0xFDE4ECF31DC5F702; R.x.im[2] = 0x89303D868DFAD7B4; R.x.im[3] = 0xC91ACE81346F22D;
if (compare_words((digit_t*)&R.x, (digit_t*)&S.x, NWORDS_FIELD*2) != 0) { passed = 0; goto out0; }
out0:
if (passed==1) printf(" ECC arithmetic tests ............................................ PASSED");
else { printf(" ECC arithmetic tests... FAILED"); printf("\n"); return false; }
printf("\n");
*/
// With the body disabled, OK is still its initial value (true).
return OK;
}
// Round-trip tests for ec_dlog_2 and ec_dlog_3. For TEST_LOOPS scalar pairs
// (k, l) the point R is computed with xDBLMUL from the basis (P, Q, PQ); the
// scalars recovered by the dlog routine are then compared with the expected pair.
// The basis x-coordinates (xP2/xQ2/xPQ2, xP3/xQ3/xPQ3) come from test-basis.h.
// Returns true when both parts pass; prints FAILED and returns false as soon as
// one part fails.
bool dlog_test()
{ // Tests for dlog
bool OK = true;
int passed;
// NOTE(review): S and SS are declared but never used in this function.
ec_point_t P = {0}, Q = {0}, R = {0}, S = {0}, SS = {0}, PQ = {0};
ec_curve_t AC = {0};
ec_basis_t PQ2;
digit_t scalarP[NWORDS_ORDER], scalarQ[NWORDS_ORDER], k[NWORDS_ORDER] = {0}, l[NWORDS_ORDER] = {0};
digit_t kt[NWORDS_ORDER], lt[NWORDS_ORDER], f1[NWORDS_ORDER] = {0}, f2[NWORDS_ORDER] = {0}, zero[NWORDS_ORDER] = {0}, tpFdiv2[NWORDS_ORDER] = {0}, tpF[NWORDS_ORDER] = {0};
printf("\n--------------------------------------------------------------------------------------------------------\n\n");
printf("Testing dlog functions: \n\n");
// dlog2 testing
passed = 1;
// Load the basis: x from the table (converted with fp2_tomont), z = 1.
fp2_tomont(&P.x, &xP2);
fp_mont_setone(P.z.re);
fp_set(P.z.im, 0);
fp2_tomont(&Q.x, &xQ2);
fp_mont_setone(Q.z.re);
fp_set(Q.z.im, 0);
fp2_tomont(&PQ.x, &xPQ2);
fp_mont_setone(PQ.z.re);
fp_set(PQ.z.im, 0);
// Curve data: A = 0 (from the zero initializer), C = 1.
AC.C.re[0] = 0x01;
// f1 and f2 are local copies of the constants TWOpFm1 and TWOpF.
// NOTE(review): fp_copy is used on NWORDS_ORDER-sized buffers — confirm that the
// number of words it copies fits.
fp_copy(f1, TWOpFm1);
fp_copy(f2, TWOpF);
fp2_tomont(&AC.C, &AC.C);
copy_point(&PQ2.P, &P);
copy_point(&PQ2.Q, &Q);
copy_point(&PQ2.PmQ, &PQ);
// Starting scalars; each iteration decrements k by 1 and l by 2 before use.
k[0] = 0xFFFFFFFFFFFFFFFF;
k[1] = 0x00000000000007FF;
l[0] = 0xFFFFFFFFFFFFFFFE;
l[1] = 0x00000000000007FF;
for (int n = 0; n < TEST_LOOPS; n++)
{
k[0] -= 1;
l[0] -= 2;
xDBLMUL(&R, &P, k, &Q, l, &PQ, &AC);
ec_dlog_2(scalarP, scalarQ, &PQ2, &R, &AC);
// kt/lt hold the expected output; they start as copies of k and l.
memcpy(kt, k, NWORDS_ORDER*RADIX/8);
memcpy(lt, l, NWORDS_ORDER*RADIX/8);
// When compare_words(k, f1) == 1, or compare_words(l, f1) == 1 while k is zero or
// equal to f1, every non-zero expected scalar is replaced via sub_test with f2.
// NOTE(review): presumably compare_words returns 1 for "greater than" and
// sub_test(kt, f2, kt, ...) computes f2 - kt, matching a normalized representative
// returned by ec_dlog_2 — confirm against those helpers.
if (compare_words(k, f1, NWORDS_ORDER) == 1 ||
(compare_words(l, f1, NWORDS_ORDER) == 1 && (compare_words(k, zero, NWORDS_ORDER) == 0 || compare_words(k, f1, NWORDS_ORDER) == 0))) {
if (compare_words(k, zero, NWORDS_ORDER) != 0) {
sub_test(kt, f2, kt, NWORDS_ORDER);
}
if (compare_words(l, zero, NWORDS_ORDER) != 0) {
sub_test(lt, f2, lt, NWORDS_ORDER);
}
}
if (compare_words((digit_t*)scalarP, (digit_t*)kt, NWORDS_ORDER) != 0 || compare_words((digit_t*)scalarQ, (digit_t*)lt, NWORDS_ORDER) != 0) { passed = 0; break; }
}
if (passed == 1) printf(" dlog2 tests ..................................................... PASSED");
else { printf(" dlog2 tests... FAILED"); printf("\n"); return false; }
printf("\n");
// dlog3 testing
passed = 1;
// Same setup as above with the xP3/xQ3/xPQ3 basis.
fp2_tomont(&P.x, &xP3);
fp_mont_setone(P.z.re);
fp_set(P.z.im, 0);
fp2_tomont(&Q.x, &xQ3);
fp_mont_setone(Q.z.re);
fp_set(Q.z.im, 0);
fp2_tomont(&PQ.x, &xPQ3);
fp_mont_setone(PQ.z.re);
fp_set(PQ.z.im, 0);
// NOTE(review): AC.C still holds the converted value from the dlog2 part at this
// point; only limb re[0] is overwritten before fp2_tomont is applied again —
// confirm that the remaining limbs are as intended.
AC.C.re[0] = 0x01;
fp_copy(tpFdiv2, THREEpFdiv2);
fp_copy(tpF, THREEpF);
fp2_tomont(&AC.C, &AC.C);
copy_point(&PQ2.P, &P);
copy_point(&PQ2.Q, &Q);
copy_point(&PQ2.PmQ, &PQ);
// Reset the second scalar word left over from the dlog2 part.
k[1] = 0;
l[1] = 0;
k[0] = 0x02153E468B91C6D1;
l[0] = 0x02153E468B91C6D0;
for (int n = 0; n < TEST_LOOPS; n++)
{
k[0] -= 1;
l[0] -= 2;
xDBLMUL(&R, &P, k, &Q, l, &PQ, &AC);
ec_dlog_3(scalarP, scalarQ, &PQ2, &R, &AC);
memcpy(kt, k, NWORDS_ORDER*RADIX/8);
memcpy(lt, l, NWORDS_ORDER*RADIX/8);
// Same adjustment as in the dlog2 loop, with tpFdiv2 as threshold and tpF as the
// sub_test operand; here the second clause only covers k == 0.
if (compare_words(k, tpFdiv2, NWORDS_ORDER) == 1 ||
(compare_words(l, tpFdiv2, NWORDS_ORDER) == 1 && compare_words(k, zero, NWORDS_ORDER) == 0)) {
if (compare_words(k, zero, NWORDS_ORDER) != 0) {
sub_test(kt, tpF, kt, NWORDS_ORDER);
}
if (compare_words(l, zero, NWORDS_ORDER) != 0) {
sub_test(lt, tpF, lt, NWORDS_ORDER);
}
}
if (compare_words((digit_t*)scalarP, (digit_t*)kt, NWORDS_ORDER) != 0 || compare_words((digit_t*)scalarQ, (digit_t*)lt, NWORDS_ORDER) != 0) { passed = 0; break; }
}
if (passed == 1) printf(" dlog3 tests ..................................................... PASSED");
else { printf(" dlog3 tests... FAILED"); printf("\n"); return false; }
printf("\n");
return OK;
}
// Benchmarks for the x-only curve arithmetic: xDBL, xADD, xMUL and xDBLMUL are
// each timed over BENCH_LOOPS calls with cpucycles() and the average cycle count
// per call is printed. Always returns true.
//
// All operands are zero-initialized so that the timed routines never read
// indeterminate stack memory (previously every input was uninitialized).
// NOTE(review): zero points/scalars are well-defined but may not be
// representative inputs — consider loading a real basis and random scalars.
bool ec_run()
{
    bool OK = true;
    int n;
    unsigned long long cycles, cycles1, cycles2;
    ec_point_t P = {0}, Q = {0}, R = {0}, PQ = {0}, AC = {0};
    digit_t k[NWORDS_ORDER] = {0}, l[NWORDS_ORDER] = {0};

    printf("\n--------------------------------------------------------------------------------------------------------\n\n");
    printf("Benchmarking ecc arithmetic: \n\n");

    // Point doubling
    cycles = 0;
    for (n = 0; n < BENCH_LOOPS; n++)
    {
        cycles1 = cpucycles();
        xDBL(&Q, &P, &AC);
        cycles2 = cpucycles();
        cycles = cycles + (cycles2 - cycles1);
    }
    // cycles is unsigned long long, hence %llu rather than %lld.
    printf(" Montgomery x-only doubling runs in .............................. %7llu cycles", cycles/BENCH_LOOPS);
    printf("\n");

    // Point addition
    cycles = 0;
    for (n = 0; n < BENCH_LOOPS; n++)
    {
        cycles1 = cpucycles();
        xADD(&R, &Q, &P, &PQ);
        cycles2 = cpucycles();
        cycles = cycles + (cycles2 - cycles1);
    }
    printf(" Montgomery x-only addition runs in .............................. %7llu cycles", cycles/BENCH_LOOPS);
    printf("\n");

    // Point multiplication (AC is an ec_point_t, cast as in the other tests)
    cycles = 0;
    for (n = 0; n < BENCH_LOOPS; n++)
    {
        cycles1 = cpucycles();
        xMUL(&Q, &P, k, (ec_curve_t*)&AC);
        cycles2 = cpucycles();
        cycles = cycles + (cycles2 - cycles1);
    }
    printf(" Montgomery x-only scalar multiplication runs in ................. %7llu cycles", cycles/BENCH_LOOPS);
    printf("\n");

    // Double-scalar point multiplication
    cycles = 0;
    for (n = 0; n < BENCH_LOOPS; n++)
    {
        cycles1 = cpucycles();
        xDBLMUL(&R, &P, k, &Q, l, &PQ, (ec_curve_t*)&AC);
        cycles2 = cpucycles();
        cycles = cycles + (cycles2 - cycles1);
    }
    printf(" Montgomery x-only double-scalar multiplication runs in .......... %7llu cycles", cycles/BENCH_LOOPS);
    printf("\n");

    return OK;
}
// Benchmark ec_dlog_2 and ec_dlog_3.
// For each of the two torsion bases (xP2/xQ2/xPQ2, then xP3/xQ3/xPQ3) the basis is
// loaded with z = 1, and BENCH_LOOPS iterations are run. In every iteration the
// target point R is built with xDBLMUL from random scalars k, l outside the timed
// region; only the dlog call itself is bracketed by cpucycles(). The recovered
// scalars are not checked here. Always returns true.
bool dlog_run()
{
    bool OK = true;
    int n;
    unsigned long long cycles, cycles1, cycles2;
    ec_point_t P = {0}, Q = {0}, R = {0}, PQ = {0};
    ec_curve_t AC = {0};
    ec_basis_t PQ2;
    digit_t scalarP[NWORDS_ORDER], scalarQ[NWORDS_ORDER], k[NWORDS_ORDER] = {0}, l[NWORDS_ORDER] = {0};

    printf("\n--------------------------------------------------------------------------------------------------------\n\n");
    printf("Benchmarking dlog2: \n\n");

    // dlog2 computation: load the basis (x : 1) for P, Q and the third point PQ
    fp2_tomont(&P.x, &xP2);
    fp_mont_setone(P.z.re);
    fp_set(P.z.im, 0);
    fp2_tomont(&Q.x, &xQ2);
    fp_mont_setone(Q.z.re);
    fp_set(Q.z.im, 0);
    fp2_tomont(&PQ.x, &xPQ2);
    fp_mont_setone(PQ.z.re);
    fp_set(PQ.z.im, 0);

    // Curve with A = 0 (from the zero initialiser) and C = 1; it is reused
    // unchanged for the dlog3 benchmark below.
    AC.C.re[0] = 0x01;
    fp2_tomont(&AC.C, &AC.C);

    copy_point(&PQ2.P, &P);
    copy_point(&PQ2.Q, &Q);
    copy_point(&PQ2.PmQ, &PQ);

    cycles = 0;
    for (n = 0; n < BENCH_LOOPS; n++) {
        fprandom_test(k);
        fprandom_test(l);
        xDBLMUL(&R, &P, k, &Q, l, &PQ, &AC);
        cycles1 = cpucycles();
        ec_dlog_2(scalarP, scalarQ, &PQ2, &R, &AC);
        cycles2 = cpucycles();
        cycles = cycles + (cycles2 - cycles1);
    }
    // cycles is unsigned long long, hence %llu rather than %lld
    printf(" dlog2 runs in ................................................... %7llu cycles", cycles / BENCH_LOOPS);
    printf("\n");

    // dlog3 computation: same procedure with the second basis
    fp2_tomont(&P.x, &xP3);
    fp_mont_setone(P.z.re);
    fp_set(P.z.im, 0);
    fp2_tomont(&Q.x, &xQ3);
    fp_mont_setone(Q.z.re);
    fp_set(Q.z.im, 0);
    fp2_tomont(&PQ.x, &xPQ3);
    fp_mont_setone(PQ.z.re);
    fp_set(PQ.z.im, 0);

    copy_point(&PQ2.P, &P);
    copy_point(&PQ2.Q, &Q);
    copy_point(&PQ2.PmQ, &PQ);

    cycles = 0;
    for (n = 0; n < BENCH_LOOPS; n++) {
        fprandom_test(k);
        fprandom_test(l);
        xDBLMUL(&R, &P, k, &Q, l, &PQ, &AC);
        cycles1 = cpucycles();
        ec_dlog_3(scalarP, scalarQ, &PQ2, &R, &AC);
        cycles2 = cpucycles();
        cycles = cycles + (cycles2 - cycles1);
    }
    printf(" dlog3 runs in ................................................... %7llu cycles", cycles / BENCH_LOOPS);
    printf("\n");

    return OK;
}
#endif

View File

@@ -1,24 +0,0 @@
#ifndef TEST_BASIS_H
#define TEST_BASIS_H
#include "fp2.h"
// Full-torsion basis for A=0 (excluding 2^f and huge prime factors)
const fp2_t xPA = {{0x35b53c72e7494775,0x5791b499bc29710d,0x2060f3aca68fa4ff,0x81150c19a14f523a,0x08af6c81a906d44a,0x00cca2a93efb536e},{0x14eaac356375af76,0x5655011e771be3b4,0x6273ccee274d7754,0x440d6b5b4496c183,0xa3d7f80e9f9111ba,0x0302e153bee01a18}};
const fp2_t xQA = {{0x80c0767d1b7b5fd8,0x24e9039d430ca3b5,0x26485254625dc85a,0x612eaebc345b64d1,0x59669fbd946a4409,0x004c3a8564e16101},{0x0e1eac4e38449c54,0x752c042b4c6675cb,0x88ec0e75c8e9ea0e,0xbf7c4cdbfc4483f0,0xd594cb5474bbc264,0x02f5e2345a9b4654}};
const fp2_t xPQA = {{0x1f5accaff9a7da90,0x91884964774d4cb2,0x0e938e13dd088e63,0x453c9af09879a724,0xb2bd09ec3740312b,0x0007a5837e23aaa1},{0x8e1ac4b319787bd4,0x7cb9fba402f67bfe,0x370b2951f9ec29cf,0x7a020172566f9d17,0x063e31753d703130,0x01551136265bade6}};
const fp2_t xPB = {{0xb702a70a8ae132ad,0x56d8804c83a8e696,0x5ac3e12f4df1792e,0x0a89da435664746e,0xd8758765206844bd,0x01a92f6e9e0e9296},{0x8aaab711b76b0959,0x210e6695ca5e5fdd,0x593be0d75909ca12,0xfbc074d8ebdeb927,0xb61fcc328d3756bc,0x0198a5942855c8bf}};
const fp2_t xQB = {{0x2b6b82b950b61fda,0x0ef2dd717daed334,0x99dee4db0b268ac9,0x3534eb384e1fcaf0,0xbaf112845a4f2d81,0x037f1492d8d815a1},{0x97e80590f9a0556b,0x7d9b4b87a22a7792,0xda4534fe75595b4b,0xbe1092a2733c03e1,0xbf5b1bd147b0d630,0x0125721476e5267f}};
const fp2_t xPQB = {{0xb7d459a56d4aebec,0x6ac7f10ba20e1e71,0x9a95a8928507f7ef,0xc4c5aff6b97f3dfe,0x644beb3e86806b77,0x022319eb6eaf072a},{0x8ad0f6b18934790e,0xdad82b7b38e166bf,0xcb08f5a3ab53d9a9,0xd2ff39b401ba8aba,0xbff9b5e40ed9e5ce,0x03c1773791f554c0}};
// 2^f-torsion basis for A=0
const fp2_t xP2 = {{0x7a26fdb0e5844206,0x0752b2ba140f7dfd,0x1728013f8f5fe257,0xd05f129975ed6bba,0xe736dbce707ad5a8,0x01f861715896d0be},{0xdac046927a0c5352,0x5a42474ac156ff18,0xe887982ff4c5a9ea,0x3875be6432251f1c,0xdfae47315af877ee,0x005627f085582ecc}};
const fp2_t xQ2 = {{0xc4f03ab3db57331b,0xf04261fc3b713778,0xa99b82430c7e40d1,0x5fe52b1324c2a091,0xfcaa2a7049d0f657,0x021f2caa09302141},{0x4a92a1d5ff9f6730,0x6dcd5f600f33783e,0xdb8b4e2e5149b45e,0x993458635c01d0c0,0x5f9bc7d3bb307f91,0x01fcc7eae4712b6a}};
const fp2_t xPQ2 = {{0x7f4ee9c86c4341a2,0x0c867f482063bdfc,0xe46fb7b0fbd479c7,0xddaa716e091be9ad,0x29239eadddf5dc59,0x0231c09c660f0a89},{0xde64fa344dd64237,0xa89aaaed3dd84555,0xbb70924d8fb73f27,0x0869ec018b3366dc,0x47a0356ce742bcbc,0x00547dbda6dc094d}};
// 3^g-torsion basis for A=0
const fp2_t xP3 = {{0x7c878d0ceaa821f0,0xf94db4cab7186625,0x7cff6d5fb0ca7867,0x4e3f5bd19cbca9d6,0x05ec8273d0042548,0x0233a79cf87040b3},{0x060e9f3dcab8192c,0xa94e86d063a46398,0x0e5cc403bfb60867,0x3ea1277f98087283,0xaff1fd95bb094917,0x025041b12719d3b8}};
const fp2_t xQ3 = {{0xb25aaa192bd351b7,0xc5db1962aed7e543,0x1f722ab174319947,0xd1c9bb4a0a5d8aa3,0x351415ec64f88921,0x0288ae044d62c930},{0xb41ede1724f8e06a,0xfb10ce5a83c66629,0x9846173e31a9d448,0x35c94966192f08db,0x72f7252946af3f9c,0x02ea05c971e7b34c}};
const fp2_t xPQ3 = {{0x674703cc3134d90b,0x507e338e496b8f75,0x0c8cb1f138346e4c,0x54cb7ad5ba580da7,0x65750f0bcd0a9857,0x038b435f51669e87},{0xdcdc0116c67589a0,0x45ce94f4d345c827,0x0f2cbfb3c53b73ea,0x03e7951bc98efbb8,0x3335ad0991864858,0x01e151a64210f74f}};
#endif

View File

@@ -1,17 +1 @@
# Source files of the reference EC implementation for the current variant.
# All paths are taken relative to ECX_DIR, which is defined outside this file.
set(SOURCE_FILES_EC_${SVARIANT_UPPER}_REF
${ECX_DIR}/poly-mul.c
${ECX_DIR}/poly-redc.c
${ECX_DIR}/ec.c
${ECX_DIR}/tedwards.c
${ECX_DIR}/kps.c
${ECX_DIR}/xisog.c
${ECX_DIR}/xeval.c
${ECX_DIR}/isog_chains.c
${ECX_DIR}/basis.c
)
# Build the per-variant EC library from the sources listed above.
add_library(${LIB_EC_${SVARIANT_UPPER}} ${SOURCE_FILES_EC_${SVARIANT_UPPER}_REF})
# Header search paths used only while compiling the library itself (PRIVATE:
# they are not propagated to targets that link against it).
target_include_directories(${LIB_EC_${SVARIANT_UPPER}} PRIVATE ${INC_INTBIG} ${INC_PRECOMP_${SVARIANT_UPPER}} ${INC_PUBLIC} ${INC_GF_${SVARIANT_UPPER}} ${INC_COMMON} ${INC_EC})
# Compiler flags come from C_OPT_FLAGS, which is defined outside this file.
target_compile_options(${LIB_EC_${SVARIANT_UPPER}} PRIVATE ${C_OPT_FLAGS})
# Process the CMakeLists.txt of the test/ subdirectory.
add_subdirectory(test)
include(../lvlx.cmake)

View File

@@ -1,36 +1 @@
# Test executables of the EC module for the current variant, followed by their
# CTest registration. Libraries are linked with an explicit PRIVATE keyword and
# tests use the add_test(NAME ... COMMAND ...) signature, so the executable
# target name is resolved to the location of the built binary.

# fp2 arithmetic test (needs only the GF library)
add_executable(fp2.test_${SVARIANT_LOWER} ${ECX_DIR}/test/fp2-test.c)
target_include_directories(fp2.test_${SVARIANT_LOWER} PUBLIC ${INC_GF_${SVARIANT_UPPER}} ${INC_PRECOMP_${SVARIANT_UPPER}} ${PROJECT_SOURCE_DIR}/include ../include ${INC_EC} ${INC_COMMON})
target_link_libraries(fp2.test_${SVARIANT_LOWER} PRIVATE ${LIB_GF_${SVARIANT_UPPER}})

# Polynomial multiplication test
add_executable(poly-mul.test_${SVARIANT_LOWER} ${ECX_DIR}/test/poly-mul-test.c)
target_include_directories(poly-mul.test_${SVARIANT_LOWER} PUBLIC ${INC_GF_${SVARIANT_UPPER}} ${INC_PRECOMP_${SVARIANT_UPPER}} ${PROJECT_SOURCE_DIR}/include ../include ${INC_EC} ${INC_COMMON})
target_link_libraries(poly-mul.test_${SVARIANT_LOWER} PRIVATE ${LIB_GF_${SVARIANT_UPPER}} ${LIB_EC_${SVARIANT_UPPER}})

# Polynomial reduction test
add_executable(poly-redc.test_${SVARIANT_LOWER} ${ECX_DIR}/test/poly-redc-test.c)
target_include_directories(poly-redc.test_${SVARIANT_LOWER} PUBLIC ${INC_GF_${SVARIANT_UPPER}} ${INC_PRECOMP_${SVARIANT_UPPER}} ${PROJECT_SOURCE_DIR}/include ../include ${INC_EC} ${INC_COMMON})
target_link_libraries(poly-redc.test_${SVARIANT_LOWER} PRIVATE ${LIB_GF_${SVARIANT_UPPER}} ${LIB_EC_${SVARIANT_UPPER}})

# Montgomery-curve arithmetic test
add_executable(mont.test_${SVARIANT_LOWER} ${ECX_DIR}/test/mont-test.c)
target_include_directories(mont.test_${SVARIANT_LOWER} PUBLIC ${INC_GF_${SVARIANT_UPPER}} ${INC_INTBIG} ${INC_PRECOMP_${SVARIANT_UPPER}} ${PROJECT_SOURCE_DIR}/include ../include ${INC_EC} ${INC_COMMON} .)
target_link_libraries(mont.test_${SVARIANT_LOWER} PRIVATE ${LIB_PRECOMP_${SVARIANT_UPPER}} ${LIB_INTBIG} ${LIB_GF_${SVARIANT_UPPER}} ${LIB_EC_${SVARIANT_UPPER}})

# EC test/benchmark driver (also compiles the shared test_extras.c helpers)
add_executable(ec.test_${SVARIANT_LOWER} ${ECX_DIR}/test/ec-test.c ${ECX_DIR}/test/test_extras.c)
target_include_directories(ec.test_${SVARIANT_LOWER} PUBLIC ${ECX_DIR}/test ${INC_GF_${SVARIANT_UPPER}} ${INC_INTBIG} ${INC_PRECOMP_${SVARIANT_UPPER}} ${PROJECT_SOURCE_DIR}/include ../include ${INC_EC} ${INC_COMMON} .)
target_link_libraries(ec.test_${SVARIANT_LOWER} PRIVATE ${LIB_PRECOMP_${SVARIANT_UPPER}} ${LIB_INTBIG} ${LIB_GF_${SVARIANT_UPPER}} ${LIB_EC_${SVARIANT_UPPER}})

# Velu isogeny test
add_executable(velu.test_${SVARIANT_LOWER} ${ECX_DIR}/test/velu-test.c)
target_include_directories(velu.test_${SVARIANT_LOWER} PUBLIC ${INC_GF_${SVARIANT_UPPER}} ${INC_INTBIG} ${INC_PRECOMP_${SVARIANT_UPPER}} ${PROJECT_SOURCE_DIR}/include ../include ${INC_EC} ${INC_COMMON} .)
target_link_libraries(velu.test_${SVARIANT_LOWER} PRIVATE ${LIB_PRECOMP_${SVARIANT_UPPER}} ${LIB_INTBIG} ${LIB_GF_${SVARIANT_UPPER}} ${LIB_EC_${SVARIANT_UPPER}})

# Isogeny test
add_executable(isog.test_${SVARIANT_LOWER} ${ECX_DIR}/test/isog-test.c)
target_include_directories(isog.test_${SVARIANT_LOWER} PUBLIC ${INC_GF_${SVARIANT_UPPER}} ${INC_INTBIG} ${INC_PRECOMP_${SVARIANT_UPPER}} ${PROJECT_SOURCE_DIR}/include ../include ${INC_EC} ${INC_COMMON} .)
target_link_libraries(isog.test_${SVARIANT_LOWER} PRIVATE ${LIB_PRECOMP_${SVARIANT_UPPER}} ${LIB_INTBIG} ${LIB_GF_${SVARIANT_UPPER}} ${LIB_EC_${SVARIANT_UPPER}})

# CTest registration. SQISIGN_TEST_REPS is forwarded as a command-line argument;
# the ec test additionally receives the literal argument "test" before it.
add_test(NAME ec_fp2.test_${SVARIANT_LOWER} COMMAND fp2.test_${SVARIANT_LOWER} ${SQISIGN_TEST_REPS})
add_test(NAME ec_poly-mul.test_${SVARIANT_LOWER} COMMAND poly-mul.test_${SVARIANT_LOWER} ${SQISIGN_TEST_REPS})
add_test(NAME ec_poly-redc.test_${SVARIANT_LOWER} COMMAND poly-redc.test_${SVARIANT_LOWER} ${SQISIGN_TEST_REPS})
add_test(NAME ec_mont.test_${SVARIANT_LOWER} COMMAND mont.test_${SVARIANT_LOWER} ${SQISIGN_TEST_REPS})
add_test(NAME ec_ec.test_${SVARIANT_LOWER} COMMAND ec.test_${SVARIANT_LOWER} test ${SQISIGN_TEST_REPS})
add_test(NAME ec_velu.test_${SVARIANT_LOWER} COMMAND velu.test_${SVARIANT_LOWER} ${SQISIGN_TEST_REPS})
add_test(NAME ec_isog.test_${SVARIANT_LOWER} COMMAND isog.test_${SVARIANT_LOWER} ${SQISIGN_TEST_REPS})
include(../../lvlx_test.cmake)

View File

@@ -1,400 +0,0 @@
#ifndef EC_TESTS_H
#define EC_TESTS_H
#include "test_extras.h"
#include <stdio.h>
#include <string.h>
#include <bench.h> //////// NOTE: enable later
#include "test-basis.h"
#include "ec_params.h"
// Global constants
extern const digit_t p[NWORDS_FIELD];
// Benchmark and test parameters
static int BENCH_LOOPS = 1000; // Number of iterations per bench
static int TEST_LOOPS = 512; // Number of iterations per test
// Placeholder for the ECC arithmetic self-test.
// As written, the function only prints a banner stating that the tests are
// not implemented and returns true; all locals declared below are unused
// because the actual test body is commented out.
bool ec_test()
{ // Tests for ecc arithmetic
bool OK = true;
int passed;
ec_point_t P = {0}, Q = {0}, R = {0}, S = {0}, SS = {0}, PQ = {0};
ec_point_t AC = {0};
digit_t k[NWORDS_ORDER] = {0}, l[NWORDS_ORDER] = {0};
printf("\n--------------------------------------------------------------------------------------------------------\n\n");
printf("Testing ecc functions: (NOT IMPLEMENTED) \n\n");
// Disabled known-answer tests for xDBL, xADD, xMUL and xDBLMUL.
// NOTE(review): the vectors below only fill limbs [0..3] of each coordinate;
// they presumably belong to a different (4-limb) field size - confirm before re-enabling.
/*
// Point doubling
passed = 1;
P.x.re[0] = 0xDFD70ED0861BD329; P.x.re[1] = 0x20ACD3758C7F5540; P.x.re[2] = 0x3DCCDC007277F80A; P.x.re[3] = 0x18D6D2A22981DCE1;
P.x.im[0] = 0x3C23730A3F08F38C; P.x.im[1] = 0x98BB973AFD3D954D; P.x.im[2] = 0x8D98ADFC2829AE8A; P.x.im[3] = 0x21A2464D6369AFBA;
P.z.re[0] = 0x01;
AC.z.re[0] = 0x01;
fp2_tomont(&AC.z, &AC.z);
fp2_tomont(&R.x, &P.x);
fp2_tomont(&R.z, &P.z);
xDBL(&S, &R, &AC);
fp2_copy(&SS.x, &S.x); // Copy of S = SS <- 2P
fp2_copy(&SS.z, &S.z);
fp2_inv(&S.z);
fp2_mul(&S.x, &S.x, &S.z);
fp2_frommont(&S.x, &S.x);
R.x.re[0] = 0x5950EE0A4AF90FC8; R.x.re[1] = 0x16488065A0A98B08; R.x.re[2] = 0xCE65322229DA0FD1; R.x.re[3] = 0x270A35FF781EE204;
R.x.im[0] = 0x564447FD9EC57F6B; R.x.im[1] = 0x2EE24E984294F729; R.x.im[2] = 0x53A6C7360E972C71; R.x.im[3] = 0x4FCF4B9928A7C7E;
if (compare_words((digit_t*)&R.x, (digit_t*)&S.x, NWORDS_FIELD*2)!=0) { passed=0; goto out0; }
Q.x.re[0] = 0xC46076A670C70053; Q.x.re[1] = 0x97517AFA3AB9ED13; Q.x.re[2] = 0x349644C942EDF993; Q.x.re[3] = 0xBB4A4DB6F29AF9E;
Q.x.im[0] = 0x8B47629FB5A15BB0; Q.x.im[1] = 0x4EC6E809953C1A10; Q.x.im[2] = 0x1F83F0EC6CBB84D6; Q.x.im[3] = 0x1D8417C1D33265D3;
Q.z.re[0] = 0x01;
PQ.x.re[0] = 0x853F66D11BE5534F; PQ.x.re[1] = 0x27C8FD4E52D03D4A; PQ.x.re[2] = 0xF88EA78D0A0C29D2; PQ.x.re[3] = 0x2F6DFB07D397A067;
PQ.x.im[0] = 0xE8DBC4AA34434BA1; PQ.x.im[1] = 0x7A73AE182636F8A0; PQ.x.im[2] = 0x419EC260137868EB; PQ.x.im[3] = 0x129B3E301703D43F;
PQ.z.re[0] = 0x01;
fp2_tomont(&S.x, &Q.x);
fp2_tomont(&S.z, &Q.z);
fp2_tomont(&PQ.x, &PQ.x);
fp2_tomont(&PQ.z, &PQ.z);
xADD(&S, &SS, &S, &PQ);
fp2_inv(&S.z);
fp2_mul(&S.x, &S.x, &S.z);
fp2_frommont(&S.x, &S.x);
R.x.re[0] = 0xED0BEB8F93AB4FF9; R.x.re[1] = 0x27CF508B80CD49BF; R.x.re[2] = 0x38A6134DFA04B2BA; R.x.re[3] = 0x27B4CB15E109EF1F;
R.x.im[0] = 0x6F731BA6FD227BDE; R.x.im[1] = 0x14C12335341167F8; R.x.im[2] = 0xECA7B60F7866E27A; R.x.im[3] = 0x2A7A79A152880457;
if (compare_words((digit_t*)&R.x, (digit_t*)&S.x, NWORDS_FIELD*2) != 0) { passed = 0; goto out0; }
fp2_tomont(&R.x, &P.x);
fp2_tomont(&R.z, &P.z);
k[0] = 126;
xMUL(&S, &R, k, (ec_curve_t*)&AC);
fp2_inv(&S.z);
fp2_mul(&S.x, &S.x, &S.z);
fp2_frommont(&S.x, &S.x);
R.x.re[0] = 0xDE80F87A1203A147; R.x.re[1] = 0xD59E1215928A3B2D; R.x.re[2] = 0xD5A67F83A5A8CE46; R.x.re[3] = 0xA11E162488C9CDF;
R.x.im[0] = 0x9417D0D79A26741B; R.x.im[1] = 0x8B1F47D6F0FE5EEC; R.x.im[2] = 0xE52188DCB054CE36; R.x.im[3] = 0x1A8075A6C3148AB3;
if (compare_words((digit_t*)&R.x, (digit_t*)&S.x, NWORDS_FIELD*2) != 0) { passed = 0; goto out0; }
fp2_tomont(&R.x, &P.x);
fp2_tomont(&R.z, &P.z);
k[0] = 0xE77AD6B6C6B2D8CD;
k[1] = 0xDE43A0B600F38D12;
k[2] = 0xA35F4A7897E17CE2;
k[3] = 0x10ACB62E614D1237;
xMUL(&S, &R, k, (ec_curve_t*)&AC);
fp2_inv(&S.z);
fp2_mul(&S.x, &S.x, &S.z);
fp2_frommont(&S.x, &S.x);
R.x.re[0] = 0xD3938B0A68A3E7C0; R.x.re[1] = 0xE0667113208A0595; R.x.re[2] = 0x258F314C84E9CB60; R.x.re[3] = 0x14984BA7CA59AB71;
R.x.im[0] = 0xFE728423EE3BFEF4; R.x.im[1] = 0xBF68C42FE21AE0E4; R.x.im[2] = 0xA8FAF9C9528609CA; R.x.im[3] = 0x1225EC77A1DC0285;
if (compare_words((digit_t*)&R.x, (digit_t*)&S.x, NWORDS_FIELD*2) != 0) { passed = 0; goto out0; }
fp2_tomont(&R.x, &Q.x);
fp2_tomont(&R.z, &Q.z);
k[0] = 0xE77AD6B6C6B2D8CD;
k[1] = 0xDE43A0B600F38D12;
k[2] = 0xA35F4A7897E17CE2;
k[3] = 0x10ACB62E614D1237;
l[0] = 0x34AB78B6C6B2D8C0;
l[1] = 0xDE6B2D8CD00F38D1;
l[2] = 0xA35F4A7897E17CE2;
l[3] = 0x20ACF4A789614D13;
fp2_inv(&SS.z);
fp2_mul(&SS.x, &SS.x, &SS.z);
fp2_copy(&SS.z, &R.z);
xDBLMUL(&S, &R, k, &SS, l, &PQ, (ec_curve_t*)&AC);
fp2_inv(&S.z);
fp2_mul(&S.x, &S.x, &S.z);
fp2_frommont(&S.x, &S.x);
R.x.re[0] = 0x554E1ADC609B992F; R.x.re[1] = 0xE407D961F8CC4C42; R.x.re[2] = 0x1CF626AFED5A68CE; R.x.re[3] = 0x6D02692EE110483;
R.x.im[0] = 0x16FB094E831C8997; R.x.im[1] = 0xFDE4ECF31DC5F702; R.x.im[2] = 0x89303D868DFAD7B4; R.x.im[3] = 0xC91ACE81346F22D;
if (compare_words((digit_t*)&R.x, (digit_t*)&S.x, NWORDS_FIELD*2) != 0) { passed = 0; goto out0; }
out0:
if (passed==1) printf(" ECC arithmetic tests ............................................ PASSED");
else { printf(" ECC arithmetic tests... FAILED"); printf("\n"); return false; }
printf("\n");
*/
return OK;
}
bool dlog_test()
{ // Tests for dlog
bool OK = true;
int passed;
ec_point_t P = {0}, Q = {0}, R = {0}, S = {0}, SS = {0}, PQ = {0};
ec_curve_t AC = {0};
ec_basis_t PQ2;
digit_t scalarP[NWORDS_ORDER], scalarQ[NWORDS_ORDER], k[NWORDS_ORDER] = {0}, l[NWORDS_ORDER] = {0};
digit_t kt[NWORDS_ORDER], lt[NWORDS_ORDER], f1[NWORDS_ORDER] = {0}, f2[NWORDS_ORDER] = {0}, zero[NWORDS_ORDER] = {0}, tpFdiv2[NWORDS_ORDER] = {0}, tpF[NWORDS_ORDER] = {0};
printf("\n--------------------------------------------------------------------------------------------------------\n\n");
printf("Testing dlog functions: \n\n");
// dlog2 testing
passed = 1;
fp2_tomont(&P.x, &xP2);
fp_mont_setone(P.z.re);
fp_set(P.z.im, 0);
fp2_tomont(&Q.x, &xQ2);
fp_mont_setone(Q.z.re);
fp_set(Q.z.im, 0);
fp2_tomont(&PQ.x, &xPQ2);
fp_mont_setone(PQ.z.re);
fp_set(PQ.z.im, 0);
AC.C.re[0] = 0x01;
fp_copy(f1, TWOpFm1);
fp_copy(f2, TWOpF);
fp2_tomont(&AC.C, &AC.C);
copy_point(&PQ2.P, &P);
copy_point(&PQ2.Q, &Q);
copy_point(&PQ2.PmQ, &PQ);
k[0] = 0xFFFFFFFFFFFFFFFF;
k[1] = 0x00000000000007FF;
l[0] = 0xFFFFFFFFFFFFFFFE;
l[1] = 0x00000000000007FF;
for (int n = 0; n < TEST_LOOPS; n++)
{
k[0] -= 1;
l[0] -= 2;
xDBLMUL(&R, &P, k, &Q, l, &PQ, &AC);
ec_dlog_2(scalarP, scalarQ, &PQ2, &R, &AC);
memcpy(kt, k, NWORDS_ORDER*RADIX/8);
memcpy(lt, l, NWORDS_ORDER*RADIX/8);
if (compare_words(k, f1, NWORDS_ORDER) == 1 ||
(compare_words(l, f1, NWORDS_ORDER) == 1 && (compare_words(k, zero, NWORDS_ORDER) == 0 || compare_words(k, f1, NWORDS_ORDER) == 0))) {
if (compare_words(k, zero, NWORDS_ORDER) != 0) {
sub_test(kt, f2, kt, NWORDS_ORDER);
}
if (compare_words(l, zero, NWORDS_ORDER) != 0) {
sub_test(lt, f2, lt, NWORDS_ORDER);
}
}
if (compare_words((digit_t*)scalarP, (digit_t*)kt, NWORDS_ORDER) != 0 || compare_words((digit_t*)scalarQ, (digit_t*)lt, NWORDS_ORDER) != 0) { passed = 0; break; }
}
if (passed == 1) printf(" dlog2 tests ..................................................... PASSED");
else { printf(" dlog2 tests... FAILED"); printf("\n"); return false; }
printf("\n");
// dlog3 testing
passed = 1;
fp2_tomont(&P.x, &xP3);
fp_mont_setone(P.z.re);
fp_set(P.z.im, 0);
fp2_tomont(&Q.x, &xQ3);
fp_mont_setone(Q.z.re);
fp_set(Q.z.im, 0);
fp2_tomont(&PQ.x, &xPQ3);
fp_mont_setone(PQ.z.re);
fp_set(PQ.z.im, 0);
AC.C.re[0] = 0x01;
fp_copy(tpFdiv2, THREEpFdiv2);
fp_copy(tpF, THREEpF);
fp2_tomont(&AC.C, &AC.C);
copy_point(&PQ2.P, &P);
copy_point(&PQ2.Q, &Q);
copy_point(&PQ2.PmQ, &PQ);
k[1] = 0;
l[1] = 0;
k[0] = 0x02153E468B91C6D1;
l[0] = 0x02153E468B91C6D0;
for (int n = 0; n < TEST_LOOPS; n++)
{
k[0] -= 1;
l[0] -= 2;
xDBLMUL(&R, &P, k, &Q, l, &PQ, &AC);
ec_dlog_3(scalarP, scalarQ, &PQ2, &R, &AC);
memcpy(kt, k, NWORDS_ORDER*RADIX/8);
memcpy(lt, l, NWORDS_ORDER*RADIX/8);
if (compare_words(k, tpFdiv2, NWORDS_ORDER) == 1 ||
(compare_words(l, tpFdiv2, NWORDS_ORDER) == 1 && compare_words(k, zero, NWORDS_ORDER) == 0)) {
if (compare_words(k, zero, NWORDS_ORDER) != 0) {
sub_test(kt, tpF, kt, NWORDS_ORDER);
}
if (compare_words(l, zero, NWORDS_ORDER) != 0) {
sub_test(lt, tpF, lt, NWORDS_ORDER);
}
}
if (compare_words((digit_t*)scalarP, (digit_t*)kt, NWORDS_ORDER) != 0 || compare_words((digit_t*)scalarQ, (digit_t*)lt, NWORDS_ORDER) != 0) { passed = 0; break; }
}
if (passed == 1) printf(" dlog3 tests ..................................................... PASSED");
else { printf(" dlog3 tests... FAILED"); printf("\n"); return false; }
printf("\n");
return OK;
}
// Benchmark the x-only Montgomery arithmetic primitives xDBL, xADD, xMUL and xDBLMUL.
// Each primitive is called BENCH_LOOPS times; every call is bracketed by cpucycles()
// and the average cycle count per call is printed. The outputs of the primitives are
// never inspected, so only the timings are meaningful. Always returns true.
bool ec_run()
{
    bool OK = true;
    int n;
    unsigned long long cycles, cycles1, cycles2;
    // Zero-initialise every operand so that the primitives never read
    // indeterminate memory; the actual input values are irrelevant here.
    ec_point_t P = {0}, Q = {0}, R = {0}, PQ = {0}, AC = {0};
    digit_t k[NWORDS_ORDER] = {0}, l[NWORDS_ORDER] = {0};

    printf("\n--------------------------------------------------------------------------------------------------------\n\n");
    printf("Benchmarking ecc arithmetic: \n\n");

    // Point doubling
    cycles = 0;
    for (n = 0; n < BENCH_LOOPS; n++) {
        cycles1 = cpucycles();
        xDBL(&Q, &P, &AC);
        cycles2 = cpucycles();
        cycles = cycles + (cycles2 - cycles1);
    }
    // cycles is unsigned long long, hence %llu rather than %lld
    printf(" Montgomery x-only doubling runs in .............................. %7llu cycles", cycles / BENCH_LOOPS);
    printf("\n");

    // Point addition
    cycles = 0;
    for (n = 0; n < BENCH_LOOPS; n++) {
        cycles1 = cpucycles();
        xADD(&R, &Q, &P, &PQ);
        cycles2 = cpucycles();
        cycles = cycles + (cycles2 - cycles1);
    }
    printf(" Montgomery x-only addition runs in .............................. %7llu cycles", cycles / BENCH_LOOPS);
    printf("\n");

    // Scalar multiplication
    // NOTE(review): AC is declared as ec_point_t and cast to ec_curve_t* here;
    // this presumably relies on both types sharing a layout - confirm.
    cycles = 0;
    for (n = 0; n < BENCH_LOOPS; n++) {
        cycles1 = cpucycles();
        xMUL(&Q, &P, k, (ec_curve_t *)&AC);
        cycles2 = cpucycles();
        cycles = cycles + (cycles2 - cycles1);
    }
    printf(" Montgomery x-only scalar multiplication runs in ................. %7llu cycles", cycles / BENCH_LOOPS);
    printf("\n");

    // Double-scalar multiplication
    cycles = 0;
    for (n = 0; n < BENCH_LOOPS; n++) {
        cycles1 = cpucycles();
        xDBLMUL(&R, &P, k, &Q, l, &PQ, (ec_curve_t *)&AC);
        cycles2 = cpucycles();
        cycles = cycles + (cycles2 - cycles1);
    }
    printf(" Montgomery x-only double-scalar multiplication runs in .......... %7llu cycles", cycles / BENCH_LOOPS);
    printf("\n");

    return OK;
}
// Benchmark ec_dlog_2 and ec_dlog_3.
// For each of the two torsion bases (xP2/xQ2/xPQ2, then xP3/xQ3/xPQ3) the basis is
// loaded with z = 1, and BENCH_LOOPS iterations are run. In every iteration the
// target point R is built with xDBLMUL from random scalars k, l outside the timed
// region; only the dlog call itself is bracketed by cpucycles(). The recovered
// scalars are not checked here. Always returns true.
bool dlog_run()
{
    bool OK = true;
    int n;
    unsigned long long cycles, cycles1, cycles2;
    ec_point_t P = {0}, Q = {0}, R = {0}, PQ = {0};
    ec_curve_t AC = {0};
    ec_basis_t PQ2;
    digit_t scalarP[NWORDS_ORDER], scalarQ[NWORDS_ORDER], k[NWORDS_ORDER] = {0}, l[NWORDS_ORDER] = {0};

    printf("\n--------------------------------------------------------------------------------------------------------\n\n");
    printf("Benchmarking dlog2: \n\n");

    // dlog2 computation: load the basis (x : 1) for P, Q and the third point PQ
    fp2_tomont(&P.x, &xP2);
    fp_mont_setone(P.z.re);
    fp_set(P.z.im, 0);
    fp2_tomont(&Q.x, &xQ2);
    fp_mont_setone(Q.z.re);
    fp_set(Q.z.im, 0);
    fp2_tomont(&PQ.x, &xPQ2);
    fp_mont_setone(PQ.z.re);
    fp_set(PQ.z.im, 0);

    // Curve with A = 0 (from the zero initialiser) and C = 1; it is reused
    // unchanged for the dlog3 benchmark below.
    AC.C.re[0] = 0x01;
    fp2_tomont(&AC.C, &AC.C);

    copy_point(&PQ2.P, &P);
    copy_point(&PQ2.Q, &Q);
    copy_point(&PQ2.PmQ, &PQ);

    cycles = 0;
    for (n = 0; n < BENCH_LOOPS; n++) {
        fprandom_test(k);
        fprandom_test(l);
        xDBLMUL(&R, &P, k, &Q, l, &PQ, &AC);
        cycles1 = cpucycles();
        ec_dlog_2(scalarP, scalarQ, &PQ2, &R, &AC);
        cycles2 = cpucycles();
        cycles = cycles + (cycles2 - cycles1);
    }
    // cycles is unsigned long long, hence %llu rather than %lld
    printf(" dlog2 runs in ................................................... %7llu cycles", cycles / BENCH_LOOPS);
    printf("\n");

    // dlog3 computation: same procedure with the second basis
    fp2_tomont(&P.x, &xP3);
    fp_mont_setone(P.z.re);
    fp_set(P.z.im, 0);
    fp2_tomont(&Q.x, &xQ3);
    fp_mont_setone(Q.z.re);
    fp_set(Q.z.im, 0);
    fp2_tomont(&PQ.x, &xPQ3);
    fp_mont_setone(PQ.z.re);
    fp_set(PQ.z.im, 0);

    copy_point(&PQ2.P, &P);
    copy_point(&PQ2.Q, &Q);
    copy_point(&PQ2.PmQ, &PQ);

    cycles = 0;
    for (n = 0; n < BENCH_LOOPS; n++) {
        fprandom_test(k);
        fprandom_test(l);
        xDBLMUL(&R, &P, k, &Q, l, &PQ, &AC);
        cycles1 = cpucycles();
        ec_dlog_3(scalarP, scalarQ, &PQ2, &R, &AC);
        cycles2 = cpucycles();
        cycles = cycles + (cycles2 - cycles1);
    }
    printf(" dlog3 runs in ................................................... %7llu cycles", cycles / BENCH_LOOPS);
    printf("\n");

    return OK;
}
#endif

View File

@@ -1,24 +0,0 @@
#ifndef TEST_BASIS_H
#define TEST_BASIS_H
#include "fp2.h"
// Full-torsion basis for A=0 (excluding 2^f and huge prime factors)
const fp2_t xPA = {{0x3c780e636a5869dc,0xb8a1d106332efe8e,0x7dd946e490e6578e,0x71d1fadbea881f88,0xb94912baba3999f0,0x85343be0a74ca9e1,0x22ae01775a9f7fa4,0x001032ffab70a66e},{0x15908a4b85221a67,0x342f82e6a1db4e1d,0x3d7c806a0d47b041,0x693830fad798c598,0xcfa244134a61827a,0x7f723d6f5d9628cf,0x10da657833d4d027,0x000c48499df01216}};
const fp2_t xQA = {{0x79a766df9c10c642,0x7677cb85097be8be,0x2a21c7f9b84b9deb,0xb263e837f57210ce,0x551d6636b7c7e061,0x78d332581bee10b2,0xce30a9926772e06c,0x00150b5009b1d6ed},{0xbb2f097dae470eb9,0x53940c6df1eb93a9,0x7786a4bab87320c1,0x89d32acc1c91db18,0x733ef7f139fb7f9b,0x7bc336ee25a3901b,0xf7dfe8f5559eeeb1,0x00210555ab63e7f3}};
const fp2_t xPQA = {{0x315ead6fadc8b0d6,0x7da37e8b7e94de95,0xcc6a9e206f513651,0x84fa9fab584acf3d,0x293b25689ac50519,0xe3222bd1c8154964,0x8ad7f39d04a8274f,0x000898edca69c223},{0x3e6c3e1864851e7e,0x01807c724f75ad5e,0xe9cd50eff4e66fb7,0x6c7c19a88fed9707,0x3ab57d0499386a40,0x6b5fd53c6efdc0b5,0x092fe030da27bc43,0x00076f2f409c5f8e}};
const fp2_t xPB = {{0x229e388475511856,0x2f6b17e9ec9258c0,0x0cb28c568697f9f4,0xca039e28512c9f9b,0xd52d823761b0daa2,0xa09c3800e22c5e3b,0x2971022668c3b76a,0x0006e91c4415afd1},{0xbd5059b7406e1dcd,0x9da456ed8c11f1a3,0x1fb30e9cf66f928e,0x867c348b2f488d26,0x9d4b03d8aa4229bc,0x1c01ca1088d145a8,0xc9d6a201d77644a1,0x000a0d45131bf5b0}};
const fp2_t xQB = {{0x712f0e5d0e3b4dfa,0x52260082dda1a07e,0x5a7513dcfd273829,0xc686f0976cbb5dcf,0xf5fc3df004cc7efc,0x615d0c2da4f2fb9f,0x796efbb3f65aede8,0x00028176c42e1d9f},{0xb8779b5a7bd2436b,0x4067b7e09d0ca56c,0xfdbaee6ff27ebe38,0x69310e98174025de,0x71960a10fa15706e,0x08ffb4b3f6efafbf,0xb7116ca162211ea3,0x00253c0f60765f1f}};
const fp2_t xPQB = {{0x0e90506c89b46e0c,0x24ec65d5deb4e5b9,0x8477f7e141db8725,0xf76957ec1940dbd3,0xc2857af32534e715,0x06820654c6bae5f4,0x5ac928ef3c90c1f8,0x0024f724366faeed},{0xf6d7d2fdb06b91c4,0xe603cf05ce3f7555,0x8a0876277637415c,0xa1ef891f00155f8f,0x159db3ac93d39d57,0x5a05683aeaa453ff,0x180c38da2402f6fc,0x000b69d01dcb9107}};
// 2^f-torsion basis for A=0
const fp2_t xP2 = {{0x5d453ee3e6de9bf6,0xb5e51a5e88d8bbf3,0xc91ce6ef41eda957,0x4e0ba74e86fd3385,0xeff87c1def35e01f,0xedcd6c20496988a5,0x91a2c14abdb955fe,0x000be92a3f4de175},{0xa8a13d8e0022a825,0xb26bb70885d42bef,0x2533c31e799596b4,0xc41d58b247fb5ac9,0x8d45fa188fd5cb65,0x1b0593f6e4af948d,0x0ede22e4fcbe17ca,0x0014f54c5d5e1308}};
const fp2_t xQ2 = {{0x90414b2365f868cd,0x68af18688f73fe25,0x46ca4c4b4ca19114,0xadae5e2564f79c98,0xfe3e09af9d00eb08,0x6856810a298a57bf,0x170d41ba9327205d,0x001d588b6744b4ea},{0xfb94e978bcf29be5,0x136700c07b264bd6,0x62a3c89d8466b8f9,0x9f990ca7d3084bd8,0xaab6fb1040e242d0,0x9e9325c5a5c20740,0xa9a6ee97f376e198,0x0003c8eee3581511}};
const fp2_t xPQ2 = {{0x873d426c501eafe6,0xdeb1e87769484669,0x57c38f42bd1fef4d,0x53ca12d14b2ded18,0xb72ef4a808fc9d70,0x59d9a54b1844cca1,0x6ca7ccb15b6a9e49,0x00132a12929654f7},{0xffc6b824b6603270,0xb4152cbd3b607298,0xbe97764acdcb16ce,0x5205b1ec222c3be9,0x0cf5ac18d1eb4984,0xf5233664fd72c328,0x492e775887a3367c,0x001ce6bdfc847b45}};
// 3^g-torsion basis for A=0
const fp2_t xP3 = {{0x807a6abcb56d1915,0x3ab8ff7df809ea8f,0x2bd4f1eba48b23ac,0xeb32542370dde5ff,0xe6c50551eaaf2329,0x545dceaf98f07f09,0x90bfb0e10f3e5b48,0x000cc0084da1b367},{0xbd6f9c82cd4acc13,0x9b39d0711267d8a2,0x0ff31ab9fd38bb36,0xccc169cd75c1a58b,0xd943ad3571e304b4,0xfc3cda0859595d00,0xabda66362732b019,0x00070c5abcf1f329}};
const fp2_t xQ3 = {{0x2b46bbfa6e57a9db,0xa7a5881479d3aaff,0x5c8106d57698b7cb,0xde0ccd3c436cd1ad,0xed351e8fbc28fd8f,0xe18a9a18e4f5bf03,0x9a98961a81073911,0x001ed93f47abe8f2},{0x5dc96ddee6e9a9eb,0x5e8905d15b918006,0xe89cecdc3f9b48f1,0x9d1a98543001e35e,0x0795c7b134dadeba,0x8050c48376f36d87,0xe9f364f7c6fbee1f,0x00061cb05b384f81}};
const fp2_t xPQ3 = {{0xd44970f662987227,0x4c8eda7256920e8d,0x857f42e972e25a0e,0xc66a5b62daa3644d,0x6ab4ded74a464c38,0x4157cc1048b85a3a,0x9916ab1ee4e2305a,0x000c6943137ffba1},{0x0c5118f818e5279d,0xacb0c4a011613c7a,0xb87b4a9cb16a7565,0xc997ccbe0159f318,0x6fc50720bce6f45f,0xbd1916a5ca7789d7,0x3f48f437fdeccc64,0x000674d925340bc4}};
#endif

17
src/ec/ref/lvlx.cmake Normal file
View File

@@ -0,0 +1,17 @@
# Source files of the reference EC implementation for the current variant.
# All paths are taken relative to LVLX_DIR, which is defined outside this file.
set(SOURCE_FILES_EC_${SVARIANT_UPPER}_REF
    ${LVLX_DIR}/ec.c
    ${LVLX_DIR}/ec_jac.c
    ${LVLX_DIR}/xisog.c
    ${LVLX_DIR}/xeval.c
    ${LVLX_DIR}/isog_chains.c
    ${LVLX_DIR}/basis.c
    ${LVLX_DIR}/biextension.c
)

# Static per-variant EC library.
add_library(${LIB_EC_${SVARIANT_UPPER}} STATIC ${SOURCE_FILES_EC_${SVARIANT_UPPER}_REF})

# Header search paths used only while compiling the library itself.
target_include_directories(${LIB_EC_${SVARIANT_UPPER}} PRIVATE ${INC_COMMON} ${INC_PRECOMP_${SVARIANT_UPPER}} ${INC_PUBLIC} ${INC_MP} ${INC_GF} ${INC_GF_${SVARIANT_UPPER}} ${INC_EC})
target_compile_options(${LIB_EC_${SVARIANT_UPPER}} PRIVATE ${C_OPT_FLAGS})

# PUBLIC keeps the transitive linking behaviour of the keyword-less signature
# while making the visibility explicit.
target_link_libraries(${LIB_EC_${SVARIANT_UPPER}} PUBLIC ${LIB_PRECOMP_${SVARIANT_UPPER}} ${LIB_MP} ${LIB_GF_${SVARIANT_UPPER}})

# SQISIGN_VARIANT is exported (PUBLIC) to every target that links this library.
target_compile_definitions(${LIB_EC_${SVARIANT_UPPER}} PUBLIC SQISIGN_VARIANT=${SVARIANT_LOWER})

add_subdirectory(test)

416
src/ec/ref/lvlx/basis.c Normal file
View File

@@ -0,0 +1,416 @@
#include "ec.h"
#include "fp2.h"
#include "e0_basis.h"
#include <assert.h>
uint32_t
ec_recover_y(fp2_t *y, const fp2_t *Px, const ec_curve_t *curve)
{
    // Evaluates the right-hand side x^3 + A*x^2 + x of the Montgomery curve
    // equation at x = *Px (with A read from curve->A), accumulating it in *y,
    // and then hands *y to fp2_sqrt_verify, whose return value is passed on.
    fp2_t x_pow;

    fp2_sqr(&x_pow, Px);           // x^2
    fp2_mul(y, &x_pow, &curve->A); // A*x^2
    fp2_add(y, y, Px);             // A*x^2 + x
    fp2_mul(&x_pow, &x_pow, Px);   // x^3
    fp2_add(y, y, &x_pow);         // x^3 + A*x^2 + x

    // The verifying square root is needed because the curve is not yet known
    // to be supersingular at this point, so the x-coordinate may belong to a
    // point on the twist with B = 1.
    uint32_t status = fp2_sqrt_verify(y);
    return status;
}
// Computes a projective x-coordinate for P - Q from x(P), x(Q) and the curve
// coefficients (A : C), and stores it in PQ->x, PQ->z.
// The function builds the three coefficients Bxx, Bxz, Bzz, scales all of them
// by a common factor, and returns the root (Bxz + sqrt(Bxz^2 - Bxx*Bzz)) / Bzz
// as (PQ->x : PQ->z).
static void
difference_point(ec_point_t *PQ, const ec_point_t *P, const ec_point_t *Q, const ec_curve_t *curve)
{
// Given P,Q in projective x-only, computes a deterministic choice for (P-Q)
// Based on Proposition 3 of https://eprint.iacr.org/2017/518.pdf
fp2_t Bxx, Bxz, Bzz, t0, t1;
fp2_mul(&t0, &P->x, &Q->x);
fp2_mul(&t1, &P->z, &Q->z);
fp2_sub(&Bxx, &t0, &t1);
fp2_sqr(&Bxx, &Bxx);
fp2_mul(&Bxx, &Bxx, &curve->C); // C*(P.x*Q.x-P.z*Q.z)^2
fp2_add(&Bxz, &t0, &t1);
// From here on t0, t1 hold the cross products P.x*Q.z and P.z*Q.x
fp2_mul(&t0, &P->x, &Q->z);
fp2_mul(&t1, &P->z, &Q->x);
fp2_add(&Bzz, &t0, &t1);
fp2_mul(&Bxz, &Bxz, &Bzz); // (P.x*Q.x+P.z*Q.z)(P.x*Q.z+P.z*Q.x)
fp2_sub(&Bzz, &t0, &t1);
fp2_sqr(&Bzz, &Bzz);
fp2_mul(&Bzz, &Bzz, &curve->C); // C*(P.x*Q.z-P.z*Q.x)^2
fp2_mul(&Bxz, &Bxz, &curve->C); // C*(P.x*Q.x+P.z*Q.z)(P.x*Q.z+P.z*Q.x)
fp2_mul(&t0, &t0, &t1);
fp2_mul(&t0, &t0, &curve->A);
fp2_add(&t0, &t0, &t0);
fp2_add(&Bxz, &Bxz, &t0); // C*(P.x*Q.x+P.z*Q.z)(P.x*Q.z+P.z*Q.x) + 2*A*P.x*Q.z*P.z*Q.x
// To ensure that the denominator is a fourth power in Fp, we normalize by
// C*C_bar^2*(P.z)_bar^2*(Q.z)_bar^2
// t0 = conj(C)^2 * C (the conjugate is formed by negating the imaginary part)
fp_copy(&t0.re, &curve->C.re);
fp_neg(&t0.im, &curve->C.im);
fp2_sqr(&t0, &t0);
fp2_mul(&t0, &t0, &curve->C);
// t0 *= conj(P.z)^2
fp_copy(&t1.re, &P->z.re);
fp_neg(&t1.im, &P->z.im);
fp2_sqr(&t1, &t1);
fp2_mul(&t0, &t0, &t1);
// t0 *= conj(Q.z)^2
fp_copy(&t1.re, &Q->z.re);
fp_neg(&t1.im, &Q->z.im);
fp2_sqr(&t1, &t1);
fp2_mul(&t0, &t0, &t1);
// Apply the common scaling factor to all three coefficients
fp2_mul(&Bxx, &Bxx, &t0);
fp2_mul(&Bxz, &Bxz, &t0);
fp2_mul(&Bzz, &Bzz, &t0);
// Solving quadratic equation
// t0 = Bxz^2 - Bxx*Bzz (discriminant), replaced in place by its square root
fp2_sqr(&t0, &Bxz);
fp2_mul(&t1, &Bxx, &Bzz);
fp2_sub(&t0, &t0, &t1);
// No need to check if t0 is square, as per the entangled basis algorithm.
fp2_sqrt(&t0);
fp2_add(&PQ->x, &Bxz, &t0);
fp2_copy(&PQ->z, &Bzz);
}
// Lifts a basis x(P), x(Q), x(P-Q) assuming the curve has (A/C : 1) and the point
// P = (X/Z : 1). For generic implementation see lift_basis()
//
// Outputs: P receives x from B->P.x, z = 1 and the y-coordinate produced by
// ec_recover_y; Q receives coordinates derived from B->Q, B->PmQ, P and E->A.
// Returns the value returned by ec_recover_y for P.
// The two preconditions are only checked through assert(), so they are not
// enforced when assertions are compiled out.
uint32_t
lift_basis_normalized(jac_point_t *P, jac_point_t *Q, ec_basis_t *B, ec_curve_t *E)
{
assert(fp2_is_one(&B->P.z));
assert(fp2_is_one(&E->C));
fp2_copy(&P->x, &B->P.x);
fp2_copy(&Q->x, &B->Q.x);
fp2_copy(&Q->z, &B->Q.z);
fp2_set_one(&P->z);
uint32_t ret = ec_recover_y(&P->y, &P->x, E);
// Algorithm of Okeya-Sakurai to recover y.Q in the montgomery model
fp2_t v1, v2, v3, v4;
fp2_mul(&v1, &P->x, &Q->z);
fp2_add(&v2, &Q->x, &v1);
fp2_sub(&v3, &Q->x, &v1);
fp2_sqr(&v3, &v3);
fp2_mul(&v3, &v3, &B->PmQ.x);
fp2_add(&v1, &E->A, &E->A);
fp2_mul(&v1, &v1, &Q->z);
fp2_add(&v2, &v2, &v1);
fp2_mul(&v4, &P->x, &Q->x);
fp2_add(&v4, &v4, &Q->z);
fp2_mul(&v2, &v2, &v4);
fp2_mul(&v1, &v1, &Q->z);
fp2_sub(&v2, &v2, &v1);
fp2_mul(&v2, &v2, &B->PmQ.z);
fp2_sub(&Q->y, &v3, &v2);
// v1 = 2 * P.y * Q.z * PmQ.z; Q.x and Q.z are both scaled by this factor
fp2_add(&v1, &P->y, &P->y);
fp2_mul(&v1, &v1, &Q->z);
fp2_mul(&v1, &v1, &B->PmQ.z);
fp2_mul(&Q->x, &Q->x, &v1);
fp2_mul(&Q->z, &Q->z, &v1);
// Transforming to a jacobian coordinate
// (Q.y is multiplied by Q.z^2 and Q.x by Q.z)
fp2_sqr(&v1, &Q->z);
fp2_mul(&Q->y, &Q->y, &v1);
fp2_mul(&Q->x, &Q->x, &Q->z);
return ret;
}
uint32_t
lift_basis(jac_point_t *P, jac_point_t *Q, ec_basis_t *B, ec_curve_t *E)
{
    // Brings the inputs into the form expected by lift_basis_normalized():
    // the curve becomes (A/C : 1) and x(P) becomes (X/Z : 1). Both *B and *E
    // are modified in place.

    // A single batched inversion provides 1/Z(P) and 1/C together.
    fp2_t inv[2];
    fp2_copy(&inv[0], &B->P.z);
    fp2_copy(&inv[1], &E->C);
    fp2_batched_inv(inv, 2);

    // x(P) <- (X/Z : 1)
    fp2_mul(&B->P.x, &B->P.x, &inv[0]);
    fp2_set_one(&B->P.z);

    // (A : C) <- (A/C : 1)
    fp2_mul(&E->A, &E->A, &inv[1]);
    fp2_set_one(&E->C);

    // With everything normalised, lift to the Jacobian points P, Q
    uint32_t ret = lift_basis_normalized(P, Q, B, E);
    return ret;
}
// Evaluates x^3 + A*x^2 + x for the given x (with A read from curve->A)
// and returns the result of fp2_is_square on that value. Requires C = 1,
// which is checked by an assertion only.
static uint32_t
is_on_curve(const fp2_t *x, const ec_curve_t *curve)
{
    assert(fp2_is_one(&curve->C));

    // Horner form of the right-hand side: ((x + A) * x + 1) * x
    fp2_t rhs;
    fp2_add(&rhs, x, &curve->A);
    fp2_mul(&rhs, &rhs, x);
    fp2_add_one(&rhs, &rhs);
    fp2_mul(&rhs, &rhs, x);

    uint32_t square = fp2_is_square(&rhs);
    return square;
}
// Turns a point of order k*2^n (k odd, n maximal) into a point of order 2^f.
// P is updated in place: it is first multiplied by p_cofactor_for_2f and
// then doubled TORSION_EVEN_POWER - f times.
static inline void
clear_cofactor_for_maximal_even_order(ec_point_t *P, ec_curve_t *curve, int f)
{
    // Step 1: remove the odd cofactor, leaving a point of order 2^n
    ec_mul(P, p_cofactor_for_2f, P_COFACTOR_FOR_2F_BITLENGTH, P, curve);

    // Step 2: repeated doubling brings the order down to 2^f
    int doublings_done = 0;
    while (doublings_done < TORSION_EVEN_POWER - f) {
        xDBL_A24(P, P, &curve->A24, curve->is_A24_computed_and_normalized);
        doublings_done++;
    }
}
// Helper function which finds an NQR -1 / (1 + i*b) for entangled basis generation
//
// Searches upwards from n = start for a value b = n - 1 such that
//   (1) 1 + b^2 is not a square in Fp, and
//   (2) A^2*(z - 1) - z^2 is not a square in Fp2, where z = 1 + i*b.
// On exit *x holds -A / (1 + i*b). The return value is b when n <= 128 and
// 0 otherwise (0 signals that the hint could not be encoded).
static uint8_t
find_nqr_factor(fp2_t *x, ec_curve_t *curve, const uint8_t start)
{
// factor = -1/(1 + i*b) for b in Fp will be NQR whenever 1 + b^2 is NQR
// in Fp, so we find one of these and then invert (1 + i*b). We store b
// as a u8 hint to save time in verification.
// We return the hint as a u8, but use (uint16_t)n to give 2^16 - 1
// to make failure cryptographically negligible, with a fallback when
// n > 128 is required.
uint8_t hint;
uint32_t found = 0;
uint16_t n = start;
bool qr_b = 1;
fp_t b, tmp;
fp2_t z, t0, t1;
do {
while (qr_b) {
// find b with 1 + b^2 a non-quadratic residue
fp_set_small(&tmp, (uint32_t)n * n + 1);
qr_b = fp_is_square(&tmp);
n++; // keeps track of b = n - 1
}
// for Px := -A/(1 + i*b) to be on the curve
// is equivalent to A^2*(z-1) - z^2 NQR for z = 1 + i*b
// thus prevents unnecessary inversion pre-check
// t0 = z - 1 = i*b
// t1 = z = 1 + i*b
fp_set_small(&b, (uint32_t)n - 1);
fp2_set_zero(&t0);
fp2_set_one(&z);
fp_copy(&z.im, &b);
fp_copy(&t0.im, &b);
// A^2*(z-1) - z^2
fp2_sqr(&t1, &curve->A);
fp2_mul(&t0, &t0, &t1); // A^2 * (z - 1)
fp2_sqr(&t1, &z);
fp2_sub(&t0, &t0, &t1); // A^2 * (z - 1) - z^2
found = !fp2_is_square(&t0);
// re-arm the inner search so that a rejected b is followed by the next candidate
qr_b = 1;
} while (!found);
// set Px to -A/(1 + i*b)
fp2_copy(x, &z);
fp2_inv(x);
fp2_mul(x, x, &curve->A);
fp2_neg(x, x);
/*
* With very low probability n will not fit in 7 bits.
* We set hint = 0 which signals failure and the need
* to generate a value on the fly during verification
*/
// n is one past the accepted b, hence the n <= 128 bound and the n - 1 value
hint = n <= 128 ? n - 1 : 0;
return hint;
}
// Entangled-basis helper for the case where A is a non-residue.
// Starting from n = start, finds the first n for which x = n*A passes
// is_on_curve, writes that x and returns n as a hint.
// A return value of 0 means n did not fit in 7 bits, so the verifier has to
// regenerate the value on the fly.
static uint8_t
find_nA_x_coord(fp2_t *x, ec_curve_t *curve, const uint8_t start)
{
    // Only meaningful when A is a non-residue
    assert(!fp2_is_square(&curve->A));

    uint8_t n = start;

    // x = start * A (a plain copy suffices for start == 1)
    if (n != 1) {
        fp2_mul_small(x, &curve->A, n);
    } else {
        fp2_copy(x, &curve->A);
    }

    // Walk through the multiples of A until one is accepted
    for (; !is_on_curve(x, curve); n++) {
        fp2_add(x, x, &curve->A);
    }

    return (n < 128) ? n : 0;
}
// Basis of E0[2^f] for the special curve with A = 0.
// Entangled basis generation cannot handle A = 0, so the precomputed
// x-coordinates BASIS_E0_PX / BASIS_E0_QX are doubled down to order 2^f.
static void
ec_basis_E0_2f(ec_basis_t *PQ2, ec_curve_t *curve, int f)
{
    assert(fp2_is_zero(&curve->A));

    // Load the precomputed generators as (X : 1)
    ec_point_t P, Q;
    fp2_copy(&P.x, &BASIS_E0_PX);
    fp2_set_one(&P.z);
    fp2_copy(&Q.x, &BASIS_E0_QX);
    fp2_set_one(&Q.z);

    // Double both points (TORSION_EVEN_POWER - f) times
    for (int dbl = 0; dbl < TORSION_EVEN_POWER - f; ++dbl) {
        xDBL_E0(&P, &P);
        xDBL_E0(&Q, &Q);
    }

    // Store P, Q and x(P - Q) in the output basis
    copy_point(&PQ2->P, &P);
    copy_point(&PQ2->Q, &Q);
    difference_point(&PQ2->PmQ, &P, &Q, curve);
}
// Builds a basis <P, Q> of E[2^f] with Q above (0 : 0) and returns a one-byte
// hint that lets ec_curve_to_basis_2f_from_hint rebuild the same basis faster.
// Hint layout: bit 0 = "A is a square", bits 1..7 = search hint for x(P).
// The curve is normalised in place. For A = 0 the precomputed basis is used
// and the returned hint is 0.
uint8_t
ec_curve_to_basis_2f_to_hint(ec_basis_t *PQ2, ec_curve_t *curve, int f)
{
    // Bring the curve to (A/C : 1) and A24 to ((A + 2)/4 : 1)
    ec_normalize_curve_and_A24(curve);

    if (fp2_is_zero(&curve->A)) {
        ec_basis_E0_2f(PQ2, curve, f);
        return 0;
    }

    // A square     -> x(P) = -A / (1 + i*b) with 1 + b^2 a non-residue
    // A non-square -> x(P) = n*A
    const bool hint_A = fp2_is_square(&curve->A);
    ec_point_t P, Q;
    const uint8_t hint = hint_A ? find_nqr_factor(&P.x, curve, 1) : find_nA_x_coord(&P.x, curve, 1);
    fp2_set_one(&P.z);

    // x(Q) = -x(P) - A
    fp2_add(&Q.x, &curve->A, &P.x);
    fp2_neg(&Q.x, &Q.x);
    fp2_set_one(&Q.z);

    // Remove the odd cofactor and surplus doublings: both points get order 2^f
    clear_cofactor_for_maximal_even_order(&P, curve, f);
    clear_cofactor_for_maximal_even_order(&Q, curve, f);

    // The difference point is stored as the basis element Q and the computed
    // Q as PmQ; this is what places the basis' Q above (0, 0).
    difference_point(&PQ2->Q, &P, &Q, curve);
    copy_point(&PQ2->P, &P);
    copy_point(&PQ2->PmQ, &Q);

    // Pack both hints into a single byte, hint_A in the least significant bit
    assert(hint < 128); // the search hint must fit in 7 bits
    return (hint << 1) | hint_A;
}
// Rebuilds the basis <P, Q> of E[2^f] (Q above (0 : 0)) from the one-byte hint
// produced by ec_curve_to_basis_2f_to_hint.
// Returns 1 on success. In builds without NDEBUG it additionally validates the
// reconstructed point and basis order and returns 0 if a check fails.
int
ec_curve_to_basis_2f_from_hint(ec_basis_t *PQ2, ec_curve_t *curve, int f, const uint8_t hint)
{
    // Bring the curve to (A/C : 1) and A24 to ((A + 2)/4 : 1)
    ec_normalize_curve_and_A24(curve);

    if (fp2_is_zero(&curve->A)) {
        ec_basis_E0_2f(PQ2, curve, f);
        return 1;
    }

    // bit 0: whether A is a square; bits 1..7: search hint for x(P)
    const bool hint_A = hint & 1;
    const uint8_t hint_P = hint >> 1;

    ec_point_t P, Q;
    if (hint_P != 0 && hint_A) {
        // A square: b is known, so x(P) = -A / (1 + i*b)
        fp_set_one(&P.x.re);
        fp_set_small(&P.x.im, hint_P);
        fp2_inv(&P.x);
        fp2_mul(&P.x, &P.x, &curve->A);
        fp2_neg(&P.x, &P.x);
    } else if (hint_P != 0) {
        // A non-square: n is known, so x(P) = n*A
        fp2_mul_small(&P.x, &curve->A, hint_P);
    } else if (hint_A) {
        // Rare fallback: no 7-bit hint was available, so search again,
        // starting at 128 to skip the values already known not to work.
        find_nqr_factor(&P.x, curve, 128);
    } else {
        // Same fallback for the non-square case
        find_nA_x_coord(&P.x, curve, 128);
    }
    fp2_set_one(&P.z);

#ifndef NDEBUG
    // x(P) has to be on the curve and must not be a square
    int passed = is_on_curve(&P.x, curve);
    passed &= !fp2_is_square(&P.x);
    if (!passed)
        return 0;
#endif

    // x(Q) = -x(P) - A
    fp2_add(&Q.x, &curve->A, &P.x);
    fp2_neg(&Q.x, &Q.x);
    fp2_set_one(&Q.z);

    // Remove the odd cofactor and surplus doublings: both points get order 2^f
    clear_cofactor_for_maximal_even_order(&P, curve, f);
    clear_cofactor_for_maximal_even_order(&Q, curve, f);

    // The difference point is stored as the basis element Q and the computed
    // Q as PmQ; this is what places the basis' Q above (0, 0).
    difference_point(&PQ2->Q, &P, &Q, curve);
    copy_point(&PQ2->P, &P);
    copy_point(&PQ2->PmQ, &Q);

#ifndef NDEBUG
    passed &= test_basis_order_twof(PQ2, curve, f);
    if (!passed)
        return 0;
#endif

    return 1;
}

View File

@@ -0,0 +1,770 @@
#include <biextension.h>
#include <assert.h>
#include <inttypes.h>
#include <mp.h>
/*
* We implement the biextension arithmetic by using the cubical torsor
 * representation. For now we only implement the 2^e-ladder.
*
* Warning: cubicalADD is off by a factor x4 with respect to the correct
* cubical arithmetic. This does not affect the Weil pairing or the Tate
* pairing over F_{p^2} (due to the final exponentiation), but would give
* the wrong result if we compute the Tate pairing over F_p.
*/
// Cubical differential addition: R = P + Q, given ixPQ such that
// x(P - Q) = (1 : ixPQ).
// This coincides with xADD for a difference point 'antinormalised' as (1, z).
// Cost: 3M + 2S + 3a + 3s
// Note: if needed, cubicalDBL is simply xDBL_A24 normalized and
// costs 3M + 2S + 2a + 2s
// R is only written after all reads of P and Q, so R may alias either input.
static void
cubicalADD(ec_point_t *R, const ec_point_t *P, const ec_point_t *Q, const fp2_t *ixPQ)
{
    fp2_t p_sum, p_diff, q_sum, q_diff;

    // Sums and differences of the coordinates
    fp2_add(&p_sum, &P->x, &P->z);
    fp2_sub(&p_diff, &P->x, &P->z);
    fp2_add(&q_sum, &Q->x, &Q->z);
    fp2_sub(&q_diff, &Q->x, &Q->z);

    // Cross products (XP + ZP)(XQ - ZQ) and (XP - ZP)(XQ + ZQ)
    fp2_mul(&p_sum, &p_sum, &q_diff);
    fp2_mul(&p_diff, &p_diff, &q_sum);

    // Their sum and difference feed the X and Z coordinates respectively
    fp2_add(&q_sum, &p_sum, &p_diff);
    fp2_sub(&q_diff, &p_sum, &p_diff);

    fp2_sqr(&R->z, &q_diff);
    fp2_sqr(&q_sum, &q_sum);
    fp2_mul(&R->x, ixPQ, &q_sum);
}
// Given cubical reps of P, Q and x(P - Q) = (1 : ixPQ)
// compute P + Q, [2]Q
// Cost: 6M + 4S + 4a + 4s
//
// Outputs: PpQ receives P + Q, QQ receives [2]Q.
// A24 must be normalised to ((A + 2C)/4C : 1); this is asserted.
// Aliasing: the callers in this file pass PpQ == P and QQ == Q. The statement
// order below supports that: P is fully read before PpQ->x is first written,
// and Q is fully read before QQ->z is first written.
// NOTE(review): PpQ aliasing Q (or QQ aliasing P) does not look safe with
// this ordering - confirm before using it that way.
static void
cubicalDBLADD(ec_point_t *PpQ,
ec_point_t *QQ,
const ec_point_t *P,
const ec_point_t *Q,
const fp2_t *ixPQ,
const ec_point_t *A24)
{
// A24 = (A+2C/4C: 1)
assert(fp2_is_one(&A24->z));
fp2_t t0, t1, t2, t3;
// t0 = XP + ZP, t1 = XP - ZP (last reads of P)
fp2_add(&t0, &P->x, &P->z);
fp2_sub(&t1, &P->x, &P->z);
// PpQ->x is used as scratch for XQ + ZQ; t3 = XQ - ZQ (last reads of Q)
fp2_add(&PpQ->x, &Q->x, &Q->z);
fp2_sub(&t3, &Q->x, &Q->z);
// t2 = (XQ + ZQ)^2, QQ->z = (XQ - ZQ)^2 (temporarily)
fp2_sqr(&t2, &PpQ->x);
fp2_sqr(&QQ->z, &t3);
// Differential addition: cross products, then their sum and difference
fp2_mul(&t0, &t0, &t3);
fp2_mul(&t1, &t1, &PpQ->x);
fp2_add(&PpQ->x, &t0, &t1);
fp2_sub(&t3, &t0, &t1);
fp2_sqr(&PpQ->z, &t3);
fp2_sqr(&PpQ->x, &PpQ->x);
// Scale X by ixPQ (same final step as cubicalADD)
fp2_mul(&PpQ->x, ixPQ, &PpQ->x);
// Doubling: t3 = (XQ + ZQ)^2 - (XQ - ZQ)^2
fp2_sub(&t3, &t2, &QQ->z);
// X([2]Q) = (XQ + ZQ)^2 * (XQ - ZQ)^2
fp2_mul(&QQ->x, &t2, &QQ->z);
// Z([2]Q) = t3 * (A24.x * t3 + (XQ - ZQ)^2)
fp2_mul(&t0, &t3, &A24->x);
fp2_add(&t0, &t0, &QQ->z);
fp2_mul(&QQ->z, &t0, &t3);
}
// Biextension ladder by repeated doubling.
// Starting from (PQ, Q), applies cubicalDBLADD e times; each step adds the
// running multiple of Q into the first point and doubles that multiple.
// The results are written to PnQ and nQ. ixP is passed through to
// cubicalDBLADD as its difference parameter.
static void
biext_ladder_2e(uint32_t e,
                ec_point_t *PnQ,
                ec_point_t *nQ,
                const ec_point_t *PQ,
                const ec_point_t *Q,
                const fp2_t *ixP,
                const ec_point_t *A24)
{
    // Initialise the running pair
    copy_point(PnQ, PQ);
    copy_point(nQ, Q);

    uint32_t steps_left = e;
    while (steps_left > 0) {
        cubicalDBLADD(PnQ, nQ, PnQ, nQ, ixP, A24);
        steps_left--;
    }
}
// Compute the monodromy ratio X/Z above as a (X:Z) point to avoid a division
// We implicitly use (1,0) as a cubical point above 0_E
//
// Output: R = (nQ.x * P.x : PnQ.x).
// In builds with assertions enabled it is checked that nQ is the point at
// infinity and that PnQ and P are equal as projective points.
// R->x is written before PnQ->x is read, so R must not alias PnQ.
static void
point_ratio(ec_point_t *R, const ec_point_t *PnQ, const ec_point_t *nQ, const ec_point_t *P)
{
// Sanity tests
assert(ec_is_zero(nQ));
assert(ec_is_equal(PnQ, P));
fp2_mul(&R->x, &nQ->x, &P->x);
fp2_copy(&R->z, &PnQ->x);
}
// Cubical translation of P = (X : Z) by a 2-torsion point T = (A : B),
// performed in place on P:
//   B == 0         ->  (X : Z)   (P is unchanged)
//   A == 0         ->  (Z : X)
//   otherwise      ->  (A*X - B*Z : B*X - A*Z)
// The generic formula is always evaluated and the special cases are patched
// in with constant-time selects. P is only overwritten at the very end, so
// T may alias P.
static void
translate(ec_point_t *P, const ec_point_t *T)
{
    fp2_t prod0, prod1, new_x, new_z;

    // Generic case, X coordinate: A*X - B*Z
    fp2_mul(&prod0, &T->x, &P->x);
    fp2_mul(&prod1, &T->z, &P->z);
    fp2_sub(&new_x, &prod0, &prod1);

    // Generic case, Z coordinate: B*X - A*Z
    fp2_mul(&prod0, &T->z, &P->x);
    fp2_mul(&prod1, &T->x, &P->z);
    fp2_sub(&new_z, &prod0, &prod1);

    // Selection masks for the two degenerate shapes of T
    const uint32_t a_is_zero = fp2_is_zero(&T->x);
    const uint32_t b_is_zero = fp2_is_zero(&T->z);

    // A == 0: the result is (Z : X)
    fp2_select(&new_x, &new_x, &P->z, a_is_zero);
    fp2_select(&new_z, &new_z, &P->x, a_is_zero);

    // B == 0: the result is (X : Z)
    fp2_select(&new_x, &new_x, &P->x, b_is_zero);
    fp2_select(&new_z, &new_z, &P->z, b_is_zero);

    // Commit the result
    fp2_copy(&P->x, &new_x);
    fp2_copy(&P->z, &new_z);
}
// Compute the biextension monodromy g_P,Q^{2^g} (in level 1) via the
// cubical arithmetic of P+2^e Q.
// The suffix _i means that the inverse x-coordinates (1/x(P), 1/x(Q)) are
// supplied in pairing_data. Warning: to get a meaningful result when using
// the monodromy to compute pairings, P, Q, PQ and A24 need to be normalised
// (not strictly necessary, but care is needed otherwise; only the normalised
// case is handled for now).
// swap_PQ selects which point is laddered: false uses (P, Q, ixP), true uses
// the roles exchanged (Q, P, ixQ). The Weil pairing needs both variants.
static void
monodromy_i(ec_point_t *R, const pairing_params_t *pairing_data, bool swap_PQ)
{
    // Pick the fixed point, the laddered point and the matching inverse
    const ec_point_t *fixed = swap_PQ ? &pairing_data->Q : &pairing_data->P;
    const ec_point_t *laddered = swap_PQ ? &pairing_data->P : &pairing_data->Q;
    const fp2_t *inv_x = swap_PQ ? &pairing_data->ixQ : &pairing_data->ixP;

    // Work on private copies, as the ladder inputs are read repeatedly
    fp2_t ixP;
    ec_point_t P, Q, PnQ, nQ;
    copy_point(&P, fixed);
    copy_point(&Q, laddered);
    fp2_copy(&ixP, inv_x);

    // Biextension ladder with e - 1 doubling steps
    biext_ladder_2e(pairing_data->e - 1, &PnQ, &nQ, &pairing_data->PQ, &Q, &ixP, &pairing_data->A24);

    // Translate both ladder outputs by nQ, then take the ratio
    translate(&PnQ, &nQ);
    translate(&nQ, &nQ);
    point_ratio(R, &PnQ, &nQ, &P);
}
// Fills pairing_data with the normalised points (X/Z : 1) for P and Q and
// with the inverse x-coordinates ixP = ZP/XP and ixQ = ZQ/XQ.
// All four inversions are done with one batched inversion.
static void
cubical_normalization(pairing_params_t *pairing_data, const ec_point_t *P, const ec_point_t *Q)
{
    // inv[0] = 1/XP, inv[1] = 1/ZP, inv[2] = 1/XQ, inv[3] = 1/ZQ
    fp2_t inv[4];
    fp2_copy(&inv[0], &P->x);
    fp2_copy(&inv[1], &P->z);
    fp2_copy(&inv[2], &Q->x);
    fp2_copy(&inv[3], &Q->z);
    fp2_batched_inv(inv, 4);

    // Inverse x-coordinates: ZP / XP and ZQ / XQ
    fp2_mul(&pairing_data->ixP, &P->z, &inv[0]);
    fp2_mul(&pairing_data->ixQ, &Q->z, &inv[2]);

    // Affine x-coordinates: XP / ZP and XQ / ZQ
    fp2_mul(&pairing_data->P.x, &P->x, &inv[1]);
    fp2_mul(&pairing_data->Q.x, &Q->x, &inv[3]);

    // Both stored points now have Z = 1
    fp2_set_one(&pairing_data->P.z);
    fp2_set_one(&pairing_data->Q.z);
}
// Weil pairing from already-normalised pairing data (PQ must hold P + Q in
// (X:Z) coordinates). The result is the quotient of the two monodromy ratios:
//   r = (swapped.z * direct.x) / (swapped.x * direct.z)
static void
weil_n(fp2_t *r, const pairing_params_t *pairing_data)
{
    ec_point_t swapped, direct;

    // Monodromy with the roles of P and Q exchanged, then in natural order
    monodromy_i(&swapped, pairing_data, true);
    monodromy_i(&direct, pairing_data, false);

    // Denominator first, invert it, then multiply in the numerator factors
    fp2_mul(r, &swapped.x, &direct.z);
    fp2_inv(r);
    fp2_mul(r, r, &swapped.z);
    fp2_mul(r, r, &direct.x);
}
// Weil pairing of order 2^e; PQ must be P + Q in (X:Z) coordinates.
// Normalises the inputs, normalises A24 on the curve E (in place) and then
// evaluates the pairing on the normalised data.
// The code will crash (division by 0) if either P or Q is (0:1)
void
weil(fp2_t *r, uint32_t e, const ec_point_t *P, const ec_point_t *Q, const ec_point_t *PQ, ec_curve_t *E)
{
    pairing_params_t params;

    // Exponent, normalised points (PX/PZ : 1), (QX/QZ : 1), the inverses
    // PZ/PX and QZ/QX, and the sum point
    params.e = e;
    cubical_normalization(&params, P, Q);
    copy_point(&params.PQ, PQ);

    // The ladder needs A24 in normalised form
    ec_curve_normalize_A24(E);
    copy_point(&params.A24, &E->A24);

    // e_(2^n)(P, Q)
    weil_n(r, &params);
}
// two helper functions for reducing the tate pairing
// clear_cofac clears (p + 1) // 2^f for an Fp2 value
//
// Computes r = a^c, where c is the first digit of p_cofactor_for_2f, by
// left-to-right binary exponentiation. r may alias a.
//
// The exponent bits are consumed from the most significant one downwards.
// Consuming them from the least significant end while accumulating in the
// same way would instead yield a^(bit-reversal of c), which only equals a^c
// when the binary expansion of c is a palindrome.
//
// NOTE(review): only the least significant digit of p_cofactor_for_2f is
// read, so this assumes the cofactor fits in a single digit_t - confirm for
// any new parameter set.
void
clear_cofac(fp2_t *r, const fp2_t *a)
{
    const digit_t cofactor = *p_cofactor_for_2f;

    // Private copy of the base so that r == a is supported
    fp2_t base;
    fp2_copy(&base, a);

    // Position of the most significant set bit (0 if cofactor <= 1)
    int top = (int)(sizeof(digit_t) * 8) - 1;
    while (top > 0 && ((cofactor >> top) & 1) == 0) {
        top--;
    }

    // The leading bit contributes a^1
    fp2_copy(r, a);

    // Square for every remaining bit and multiply when the bit is set
    for (int bit = top - 1; bit >= 0; bit--) {
        fp2_sqr(r, r);
        if ((cofactor >> bit) & 1) {
            fp2_mul(r, r, &base);
        }
    }
}
// Frobenius on Fp2: maps a + i*b to a - i*b (copy the real part, negate the
// imaginary part).
void
fp2_frob(fp2_t *out, const fp2_t *in)
{
    fp_copy(&out->re, &in->re);
    fp_neg(&out->im, &in->im);
}
// Reduced Tate pairing of order 2^e. Normalises the points (PQ must be P + Q
// in (X:Z) coordinates) and computes 1/x(P), 1/x(Q) for the cubical ladder.
// A24 of the curve E is normalised in place.
void
reduced_tate(fp2_t *r, uint32_t e, const ec_point_t *P, const ec_point_t *Q, const ec_point_t *PQ, ec_curve_t *E)
{
    // Pairing data: exponent, (PX/PZ : 1), (QX/QZ : 1), PZ/PX, QZ/QX and P + Q
    pairing_params_t params;
    params.e = e;
    cubical_normalization(&params, P, Q);
    copy_point(&params.PQ, PQ);

    // The ladder needs A24 in normalised form
    ec_curve_normalize_A24(E);
    copy_point(&params.A24, &E->A24);

    // Unreduced Tate pairing as the projective pair (R.x, R.z)
    ec_point_t R;
    monodromy_i(&R, &params, true);

    // reduced tate is -(R.Z/R.X)^((p^2 - 1) div 2^f)
    // The ^(p - 1) part is split into a Frobenius and an inversion; R.x and
    // R.z are reused as numerator and denominator of that power.
    fp2_t conj, saved_x;
    fp2_copy(&saved_x, &R.x);
    fp2_frob(&conj, &R.x);
    fp2_mul(&R.x, &R.z, &conj);
    fp2_frob(&conj, &R.z);
    fp2_mul(&R.z, &saved_x, &conj);
    fp2_inv(&R.x);
    fp2_mul(r, &R.x, &R.z);

    // Remove the odd cofactor
    clear_cofac(r, r);

    // Remove the remaining 2^(TORSION_EVEN_POWER - e) by repeated squaring
    const uint32_t e_full = TORSION_EVEN_POWER;
    const uint32_t e_diff = e_full - e;
    for (uint32_t sq = 0; sq < e_diff; sq++) {
        fp2_sqr(r, r);
    }
}
// Functions to compute discrete logs by computing the Weil pairing of points
// followed by computing the dlog in Fp^2
// (If we work with full order points, it would be faster to use the Tate
// pairings rather than the Weil pairings; this is not implemented yet)
//
// Recursive divide-and-conquer dlog step.
//   a        : output, NWORDS_ORDER digits, receives the len-bit logarithm
//   len      : number of bits still to be recovered at this level
//   pows_f,
//   pows_g   : stacks of intermediate powers; entry stacklen - 1 is the
//              current pair, lower entries belong to enclosing levels and are
//              updated once a bit has been determined
//   stacklen : number of valid stack entries
// Returns false if, at a leaf, the current f is neither 1 nor equal to the
// current g (no logarithm exists), true otherwise.
static bool
fp2_dlog_2e_rec(digit_t *a, long len, fp2_t *pows_f, fp2_t *pows_g, long stacklen)
{
    if (len == 0) {
        // Nothing to recover: a = 0
        for (int i = 0; i < NWORDS_ORDER; i++) {
            a[i] = 0;
        }
        return true;
    }

    if (len == 1) {
        // Leaf: the bit is 0 if f == 1 and 1 if f == g
        bool bit_is_one;
        if (fp2_is_one(&pows_f[stacklen - 1])) {
            bit_is_one = false;
        } else if (fp2_is_equal(&pows_f[stacklen - 1], &pows_g[stacklen - 1])) {
            bit_is_one = true;
        } else {
            return false;
        }

        // a = bit
        for (int i = 0; i < NWORDS_ORDER; i++) {
            a[i] = 0;
        }
        if (bit_is_one) {
            a[0] = 1;
        }

        // Update every enclosing level for the bit that was just found
        for (int i = 0; i < stacklen - 1; ++i) {
            if (bit_is_one) {
                fp2_mul(&pows_f[i], &pows_f[i], &pows_g[i]); // new_f = f*g
            }
            fp2_sqr(&pows_g[i], &pows_g[i]); // new_g = g^2
        }
        return true;
    }

    // Split into a low part of `right` bits and a high part of `left` bits.
    // Plain integer division; no floating point is needed to halve len.
    const long right = len / 2;
    const long left = len - right;

    // Push a copy of the current pair and raise it to 2^left, so that only
    // the low `right` bits of the logarithm remain visible in it
    pows_f[stacklen] = pows_f[stacklen - 1];
    pows_g[stacklen] = pows_g[stacklen - 1];
    for (long i = 0; i < left; i++) {
        fp2_sqr(&pows_f[stacklen], &pows_f[stacklen]);
        fp2_sqr(&pows_g[stacklen], &pows_g[stacklen]);
    }

    digit_t dlp1[NWORDS_ORDER], dlp2[NWORDS_ORDER];

    // Low bits from the pushed pair; this also updates the enclosing levels
    if (!fp2_dlog_2e_rec(dlp1, right, pows_f, pows_g, stacklen + 1)) {
        return false;
    }
    // High bits from the (now updated) current pair
    if (!fp2_dlog_2e_rec(dlp2, left, pows_f, pows_g, stacklen)) {
        return false;
    }

    // a = dlp1 + 2^right * dlp2
    multiple_mp_shiftl(dlp2, right, NWORDS_ORDER);
    mp_add(a, dlp2, dlp1, NWORDS_ORDER);
    return true;
}
// Discrete logarithm of order 2^e in Fp2: finds scal with f = g^scal.
// Inputs are f and the inverse of g. scal receives NWORDS_ORDER digits.
// Returns whether a logarithm was found (also asserted).
static bool
fp2_dlog_2e(digit_t *scal, const fp2_t *f, const fp2_t *g_inverse, int e)
{
    // Stack depth for the recursion: floor(log2(e)) + 1 (1 when e <= 1)
    long depth = 1;
    for (long rest = e; rest > 1; rest >>= 1) {
        depth++;
    }

    // Bottom stack entries are the inputs themselves
    fp2_t pows_f[depth], pows_g[depth];
    pows_f[0] = *f;
    pows_g[0] = *g_inverse;

    // Start from scal = 0
    for (int word = 0; word < NWORDS_ORDER; word++) {
        scal[word] = 0;
    }

    const bool ok = fp2_dlog_2e_rec(scal, e, pows_f, pows_g, 1);
    assert(ok);
    return ok;
}
// Normalize the bases (P, Q), (R, S) and store their inverse
// and additionally normalise the curve to (A/C : 1)
//
// Works in place on pairing_dlog_data->PQ, pairing_dlog_data->RS and curve:
//  - P, Q, PmQ (of PQ) and P, Q (of RS) become (X/Z : 1),
//  - ixP, ixQ, ixR, ixS receive Z/X of P, Q, R = RS.P and S = RS.Q,
//  - curve->A becomes A/C and curve->C becomes 1.
// All inversions share one batched inversion. RS.PmQ is not touched.
static void
cubical_normalization_dlog(pairing_dlog_params_t *pairing_dlog_data, ec_curve_t *curve)
{
fp2_t t[11];
ec_basis_t *PQ = &pairing_dlog_data->PQ;
ec_basis_t *RS = &pairing_dlog_data->RS;
// Collect every value to invert: X and Z of each point, then C
fp2_copy(&t[0], &PQ->P.x);
fp2_copy(&t[1], &PQ->P.z);
fp2_copy(&t[2], &PQ->Q.x);
fp2_copy(&t[3], &PQ->Q.z);
fp2_copy(&t[4], &PQ->PmQ.x);
fp2_copy(&t[5], &PQ->PmQ.z);
fp2_copy(&t[6], &RS->P.x);
fp2_copy(&t[7], &RS->P.z);
fp2_copy(&t[8], &RS->Q.x);
fp2_copy(&t[9], &RS->Q.z);
fp2_copy(&t[10], &curve->C);
fp2_batched_inv(t, 11);
// For each point: first Z/X (needs the old Z), then X/Z, then Z = 1
fp2_mul(&pairing_dlog_data->ixP, &PQ->P.z, &t[0]);
fp2_mul(&PQ->P.x, &PQ->P.x, &t[1]);
fp2_set_one(&PQ->P.z);
fp2_mul(&pairing_dlog_data->ixQ, &PQ->Q.z, &t[2]);
fp2_mul(&PQ->Q.x, &PQ->Q.x, &t[3]);
fp2_set_one(&PQ->Q.z);
// Only the normalised x(P - Q) is stored; t[4] (the inverse of PmQ.x) is
// not used afterwards
fp2_mul(&PQ->PmQ.x, &PQ->PmQ.x, &t[5]);
fp2_set_one(&PQ->PmQ.z);
fp2_mul(&pairing_dlog_data->ixR, &RS->P.z, &t[6]);
fp2_mul(&RS->P.x, &RS->P.x, &t[7]);
fp2_set_one(&RS->P.z);
fp2_mul(&pairing_dlog_data->ixS, &RS->Q.z, &t[8]);
fp2_mul(&RS->Q.x, &RS->Q.x, &t[9]);
fp2_set_one(&RS->Q.z);
// Curve: (A : C) -> (A/C : 1)
fp2_mul(&curve->A, &curve->A, &t[10]);
fp2_set_one(&curve->C);
}
// Given two bases <P, Q> and basis = <R, S> compute
// x(P - R), x(P - S), x(R - Q), x(S - Q)
static void
compute_difference_points(pairing_dlog_params_t *pairing_dlog_data, ec_curve_t *curve)
{
jac_point_t xyP, xyQ, xyR, xyS, temp;
// lifting the two basis points, assumes that x(P) and x(R)
// and the curve itself are normalised to (X : 1)
lift_basis_normalized(&xyP, &xyQ, &pairing_dlog_data->PQ, curve);
lift_basis_normalized(&xyR, &xyS, &pairing_dlog_data->RS, curve);
// computation of the differences
// x(P - R)
jac_neg(&temp, &xyR);
ADD(&temp, &temp, &xyP, curve);
jac_to_xz(&pairing_dlog_data->diff.PmR, &temp);
// x(P - S)
jac_neg(&temp, &xyS);
ADD(&temp, &temp, &xyP, curve);
jac_to_xz(&pairing_dlog_data->diff.PmS, &temp);
// x(R - Q)
jac_neg(&temp, &xyQ);
ADD(&temp, &temp, &xyR, curve);
jac_to_xz(&pairing_dlog_data->diff.RmQ, &temp);
// x(S - Q)
jac_neg(&temp, &xyQ);
ADD(&temp, &temp, &xyS, curve);
jac_to_xz(&pairing_dlog_data->diff.SmQ, &temp);
}
// Inline all the Weil pairing computations needed for ec_dlog_2_weil
//
// Runs the cubical ladders for the five pairings e(P,Q), e(P,R), e(R,Q),
// e(P,S), e(S,Q) in one interleaved loop, then solves four discrete logs with
// respect to e(P,Q). Outputs r1, r2, s1, s2 (NWORDS_ORDER digits each, as
// written by fp2_dlog_2e).
// Expects pairing_dlog_data to hold normalised bases, the inverse
// x-coordinates ixP..ixS, the difference points and a normalised A24.
static void
weil_dlog(digit_t *r1, digit_t *r2, digit_t *s1, digit_t *s2, pairing_dlog_params_t *pairing_dlog_data)
{
ec_point_t nP, nQ, nR, nS, nPQ, PnQ, nPR, PnR, nPS, PnS, nRQ, RnQ, nSQ, SnQ;
// Running multiples of the four basis points
copy_point(&nP, &pairing_dlog_data->PQ.P);
copy_point(&nQ, &pairing_dlog_data->PQ.Q);
copy_point(&nR, &pairing_dlog_data->RS.P);
copy_point(&nS, &pairing_dlog_data->RS.Q);
// Each difference point seeds two accumulators, one per ladder direction
copy_point(&nPQ, &pairing_dlog_data->PQ.PmQ);
copy_point(&PnQ, &pairing_dlog_data->PQ.PmQ);
copy_point(&nPR, &pairing_dlog_data->diff.PmR);
copy_point(&nPS, &pairing_dlog_data->diff.PmS);
copy_point(&PnR, &pairing_dlog_data->diff.PmR);
copy_point(&PnS, &pairing_dlog_data->diff.PmS);
copy_point(&nRQ, &pairing_dlog_data->diff.RmQ);
copy_point(&nSQ, &pairing_dlog_data->diff.SmQ);
copy_point(&RnQ, &pairing_dlog_data->diff.RmQ);
copy_point(&SnQ, &pairing_dlog_data->diff.SmQ);
// Interleaved ladders. For each running multiple (nP, nQ, nR, nS) the
// cubicalADD calls that read it come first and the single cubicalDBLADD that
// also doubles it comes last, so every accumulator in one iteration sees the
// same multiple. Do not reorder the statements within a group.
for (uint32_t i = 0; i < pairing_dlog_data->e - 1; i++) {
cubicalADD(&nPQ, &nPQ, &nP, &pairing_dlog_data->ixQ);
cubicalADD(&nPR, &nPR, &nP, &pairing_dlog_data->ixR);
cubicalDBLADD(&nPS, &nP, &nPS, &nP, &pairing_dlog_data->ixS, &pairing_dlog_data->A24);
cubicalADD(&PnQ, &PnQ, &nQ, &pairing_dlog_data->ixP);
cubicalADD(&RnQ, &RnQ, &nQ, &pairing_dlog_data->ixR);
cubicalDBLADD(&SnQ, &nQ, &SnQ, &nQ, &pairing_dlog_data->ixS, &pairing_dlog_data->A24);
cubicalADD(&PnR, &PnR, &nR, &pairing_dlog_data->ixP);
cubicalDBLADD(&nRQ, &nR, &nRQ, &nR, &pairing_dlog_data->ixQ, &pairing_dlog_data->A24);
cubicalADD(&PnS, &PnS, &nS, &pairing_dlog_data->ixP);
cubicalDBLADD(&nSQ, &nS, &nSQ, &nS, &pairing_dlog_data->ixQ, &pairing_dlog_data->A24);
}
// Same post-processing as monodromy_i: translate every accumulator by its
// running multiple, and only afterwards translate the multiples themselves
translate(&nPQ, &nP);
translate(&nPR, &nP);
translate(&nPS, &nP);
translate(&PnQ, &nQ);
translate(&RnQ, &nQ);
translate(&SnQ, &nQ);
translate(&PnR, &nR);
translate(&nRQ, &nR);
translate(&PnS, &nS);
translate(&nSQ, &nS);
translate(&nP, &nP);
translate(&nQ, &nQ);
translate(&nR, &nR);
translate(&nS, &nS);
// computation of the reference weil pairing
// w1[i] collects the values that get inverted, w2[i] the values they are
// multiplied with afterwards
ec_point_t T0, T1;
fp2_t w1[5], w2[5];
// e(P, Q) = w0
point_ratio(&T0, &nPQ, &nP, &pairing_dlog_data->PQ.Q);
point_ratio(&T1, &PnQ, &nQ, &pairing_dlog_data->PQ.P);
// For the first element we need its inverse for
// fp2_dlog_2e so we swap w1 and w2 here to save inversions
fp2_mul(&w2[0], &T0.x, &T1.z);
fp2_mul(&w1[0], &T1.x, &T0.z);
// e(P,R) = w0^r2
point_ratio(&T0, &nPR, &nP, &pairing_dlog_data->RS.P);
point_ratio(&T1, &PnR, &nR, &pairing_dlog_data->PQ.P);
fp2_mul(&w1[1], &T0.x, &T1.z);
fp2_mul(&w2[1], &T1.x, &T0.z);
// e(R,Q) = w0^r1
point_ratio(&T0, &nRQ, &nR, &pairing_dlog_data->PQ.Q);
point_ratio(&T1, &RnQ, &nQ, &pairing_dlog_data->RS.P);
fp2_mul(&w1[2], &T0.x, &T1.z);
fp2_mul(&w2[2], &T1.x, &T0.z);
// e(P,S) = w0^s2
point_ratio(&T0, &nPS, &nP, &pairing_dlog_data->RS.Q);
point_ratio(&T1, &PnS, &nS, &pairing_dlog_data->PQ.P);
fp2_mul(&w1[3], &T0.x, &T1.z);
fp2_mul(&w2[3], &T1.x, &T0.z);
// e(S,Q) = w0^s1
point_ratio(&T0, &nSQ, &nS, &pairing_dlog_data->PQ.Q);
point_ratio(&T1, &SnQ, &nQ, &pairing_dlog_data->RS.Q);
fp2_mul(&w1[4], &T0.x, &T1.z);
fp2_mul(&w2[4], &T1.x, &T0.z);
// One batched inversion, then w1[i] = w2[i] / w1[i]
fp2_batched_inv(w1, 5);
for (int i = 0; i < 5; i++) {
fp2_mul(&w1[i], &w1[i], &w2[i]);
}
// Discrete logs with respect to the reference pairing; w1[0] is passed as
// the inverse-of-g argument of fp2_dlog_2e
fp2_dlog_2e(r2, &w1[1], &w1[0], pairing_dlog_data->e);
fp2_dlog_2e(r1, &w1[2], &w1[0], pairing_dlog_data->e);
fp2_dlog_2e(s2, &w1[3], &w1[0], pairing_dlog_data->e);
fp2_dlog_2e(s1, &w1[4], &w1[0], pairing_dlog_data->e);
}
// Two-dimensional discrete logarithm via Weil pairings: finds r1, r2, s1, s2
// with R = [r1]P + [r2]Q and S = [s1]P + [s2]Q, where <P, Q> = PQ and
// <R, S> = RS are bases of the 2^e-torsion.
// The curve is normalised in place (A24, and (A : C) -> (A/C : 1)); the bases
// are normalised on private copies.
void
ec_dlog_2_weil(digit_t *r1,
               digit_t *r2,
               digit_t *s1,
               digit_t *s2,
               ec_basis_t *PQ,
               const ec_basis_t *RS,
               ec_curve_t *curve,
               int e)
{
    assert(test_point_order_twof(&PQ->Q, curve, e));

    // The ladders need a normalised A24
    ec_curve_normalize_A24(curve);

    // Working copy of all pairing inputs
    pairing_dlog_params_t ctx;
    ctx.e = e;
    ctx.PQ = *PQ;
    ctx.RS = *RS;
    ctx.A24 = curve->A24;

    // Normalise, derive the mixed difference points, then solve
    cubical_normalization_dlog(&ctx, curve);
    compute_difference_points(&ctx, curve);
    weil_dlog(r1, r2, s1, s2, &ctx);

#ifndef NDEBUG
    ec_point_t check;
    // R = [r1]P + [r2]Q
    ec_biscalar_mul(&check, r1, r2, e, PQ, curve);
    assert(ec_is_equal(&check, &RS->P));
    // S = [s1]P + [s2]Q
    ec_biscalar_mul(&check, s1, s2, e, PQ, curve);
    assert(ec_is_equal(&check, &RS->Q));
#endif
}
// Inline all the Tate pairing computations needed for ec_dlog_2_tate
// including reduction, assumes a basis PQ of full E[2^e_full] torsion
// and a basis RS of smaller E[2^e] torsion
//
// Outputs r1, r2, s1, s2 (NWORDS_ORDER digits each, as written by
// fp2_dlog_2e). Expects pairing_dlog_data to hold normalised bases, the
// inverse x-coordinates, the difference points and a normalised A24.
static void
tate_dlog_partial(digit_t *r1, digit_t *r2, digit_t *s1, digit_t *s2, pairing_dlog_params_t *pairing_dlog_data)
{
uint32_t e_full = TORSION_EVEN_POWER;
uint32_t e_diff = e_full - pairing_dlog_data->e;
ec_point_t nP, nQ, nR, nS, nPQ, PnR, PnS, nRQ, nSQ;
// Running multiples of the basis points and the ladder accumulators
copy_point(&nP, &pairing_dlog_data->PQ.P);
copy_point(&nQ, &pairing_dlog_data->PQ.Q);
copy_point(&nR, &pairing_dlog_data->RS.P);
copy_point(&nS, &pairing_dlog_data->RS.Q);
copy_point(&nPQ, &pairing_dlog_data->PQ.PmQ);
copy_point(&PnR, &pairing_dlog_data->diff.PmR);
copy_point(&PnS, &pairing_dlog_data->diff.PmS);
copy_point(&nRQ, &pairing_dlog_data->diff.RmQ);
copy_point(&nSQ, &pairing_dlog_data->diff.SmQ);
// Reference pairing: P has full order, so this ladder runs e_full - 1 steps
for (uint32_t i = 0; i < e_full - 1; i++) {
cubicalDBLADD(&nPQ, &nP, &nPQ, &nP, &pairing_dlog_data->ixQ, &pairing_dlog_data->A24);
}
// Ladders in R and S run e - 1 steps. In each group the cubicalADD that
// reads nR (resp. nS) precedes the cubicalDBLADD that also doubles it.
for (uint32_t i = 0; i < pairing_dlog_data->e - 1; i++) {
cubicalADD(&PnR, &PnR, &nR, &pairing_dlog_data->ixP);
cubicalDBLADD(&nRQ, &nR, &nRQ, &nR, &pairing_dlog_data->ixQ, &pairing_dlog_data->A24);
cubicalADD(&PnS, &PnS, &nS, &pairing_dlog_data->ixP);
cubicalDBLADD(&nSQ, &nS, &nSQ, &nS, &pairing_dlog_data->ixQ, &pairing_dlog_data->A24);
}
// Translate the accumulators first, then the running multiples themselves
translate(&nPQ, &nP);
translate(&PnR, &nR);
translate(&nRQ, &nR);
translate(&PnS, &nS);
translate(&nSQ, &nS);
translate(&nP, &nP);
translate(&nQ, &nQ);
translate(&nR, &nR);
translate(&nS, &nS);
// computation of the reference Tate pairing
// (w1[i], w2[i]) is a projective pair for pairing i. For the pairings
// against Q (i = 2, 4) the roles of T0.x and T0.z are exchanged relative to
// the others.
ec_point_t T0;
fp2_t w1[5], w2[5];
// t(P, Q)^(2^e_diff) = w0
point_ratio(&T0, &nPQ, &nP, &pairing_dlog_data->PQ.Q);
fp2_copy(&w1[0], &T0.x);
fp2_copy(&w2[0], &T0.z);
// t(R,P) = w0^r2
point_ratio(&T0, &PnR, &nR, &pairing_dlog_data->PQ.P);
fp2_copy(&w1[1], &T0.x);
fp2_copy(&w2[1], &T0.z);
// t(R,Q) = w0^r1
point_ratio(&T0, &nRQ, &nR, &pairing_dlog_data->PQ.Q);
fp2_copy(&w2[2], &T0.x);
fp2_copy(&w1[2], &T0.z);
// t(S,P) = w0^s2
point_ratio(&T0, &PnS, &nS, &pairing_dlog_data->PQ.P);
fp2_copy(&w1[3], &T0.x);
fp2_copy(&w2[3], &T0.z);
// t(S,Q) = w0^s1
point_ratio(&T0, &nSQ, &nS, &pairing_dlog_data->PQ.Q);
fp2_copy(&w2[4], &T0.x);
fp2_copy(&w1[4], &T0.z);
// batched reduction using projective representation
// Afterwards w1[i] = w2_old * frob(w1_old), w2[i] = w1_old * frob(w2_old)
for (int i = 0; i < 5; i++) {
fp2_t frob, tmp;
fp2_copy(&tmp, &w1[i]);
// inline frobenius for ^p
// multiply by inverse to get ^(p-1)
fp2_frob(&frob, &w1[i]);
fp2_mul(&w1[i], &w2[i], &frob);
// repeat for denom
fp2_frob(&frob, &w2[i]);
fp2_mul(&w2[i], &tmp, &frob);
}
// batched normalization
fp2_batched_inv(w2, 5);
for (int i = 0; i < 5; i++) {
fp2_mul(&w1[i], &w1[i], &w2[i]);
}
// Finish the reduction: odd cofactor, then the surplus power of two
for (int i = 0; i < 5; i++) {
clear_cofac(&w1[i], &w1[i]);
// removes 2^e_diff
for (uint32_t j = 0; j < e_diff; j++) {
fp2_sqr(&w1[i], &w1[i]);
}
}
// Discrete logs with respect to the reference pairing w1[0]
fp2_dlog_2e(r2, &w1[1], &w1[0], pairing_dlog_data->e);
fp2_dlog_2e(r1, &w1[2], &w1[0], pairing_dlog_data->e);
fp2_dlog_2e(s2, &w1[3], &w1[0], pairing_dlog_data->e);
fp2_dlog_2e(s1, &w1[4], &w1[0], pairing_dlog_data->e);
}
// Two-dimensional discrete logarithm via reduced Tate pairings.
// PQ must be a basis of the full 2^TORSION_EVEN_POWER torsion, RS a basis of
// the 2^e-torsion. Outputs r1, r2, s1, s2 such that, with
// e_diff = TORSION_EVEN_POWER - e,
//   R = [2^e_diff]([r1]P + [r2]Q)  and  S = [2^e_diff]([s1]P + [s2]Q)
// (this is what the debug checks at the end verify).
// The curve is normalised in place; the bases are normalised on copies.
void
ec_dlog_2_tate(digit_t *r1,
               digit_t *r2,
               digit_t *s1,
               digit_t *s2,
               const ec_basis_t *PQ,
               const ec_basis_t *RS,
               ec_curve_t *curve,
               int e)
{
#ifndef NDEBUG
    int e_full = TORSION_EVEN_POWER;
    int e_diff = e_full - e;
#endif
    // PQ is assumed to be a full-torsion basis
    assert(test_basis_order_twof(PQ, curve, e_full));

    // The ladders need a normalised A24
    ec_curve_normalize_A24(curve);

    // Working copy of all pairing inputs
    pairing_dlog_params_t ctx;
    ctx.e = e;
    ctx.PQ = *PQ;
    ctx.RS = *RS;
    ctx.A24 = curve->A24;

    // Normalise, derive the mixed difference points, then solve
    cubical_normalization_dlog(&ctx, curve);
    compute_difference_points(&ctx, curve);
    tate_dlog_partial(r1, r2, s1, s2, &ctx);

#ifndef NDEBUG
    ec_point_t check;
    // R = [2^e_diff]([r1]P + [r2]Q)
    ec_biscalar_mul(&check, r1, r2, e, PQ, curve);
    ec_dbl_iter(&check, e_diff, &check, curve);
    assert(ec_is_equal(&check, &RS->P));
    // S = [2^e_diff]([s1]P + [s2]Q)
    ec_biscalar_mul(&check, s1, s2, e, PQ, curve);
    ec_dbl_iter(&check, e_diff, &check, curve);
    assert(ec_is_equal(&check, &RS->Q));
#endif
}

665
src/ec/ref/lvlx/ec.c Normal file
View File

@@ -0,0 +1,665 @@
#include <assert.h>
#include <stdio.h>
#include <mp.h>
#include <ec.h>
// Sets P to the identity element, represented as (1 : 0).
void
ec_point_init(ec_point_t *P)
{
    fp2_set_one(&P->x);
    fp2_set_zero(&P->z);
}
// Resets a curve struct to (A : C) = (0 : 1) with no cached A24.
void
ec_curve_init(ec_curve_t *E)
{
    // Curve constants
    fp2_set_zero(&E->A);
    fp2_set_one(&E->C);

    // A24 starts out as the identity point and is flagged as not yet
    // computed/normalised
    ec_point_init(&E->A24);
    E->is_A24_computed_and_normalized = false;
}
// Constant-time point selection:
// option = 0 gives Q <- P1, option = 0xFF...FF gives Q <- P2.
void
select_point(ec_point_t *Q, const ec_point_t *P1, const ec_point_t *P2, const digit_t option)
{
    fp2_select(&Q->x, &P1->x, &P2->x, option);
    fp2_select(&Q->z, &P1->z, &P2->z, option);
}
// Constant-time conditional swap:
// option = 0 leaves P and Q as they are, option = 0xFF...FF exchanges them.
void
cswap_points(ec_point_t *P, ec_point_t *Q, const digit_t option)
{
    fp2_cswap(&P->x, &Q->x, option);
    fp2_cswap(&P->z, &Q->z, option);
}
// Normalises P in place from (X : Z) to (X/Z : 1).
void
ec_normalize_point(ec_point_t *P)
{
    // Z <- 1/Z, X <- X * (1/Z), Z <- 1
    fp2_inv(&P->z);
    fp2_mul(&P->x, &P->x, &P->z);
    fp2_set_one(&P->z);
}
// Normalises the curve constants in place from (A : C) to (A/C : 1).
void
ec_normalize_curve(ec_curve_t *E)
{
    // C <- 1/C, A <- A * (1/C), C <- 1
    fp2_inv(&E->C);
    fp2_mul(&E->A, &E->A, &E->C);
    fp2_set_one(&E->C);
}
// Makes sure E->A24 is available in normalised form (Z = 1).
// The work is skipped when the curve's flag says it has been done before.
void
ec_curve_normalize_A24(ec_curve_t *E)
{
    const bool already_done = E->is_A24_computed_and_normalized;
    if (!already_done) {
        // Derive A24 from (A : C), normalise it and remember that we did
        AC_to_A24(&E->A24, E);
        ec_normalize_point(&E->A24);
        E->is_A24_computed_and_normalized = true;
    }
    assert(fp2_is_one(&E->A24.z));
}
// Normalises both the curve, to (A/C : 1), and A24, to ((A + 2)/4 : 1).
// Neither is assumed to be normalised on entry; each step is skipped when it
// is already done.
void
ec_normalize_curve_and_A24(ec_curve_t *E)
{
    // Curve constants first: (A : C) -> (A/C : 1)
    if (!fp2_is_one(&E->C)) {
        ec_normalize_curve(E);
    }

    if (E->is_A24_computed_and_normalized) {
        return;
    }

    // With C = 1, A24 = ((A + 2) / 4 : 1)
    fp2_add_one(&E->A24.x, &E->A);     // re(A24.x) = re(A) + 1
    fp2_add_one(&E->A24.x, &E->A24.x); // re(A24.x) = re(A) + 2
    fp_copy(&E->A24.x.im, &E->A.im);   // im(A24.x) = im(A)
    fp2_half(&E->A24.x, &E->A24.x);    // (A + 2) / 2
    fp2_half(&E->A24.x, &E->A24.x);    // (A + 2) / 4
    fp2_set_one(&E->A24.z);
    E->is_A24_computed_and_normalized = true;
}
// Tests whether P is the point at infinity, i.e. whether its Z coordinate is
// zero. Returns the result of fp2_is_zero on P->z unchanged.
// NOTE(review): other code in this file uses fp2_is_zero results as
// constant-time select masks, so the "true" value is presumably 0xFFFFFFFF
// rather than 1 - confirm against fp2_is_zero.
uint32_t
ec_is_zero(const ec_point_t *P)
{
return fp2_is_zero(&P->z);
}
// Tests whether at least one of the coordinates X, Z of P is zero.
// Returns the bitwise OR of the two fp2_is_zero results.
uint32_t
ec_has_zero_coordinate(const ec_point_t *P)
{
    const uint32_t x_is_zero = fp2_is_zero(&P->x);
    const uint32_t z_is_zero = fp2_is_zero(&P->z);
    return x_is_zero | z_is_zero;
}
// Evaluate if two points in Montgomery coordinates (X:Z) are equal
// Returns 0xFFFFFFFF (true) if P=Q, 0 (false) otherwise
//
// Two points are equal if
//  - both are the point at infinity, or
//  - neither is, and PX * QZ = QX * PZ.
// The three conditions are combined with bitwise operators only. Combining
// masks with `*` is not equivalent: 0xFFFFFFFF * 0xFFFFFFFF is 1 modulo 2^32,
// so the result would be 1 instead of the documented all-ones mask and
// callers applying `~` to it would see a non-zero value for equal points.
uint32_t
ec_is_equal(const ec_point_t *P, const ec_point_t *Q)
{
    fp2_t t0, t1;

    // Check if P, Q are the points at infinity
    const uint32_t l_zero = ec_is_zero(P);
    const uint32_t r_zero = ec_is_zero(Q);

    // Check if PX * QZ = QX * PZ
    fp2_mul(&t0, &P->x, &Q->z);
    fp2_mul(&t1, &P->z, &Q->x);
    const uint32_t lr_equal = fp2_is_equal(&t0, &t1);

    // Stretch any non-zero "true" value (1 or 0xFFFFFFFF) to a full mask
    // without branching: for non-zero v, (v | -v) always has its top bit set.
    const uint32_t lr_mask = (uint32_t)0 - ((lr_equal | ((uint32_t)0 - lr_equal)) >> 31);

    return (l_zero & r_zero) | (~l_zero & ~r_zero & lr_mask);
}
// Tests whether P = (X : Z) is a point of order exactly 2 on E = (A : C).
// Returns 0 for the point at infinity. Otherwise the result is non-zero iff
// X = 0 or C*X^2 + A*X*Z + C*Z^2 = 0 (i.e. x = 0 or x^2 + (A/C)*x + 1 = 0).
uint32_t
ec_is_two_torsion(const ec_point_t *P, const ec_curve_t *E)
{
    if (ec_is_zero(P))
        return 0;

    fp2_t sum_sq, diff_sq, cross;

    fp2_add(&sum_sq, &P->x, &P->z);
    fp2_sqr(&sum_sq, &sum_sq); // (X + Z)^2
    fp2_sub(&diff_sq, &P->x, &P->z);
    fp2_sqr(&diff_sq, &diff_sq); // (X - Z)^2

    fp2_sub(&cross, &sum_sq, &diff_sq);   // 4XZ
    fp2_add(&diff_sq, &sum_sq, &diff_sq); // 2(X^2 + Z^2)
    fp2_mul(&cross, &cross, &E->A);       // 4AXZ
    fp2_mul(&diff_sq, &diff_sq, &E->C);   // 2C(X^2 + Z^2)
    fp2_add(&diff_sq, &diff_sq, &diff_sq); // 4C(X^2 + Z^2)
    fp2_add(&sum_sq, &diff_sq, &cross);   // 4 (CX^2+CZ^2+AXZ)

    const uint32_t x_is_zero = fp2_is_zero(&P->x);
    const uint32_t quad_is_zero = fp2_is_zero(&sum_sq);

    return x_is_zero | quad_is_zero;
}
// Tests whether P has order exactly 4: doubles P once and returns
// ec_is_two_torsion of the result (which is 0 if the double is the point at
// infinity). The doubling uses E->A24 together with the curve's
// is_A24_computed_and_normalized flag, so A24 must already have been computed.
uint32_t
ec_is_four_torsion(const ec_point_t *P, const ec_curve_t *E)
{
ec_point_t test;
xDBL_A24(&test, P, &E->A24, E->is_A24_computed_and_normalized);
return ec_is_two_torsion(&test, E);
}
// Check if the basis points (P, Q) generate the full 4-torsion:
// [2]P and [2]Q must both have order exactly 2 and must be different points.
// Returns a non-zero value if so, 0 otherwise.
// Uses E->A24 together with the curve's is_A24_computed_and_normalized flag,
// so A24 must already have been computed.
uint32_t
ec_is_basis_four_torsion(const ec_basis_t *B, const ec_curve_t *E)
{
    ec_point_t P2, Q2;
    xDBL_A24(&P2, &B->P, &E->A24, E->is_A24_computed_and_normalized);
    xDBL_A24(&Q2, &B->Q, &E->A24, E->is_A24_computed_and_normalized);

    // Turn "equal" into an explicit "distinct" mask. A plain bitwise NOT is
    // only correct if the equality result is exactly 0 or 0xFFFFFFFF; for any
    // other truthy value (e.g. 1) it stays non-zero and dependent points
    // would be accepted. For non-zero v, (v | -v) has its top bit set, so
    // the expression below is 0 when the points are equal and all-ones
    // otherwise.
    const uint32_t same = ec_is_equal(&P2, &Q2);
    const uint32_t distinct = ((same | ((uint32_t)0 - same)) >> 31) - (uint32_t)1;

    return ec_is_two_torsion(&P2, E) & ec_is_two_torsion(&Q2, E) & distinct;
}
// Checks that the Montgomery coefficient A is valid, i.e. A^2 - 4 != 0,
// by comparing A against 2 and -2.
// Returns 1 if the curve is valid, 0 otherwise.
int
ec_curve_verify_A(const fp2_t *A)
{
    fp2_t bound;

    // bound = 2
    fp2_set_one(&bound);
    fp_add(&bound.re, &bound.re, &bound.re);
    const uint32_t is_plus_two = fp2_is_equal(A, &bound);

    // bound = -2
    fp_neg(&bound.re, &bound.re);
    const uint32_t is_minus_two = fp2_is_equal(A, &bound);

    return (is_plus_two | is_minus_two) ? 0 : 1;
}
// Initialize the curve from the A coefficient and check it is valid.
// E is reset with ec_curve_init (so C = 1 and A24 is marked as not computed)
// and then A is copied in. Note that E is written even when A is rejected.
// Return 1 if curve is valid, 0 otherwise (as decided by ec_curve_verify_A)
int
ec_curve_init_from_A(ec_curve_t *E, const fp2_t *A)
{ // Initialize the curve from the A coefficient and check it is valid
// Return 1 if curve is valid, 0 otherwise
ec_curve_init(E);
fp2_copy(&E->A, A); // Set A
return ec_curve_verify_A(A);
}
// j-invariant of the Montgomery curve with projective coefficient (A : C):
//   j = 256 * (A^2 - 3C^2)^3 / (C^4 * (A^2 - 4C^2))
// Reads curve->A and curve->C only; the result is written to j_inv.
void
ec_j_inv(fp2_t *j_inv, const ec_curve_t *curve)
{ // j-invariant computation for a Montgomery curve given by (A : C)
fp2_t t0, t1;
fp2_sqr(&t1, &curve->C); // t1 = C^2
fp2_sqr(j_inv, &curve->A); // j_inv = A^2
fp2_add(&t0, &t1, &t1); // t0 = 2C^2
fp2_sub(&t0, j_inv, &t0); // t0 = A^2 - 2C^2
fp2_sub(&t0, &t0, &t1); // t0 = A^2 - 3C^2
fp2_sub(j_inv, &t0, &t1); // j_inv = A^2 - 4C^2
fp2_sqr(&t1, &t1); // t1 = C^4
fp2_mul(j_inv, j_inv, &t1); // j_inv = C^4 * (A^2 - 4C^2), the denominator
fp2_add(&t0, &t0, &t0);
fp2_add(&t0, &t0, &t0); // t0 = 4(A^2 - 3C^2)
fp2_sqr(&t1, &t0);
fp2_mul(&t0, &t0, &t1); // t0 = 64(A^2 - 3C^2)^3
fp2_add(&t0, &t0, &t0);
fp2_add(&t0, &t0, &t0); // t0 = 256(A^2 - 3C^2)^3, the numerator
fp2_inv(j_inv);
fp2_mul(j_inv, &t0, j_inv); // j_inv = numerator / denominator
}
// Doubling of a Montgomery point in projective coordinates (X:Z) on the
// curve E0 with (A:C) = (0:1).
// Input:  P = (XP:ZP) with xP = XP/ZP.
// Output: Q <- 2*P = (XQ:ZQ) such that x(2P) = XQ/ZQ.
// Q is written only after P has been fully read, so Q may alias P.
void
xDBL_E0(ec_point_t *Q, const ec_point_t *P)
{
    fp2_t sum_sq, diff_sq, cross;

    fp2_add(&sum_sq, &P->x, &P->z);
    fp2_sqr(&sum_sq, &sum_sq); // (X + Z)^2
    fp2_sub(&diff_sq, &P->x, &P->z);
    fp2_sqr(&diff_sq, &diff_sq); // (X - Z)^2

    fp2_sub(&cross, &sum_sq, &diff_sq);    // 4XZ
    fp2_add(&diff_sq, &diff_sq, &diff_sq); // 2(X - Z)^2

    fp2_mul(&Q->x, &sum_sq, &diff_sq); // 2(X + Z)^2 (X - Z)^2
    fp2_add(&Q->z, &diff_sq, &cross);  // 2(X - Z)^2 + 4XZ
    fp2_mul(&Q->z, &Q->z, &cross);
}
void
xDBL(ec_point_t *Q, const ec_point_t *P, const ec_point_t *AC)
{ // Doubling of a Montgomery point in projective coordinates (X:Z). Computation of coefficient values A+2C and 4C
// on-the-fly.
// Input: projective Montgomery x-coordinates P = (XP:ZP), where xP=XP/ZP, and Montgomery curve constants (A:C).
// Output: projective Montgomery x-coordinates Q <- 2*P = (XQ:ZQ) such that x(2P)=XQ/ZQ.
// The constants are read as AC->x = A and AC->z = C; callers in this file pass an ec_curve_t cast
// to ec_point_t for this argument.
fp2_t t0, t1, t2, t3;
fp2_add(&t0, &P->x, &P->z);
fp2_sqr(&t0, &t0); // t0 = (X+Z)^2
fp2_sub(&t1, &P->x, &P->z);
fp2_sqr(&t1, &t1); // t1 = (X-Z)^2
fp2_sub(&t2, &t0, &t1); // t2 = 4XZ
fp2_add(&t3, &AC->z, &AC->z); // t3 = 2C
fp2_mul(&t1, &t1, &t3);
fp2_add(&t1, &t1, &t1); // t1 = 4C(X-Z)^2
fp2_mul(&Q->x, &t0, &t1); // XQ = 4C(X+Z)^2(X-Z)^2
fp2_add(&t0, &t3, &AC->x); // t0 = A+2C
fp2_mul(&t0, &t0, &t2); // t0 = (A+2C)*4XZ
fp2_add(&t0, &t0, &t1); // t0 = (A+2C)*4XZ + 4C(X-Z)^2
fp2_mul(&Q->z, &t0, &t2); // ZQ = 4XZ * ((A+2C)*4XZ + 4C(X-Z)^2)
}
void
xDBL_A24(ec_point_t *Q, const ec_point_t *P, const ec_point_t *A24, const bool A24_normalized)
{ // Doubling of a Montgomery point in projective coordinates (X:Z).
// Input: projective Montgomery x-coordinates P = (XP:ZP), where xP=XP/ZP, and
// the Montgomery curve constants A24 = (A+2C:4C) (or A24 = (A+2C/4C:1) if normalized).
// Output: projective Montgomery x-coordinates Q <- 2*P = (XQ:ZQ) such that x(2P)=XQ/ZQ.
// When A24_normalized is true the multiplication by A24->z is skipped, so the caller must
// guarantee A24->z == 1 in that case; passing false is valid for either representation.
fp2_t t0, t1, t2;
fp2_add(&t0, &P->x, &P->z);
fp2_sqr(&t0, &t0); // t0 = (X+Z)^2
fp2_sub(&t1, &P->x, &P->z);
fp2_sqr(&t1, &t1); // t1 = (X-Z)^2
fp2_sub(&t2, &t0, &t1); // t2 = 4XZ
if (!A24_normalized)
fp2_mul(&t1, &t1, &A24->z); // t1 = A24.z * (X-Z)^2
fp2_mul(&Q->x, &t0, &t1); // XQ = (X+Z)^2 * t1
fp2_mul(&t0, &t2, &A24->x); // t0 = A24.x * 4XZ
fp2_add(&t0, &t0, &t1);
fp2_mul(&Q->z, &t0, &t2); // ZQ = 4XZ * (A24.x * 4XZ + t1)
}
void
xADD(ec_point_t *R, const ec_point_t *P, const ec_point_t *Q, const ec_point_t *PQ)
{ // Differential addition of Montgomery points in projective coordinates (X:Z).
// Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, and difference
// PQ=P-Q=(XPQ:ZPQ).
// Output: projective Montgomery point R <- P+Q = (XR:ZR) such that x(P+Q)=XR/ZR.
// No curve constant is needed. R->x is staged in t2 and written last, after PQ->x has been read;
// callers in this file pass R equal to P, Q or PQ.
fp2_t t0, t1, t2, t3;
fp2_add(&t0, &P->x, &P->z); // t0 = XP+ZP
fp2_sub(&t1, &P->x, &P->z); // t1 = XP-ZP
fp2_add(&t2, &Q->x, &Q->z); // t2 = XQ+ZQ
fp2_sub(&t3, &Q->x, &Q->z); // t3 = XQ-ZQ
fp2_mul(&t0, &t0, &t3); // t0 = (XP+ZP)(XQ-ZQ)
fp2_mul(&t1, &t1, &t2); // t1 = (XP-ZP)(XQ+ZQ)
fp2_add(&t2, &t0, &t1);
fp2_sub(&t3, &t0, &t1);
fp2_sqr(&t2, &t2); // t2 = (t0+t1)^2
fp2_sqr(&t3, &t3); // t3 = (t0-t1)^2
fp2_mul(&t2, &PQ->z, &t2); // t2 = ZPQ*(t0+t1)^2
fp2_mul(&R->z, &PQ->x, &t3); // ZR = XPQ*(t0-t1)^2
fp2_copy(&R->x, &t2); // XR = ZPQ*(t0+t1)^2
}
void
xDBLADD(ec_point_t *R,
ec_point_t *S,
const ec_point_t *P,
const ec_point_t *Q,
const ec_point_t *PQ,
const ec_point_t *A24,
const bool A24_normalized)
{ // Simultaneous doubling and differential addition.
// Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, the difference
// PQ=P-Q=(XPQ:ZPQ), and the Montgomery curve constants A24 = (A+2C:4C) (or A24 = (A+2C/4C:1) if normalized).
// Output: projective Montgomery points R <- 2*P = (XR:ZR) such that x(2P)=XR/ZR, and S <- P+Q = (XS:ZS) such that
// x(Q+P)=XS/ZS.
// When A24_normalized is true the multiplication by A24->z is skipped, so A24->z must equal 1 then.
// R and S double as scratch space; the ladders in this file call this with R == P and S == Q, which
// works because P and Q are read before the corresponding outputs are first written.
fp2_t t0, t1, t2;
fp2_add(&t0, &P->x, &P->z); // t0 = XP+ZP
fp2_sub(&t1, &P->x, &P->z); // t1 = XP-ZP
fp2_sqr(&R->x, &t0); // XR = (XP+ZP)^2
fp2_sub(&t2, &Q->x, &Q->z); // t2 = XQ-ZQ
fp2_add(&S->x, &Q->x, &Q->z); // XS = XQ+ZQ
fp2_mul(&t0, &t0, &t2); // t0 = (XP+ZP)(XQ-ZQ)
fp2_sqr(&R->z, &t1); // ZR = (XP-ZP)^2
fp2_mul(&t1, &t1, &S->x); // t1 = (XP-ZP)(XQ+ZQ)
fp2_sub(&t2, &R->x, &R->z); // t2 = 4*XP*ZP
if (!A24_normalized)
fp2_mul(&R->z, &R->z, &A24->z); // ZR = A24.z*(XP-ZP)^2
fp2_mul(&R->x, &R->x, &R->z); // XR = (XP+ZP)^2 * ZR  (final)
fp2_mul(&S->x, &A24->x, &t2); // XS = A24.x * 4*XP*ZP  (scratch)
fp2_sub(&S->z, &t0, &t1); // ZS = t0-t1
fp2_add(&R->z, &R->z, &S->x); // ZR = ZR + A24.x*4*XP*ZP
fp2_add(&S->x, &t0, &t1); // XS = t0+t1
fp2_mul(&R->z, &R->z, &t2); // ZR = 4*XP*ZP * ZR  (final)
fp2_sqr(&S->z, &S->z); // ZS = (t0-t1)^2
fp2_sqr(&S->x, &S->x); // XS = (t0+t1)^2
fp2_mul(&S->z, &S->z, &PQ->x); // ZS = XPQ*(t0-t1)^2  (final)
fp2_mul(&S->x, &S->x, &PQ->z); // XS = ZPQ*(t0+t1)^2  (final)
}
void
xMUL(ec_point_t *Q, const ec_point_t *P, const digit_t *k, const int kbits, const ec_curve_t *curve)
{ // The Montgomery ladder
  // Input: projective Montgomery point P=(XP:ZP) such that xP=XP/ZP, a scalar k of bitlength kbits, and
  //        the Montgomery curve constants (A:C) (or A24 = (A+2C/4C:1) if normalized).
  // Output: projective Montgomery points Q <- k*P = (XQ:ZQ) such that x(k*P)=XQ/ZQ.
  //
  // The loop always processes exactly kbits bits and uses masked swaps instead of branching on
  // the scalar bits.
    ec_point_t R0, R1, A24;
    digit_t mask;
    unsigned int bit, prevbit = 0, swap;
    // Whether the local A24 has z == 1. This must be forwarded to xDBLADD: when A24 is built
    // below as the projective pair (A+2C:4C), xDBLADD has to multiply by A24.z, otherwise the
    // doubling is only correct when 4C happens to equal 1.
    const bool A24_normalized = curve->is_A24_computed_and_normalized;

    if (!A24_normalized) {
        // Computation of A24=(A+2C:4C)
        fp2_add(&A24.x, &curve->C, &curve->C);
        fp2_add(&A24.z, &A24.x, &A24.x);
        fp2_add(&A24.x, &A24.x, &curve->A);
    } else {
        fp2_copy(&A24.x, &curve->A24.x);
        fp2_copy(&A24.z, &curve->A24.z);
        // Assert A24 has been normalised
        assert(fp2_is_one(&A24.z));
    }

    // R0 <- (1:0), R1 <- P
    ec_point_init(&R0);
    fp2_copy(&R1.x, &P->x);
    fp2_copy(&R1.z, &P->z);

    // Main loop: scan the scalar from the most significant bit down
    for (int i = kbits - 1; i >= 0; i--) {
        bit = (k[i >> LOG2RADIX] >> (i & (RADIX - 1))) & 1;
        // Swap only when the current bit differs from the previous one
        swap = bit ^ prevbit;
        prevbit = bit;
        mask = 0 - (digit_t)swap;
        cswap_points(&R0, &R1, mask);
        // R0 <- 2*R0, R1 <- R0 + R1 (their difference is always P)
        xDBLADD(&R0, &R1, &R0, &R1, P, &A24, A24_normalized);
    }

    // Undo the last pending swap
    swap = 0 ^ prevbit;
    mask = 0 - (digit_t)swap;
    cswap_points(&R0, &R1, mask);

    fp2_copy(&Q->x, &R0.x);
    fp2_copy(&Q->z, &R0.z);
}
int
xDBLMUL(ec_point_t *S,
const ec_point_t *P,
const digit_t *k,
const ec_point_t *Q,
const digit_t *l,
const ec_point_t *PQ,
const int kbits,
const ec_curve_t *curve)
{ // The Montgomery biladder
// Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, scalars k and l of
// bitlength kbits, the difference PQ=P-Q=(XPQ:ZPQ), and the Montgomery curve constants (A:C).
// Output: projective Montgomery point S <- k*P + l*Q = (XS:ZS) such that x(k*P + l*Q)=XS/ZS.
// Returns 1 on success, and 0 (leaving S untouched) when P, Q, PQ or the computed P+Q has a zero
// coordinate, because the differential-addition formulas are not valid there.
// Doubling uses xDBL_E0 when curve->A is zero; otherwise curve->A24 is used as a normalized
// constant, so the caller must have normalized it (checked by assert only).
// The recoding buffer r has 2*BITS entries and is indexed up to 2*kbits-1, so kbits must not
// exceed BITS -- NOTE(review): this is not checked here.
int i, A_is_zero;
digit_t evens, mevens, bitk0, bitl0, maskk, maskl, temp, bs1_ip1, bs2_ip1, bs1_i, bs2_i, h;
digit_t sigma[2] = { 0 }, pre_sigma = 0;
digit_t k_t[NWORDS_ORDER], l_t[NWORDS_ORDER], one[NWORDS_ORDER] = { 0 }, r[2 * BITS] = { 0 };
ec_point_t DIFF1a, DIFF1b, DIFF2a, DIFF2b, R[3] = { 0 }, T[3];
// differential additions formulas are invalid in this case
if (ec_has_zero_coordinate(P) | ec_has_zero_coordinate(Q) | ec_has_zero_coordinate(PQ))
return 0;
// Derive sigma according to parity
bitk0 = (k[0] & 1);
bitl0 = (l[0] & 1);
maskk = 0 - bitk0; // Parity masks: 0 if even, otherwise 1...1
maskl = 0 - bitl0;
sigma[0] = (bitk0 ^ 1);
sigma[1] = (bitl0 ^ 1);
evens = sigma[0] + sigma[1]; // Count number of even scalars
mevens = 0 - (evens & 1); // Mask mevens <- 0 if # even of scalars = 0 or 2, otherwise mevens = 1...1
// If k and l are both even or both odd, pick sigma = (0,1)
sigma[0] = (sigma[0] & mevens);
sigma[1] = (sigma[1] & mevens) | (1 & ~mevens);
// Convert even scalars to odd
// (k-1 and l-1 are always computed; select_ct then picks per the parity masks)
one[0] = 1;
mp_sub(k_t, k, one, NWORDS_ORDER);
mp_sub(l_t, l, one, NWORDS_ORDER);
select_ct(k_t, k_t, k, maskk, NWORDS_ORDER);
select_ct(l_t, l_t, l, maskl, NWORDS_ORDER);
// Scalar recoding
// Produces two recoded digits per scalar bit: r[2i] and r[2i+1], each in {0, 1}
for (i = 0; i < kbits; i++) {
// If sigma[0] = 1 swap k_t and l_t
maskk = 0 - (sigma[0] ^ pre_sigma);
swap_ct(k_t, l_t, maskk, NWORDS_ORDER);
if (i == kbits - 1) {
bs1_ip1 = 0;
bs2_ip1 = 0;
} else {
bs1_ip1 = mp_shiftr(k_t, 1, NWORDS_ORDER);
bs2_ip1 = mp_shiftr(l_t, 1, NWORDS_ORDER);
}
bs1_i = k_t[0] & 1;
bs2_i = l_t[0] & 1;
r[2 * i] = bs1_i ^ bs1_ip1;
r[2 * i + 1] = bs2_i ^ bs2_ip1;
// Revert sigma if second bit, r_(2i+1), is 1
pre_sigma = sigma[0];
maskk = 0 - r[2 * i + 1];
select_ct(&temp, &sigma[0], &sigma[1], maskk, 1);
select_ct(&sigma[1], &sigma[1], &sigma[0], maskk, 1);
sigma[0] = temp;
}
// Point initialization
ec_point_init(&R[0]);
maskk = 0 - sigma[0];
select_point(&R[1], P, Q, maskk);
select_point(&R[2], Q, P, maskk);
fp2_copy(&DIFF1a.x, &R[1].x);
fp2_copy(&DIFF1a.z, &R[1].z);
fp2_copy(&DIFF1b.x, &R[2].x);
fp2_copy(&DIFF1b.z, &R[2].z);
// Initialize DIFF2a <- P+Q, DIFF2b <- P-Q
xADD(&R[2], &R[1], &R[2], PQ);
if (ec_has_zero_coordinate(&R[2]))
return 0; // non valid formulas
fp2_copy(&DIFF2a.x, &R[2].x);
fp2_copy(&DIFF2a.z, &R[2].z);
fp2_copy(&DIFF2b.x, &PQ->x);
fp2_copy(&DIFF2b.z, &PQ->z);
A_is_zero = fp2_is_zero(&curve->A);
// Main loop
// Each iteration performs one doubling and two differential additions on the triple R[0..2]
for (i = kbits - 1; i >= 0; i--) {
h = r[2 * i] + r[2 * i + 1]; // in {0, 1, 2}
// T[0] <- R[h], chosen with two masked selects
maskk = 0 - (h & 1);
select_point(&T[0], &R[0], &R[1], maskk);
maskk = 0 - (h >> 1);
select_point(&T[0], &T[0], &R[2], maskk);
if (A_is_zero) {
xDBL_E0(&T[0], &T[0]);
} else {
assert(fp2_is_one(&curve->A24.z));
xDBL_A24(&T[0], &T[0], &curve->A24, true);
}
maskk = 0 - r[2 * i + 1]; // in {0, 1}
select_point(&T[1], &R[0], &R[1], maskk);
select_point(&T[2], &R[1], &R[2], maskk);
cswap_points(&DIFF1a, &DIFF1b, maskk);
xADD(&T[1], &T[1], &T[2], &DIFF1a);
xADD(&T[2], &R[0], &R[2], &DIFF2a);
// If hw (mod 2) = 1 then swap DIFF2a and DIFF2b
maskk = 0 - (h & 1);
cswap_points(&DIFF2a, &DIFF2b, maskk);
// R <- T
copy_point(&R[0], &T[0]);
copy_point(&R[1], &T[1]);
copy_point(&R[2], &T[2]);
}
// Output R[evens]
// (R[1] if exactly one scalar is even, R[2] if both are odd, R[0] if both are even)
select_point(S, &R[0], &R[1], mevens);
maskk = 0 - (bitk0 & bitl0);
select_point(S, S, &R[2], maskk);
return 1;
}
int
ec_ladder3pt(ec_point_t *R,
             const digit_t *m,
             const ec_point_t *P,
             const ec_point_t *Q,
             const ec_point_t *PQ,
             const ec_curve_t *E)
{ // The 3-point Montgomery ladder
  // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, a scalar m
  //        stored in NWORDS_ORDER words, the difference PQ=P-Q=(XPQ:ZPQ), and the Montgomery curve
  //        constants A24 = (A+2C/4C:1).
  // Output: projective Montgomery point R <- P + m*Q = (XR:ZR) such that x(P + m*Q)=XR/ZR.
  // Returns 1 on success; 0 if A24 is not normalized or PQ has a zero coordinate (R is then untouched).
  // All NWORDS_ORDER * RADIX bits of m are processed, least significant first.
    ec_point_t acc_Q, acc_P, acc_diff;

    assert(E->is_A24_computed_and_normalized);
    if (!fp2_is_one(&E->A24.z)) {
        return 0;
    }
    // Formulas are not valid in that case
    if (ec_has_zero_coordinate(PQ)) {
        return 0;
    }

    copy_point(&acc_Q, Q);
    copy_point(&acc_P, P);
    copy_point(&acc_diff, PQ);

    for (int word = 0; word < NWORDS_ORDER; word++) {
        for (int pos = 0; pos < RADIX; pos++) {
            // All-ones when the current scalar bit is 0, zero when it is 1
            const digit_t bit_is_clear = (digit_t)0 - (digit_t)(((m[word] >> pos) & 1) == 0);

            cswap_points(&acc_P, &acc_diff, bit_is_clear);
            xDBLADD(&acc_Q, &acc_P, &acc_Q, &acc_P, &acc_diff, &E->A24, true);
            cswap_points(&acc_P, &acc_diff, bit_is_clear);
        }
    }

    copy_point(R, &acc_P);
    return 1;
}
// WRAPPERS to export
void
ec_dbl(ec_point_t *res, const ec_point_t *P, const ec_curve_t *curve)
{ // res <- [2]P on the given curve, picking the cheaper formula when possible.
    if (!curve->is_A24_computed_and_normalized) {
        // No normalized A24 available: xDBL derives A+2C and 4C from (A:C) on the fly
        xDBL(res, P, (const ec_point_t *)curve);
        return;
    }

    // With A24 = ((A+2)/4 : 1) the doubling saves multiplications
    assert(fp2_is_one(&curve->A24.z));
    xDBL_A24(res, P, &curve->A24, true);
}
void
ec_dbl_iter(ec_point_t *res, int n, const ec_point_t *P, ec_curve_t *curve)
{ // res <- [2^n]P. For n == 0 this is a plain copy.
  // For n > 50 the curve's A24 is normalized first (this modifies *curve).
    if (n == 0) {
        copy_point(res, P);
        return;
    }

    // Long chains amortize the cost of normalizing A24
    if (n > 50) {
        ec_curve_normalize_A24(curve);
    }

    const bool use_normalized_A24 = curve->is_A24_computed_and_normalized;
    const ec_point_t *input = P;
    int remaining = n;

    // The first pass reads P; every later pass doubles res in place
    do {
        if (use_normalized_A24) {
            assert(fp2_is_one(&curve->A24.z));
            xDBL_A24(res, input, &curve->A24, true);
        } else {
            xDBL(res, input, (const ec_point_t *)curve);
        }
        input = res;
    } while (--remaining > 0);
}
void
ec_dbl_iter_basis(ec_basis_t *res, int n, const ec_basis_t *B, ec_curve_t *curve)
{ // Apply ec_dbl_iter with the same n to the three points of the basis, in the order P, Q, PmQ.
    ec_point_t *const outputs[3] = { &res->P, &res->Q, &res->PmQ };
    const ec_point_t *const inputs[3] = { &B->P, &B->Q, &B->PmQ };

    for (int idx = 0; idx < 3; idx++) {
        ec_dbl_iter(outputs[idx], n, inputs[idx], curve);
    }
}
void
ec_mul(ec_point_t *res, const digit_t *scalar, const int kbits, const ec_point_t *P, ec_curve_t *curve)
{ // res <- [scalar]P using the Montgomery ladder xMUL over kbits bits.
  // Scalars longer than 50 bits trigger a normalization of the curve's A24 (modifying *curve).
    const bool worth_normalizing = (kbits > 50);

    if (worth_normalizing) {
        ec_curve_normalize_A24(curve);
    }

    // xMUL itself checks whether a normalized A24 is available
    xMUL(res, P, scalar, kbits, curve);
}
int
ec_biscalar_mul(ec_point_t *res,
                const digit_t *scalarP,
                const digit_t *scalarQ,
                const int kbits,
                const ec_basis_t *PQ,
                const ec_curve_t *curve)
{ // res <- [scalarP]P + [scalarQ]Q for the basis (P, Q, P-Q) in PQ.
  // Returns 1 on success and 0 on failure (P-Q at infinity, a failed 2-torsion sanity check,
  // or a failure reported by xDBLMUL).
    if (fp2_is_zero(&PQ->PmQ.z))
        return 0;

    if (kbits != 1) {
        // General case: work on a local copy of the curve so the caller's curve stays
        // untouched, normalizing A24 unless A is zero.
        ec_curve_t E;
        copy_curve(&E, curve);
        if (!fp2_is_zero(&curve->A)) {
            ec_curve_normalize_A24(&E);
        }
        return xDBLMUL(res, &PQ->P, scalarP, &PQ->Q, scalarQ, &PQ->PmQ, kbits, (const ec_curve_t *)&E);
    }

    /* Differential additions behave badly when PmQ = (0:1), so the one-bit case is
     * handled by direct table lookup. Since P, Q are assumed to be a basis, that
     * situation can happen only if kbits==1 */

    // Sanity check: our basis should be given by 2-torsion points
    if (!(ec_is_two_torsion(&PQ->P, curve) && ec_is_two_torsion(&PQ->Q, curve) &&
          ec_is_two_torsion(&PQ->PmQ, curve)))
        return 0;

    const digit_t bP = (scalarP[0] & 1);
    const digit_t bQ = (scalarQ[0] & 1);

    switch ((unsigned)(bP | (bQ << 1))) {
        case 0: // neither point selected
            ec_point_init(res); //(1: 0)
            break;
        case 1: // P only
            copy_point(res, &PQ->P);
            break;
        case 2: // Q only
            copy_point(res, &PQ->Q);
            break;
        default: // both: the stored difference point is returned
            copy_point(res, &PQ->PmQ);
            break;
    }
    return 1;
}

335
src/ec/ref/lvlx/ec_jac.c Normal file
View File

@@ -0,0 +1,335 @@
#include <assert.h>
#include <ec.h>
void
jac_init(jac_point_t *P)
{ // Set P to the identity element, encoded in Jacobian coordinates as (X:Y:Z) = (0:1:0)
    fp2_set_one(&P->y);
    fp2_set_zero(&P->x);
    fp2_set_zero(&P->z);
}
uint32_t
jac_is_equal(const jac_point_t *P, const jac_point_t *Q)
{ // Evaluate if two points in Jacobian coordinates (X:Y:Z) are equal
// Returns a non-zero value (true) if P=Q, 0 (false) otherwise
// The comparison is done by cross-multiplication, so no inversion is needed:
// x1*z2^2 == x2*z1^2 and y1*z2^3 == y2*z1^3.
// NOTE(review): when either Z is zero the cross-products degenerate; confirm how callers treat
// the point at infinity.
fp2_t t0, t1, t2, t3;
fp2_sqr(&t0, &Q->z); // t0 = z2^2
fp2_mul(&t2, &P->x, &t0); // x1*z2^2
fp2_sqr(&t1, &P->z); // t1 = z1^2
fp2_mul(&t3, &Q->x, &t1); // x2*z1^2
fp2_sub(&t2, &t2, &t3); // t2 = x1*z2^2 - x2*z1^2
fp2_mul(&t0, &t0, &Q->z); // t0 = z2^3
fp2_mul(&t0, &P->y, &t0); // y1*z2^3
fp2_mul(&t1, &t1, &P->z); // t1 = z1^3
fp2_mul(&t1, &Q->y, &t1); // y2*z1^3
fp2_sub(&t0, &t0, &t1); // t0 = y1*z2^3 - y2*z1^3
// Both differences must vanish
return fp2_is_zero(&t0) & fp2_is_zero(&t2);
}
void
jac_to_xz(ec_point_t *P, const jac_point_t *xyP)
{ // Convert a Jacobian point (X:Y:Z) to x-only projective coordinates (X : Z^2).
  // The Jacobian identity (0:1:0) would map to (0:0); it is replaced by (1:0) through a
  // masked select rather than a branch.
    fp2_t unit;
    uint32_t x_vanishes, z_vanishes;

    fp2_set_one(&unit);
    fp2_copy(&P->x, &xyP->x);
    fp2_sqr(&P->z, &xyP->z);

    x_vanishes = fp2_is_zero(&P->x);
    z_vanishes = fp2_is_zero(&P->z);
    // Overwrite x with 1 only when both coordinates are zero
    fp2_select(&P->x, &P->x, &unit, x_vanishes & z_vanishes);
}
void
jac_to_ws(jac_point_t *Q, fp2_t *t, fp2_t *ao3, const jac_point_t *P, const ec_curve_t *curve)
{ // Map a Jacobian point on the Montgomery curve with coefficient curve->A to modified Jacobian
  // coordinates (U:V:W:T) on the associated short Weierstrass curve:
  //     a = 1 - A^2/3, U = X + (A*Z^2)/3, V = Y, W = Z, T = a*Z^4
  // Outputs: Q = (U:V:W), *t = T, *ao3 = A/3 (the shift that jac_from_ws needs to map back).
  // Cost of 3M + 2S when A != 0.
  //
  // *ao3 is written on every path (it is zero when A == 0), so callers never read an
  // uninitialized value, and U is assembled through a temporary so that Q may alias P.
    if (!fp2_is_zero(&(curve->A))) {
        fp_t one;
        fp2_t a, shift;

        fp_set_one(&one);
        // ao3 = A/3, computed component-wise
        fp_div3(&(ao3->re), &(curve->A.re));
        fp_div3(&(ao3->im), &(curve->A.im));
        fp2_sqr(t, &P->z);             // t = Z^2
        fp2_mul(&shift, ao3, t);       // shift = (A/3)*Z^2
        fp2_add(&Q->x, &P->x, &shift); // U = X + (A/3)*Z^2
        fp2_sqr(t, t);                 // t = Z^4
        fp2_mul(&a, ao3, &(curve->A)); // a = A^2/3
        fp_sub(&(a.re), &one, &(a.re));
        fp_neg(&(a.im), &(a.im));      // a = 1 - A^2/3
        fp2_mul(t, t, &a);             // T = a*Z^4
    } else {
        // A = 0: the shift is zero and a = 1, so U = X and T = Z^4
        fp2_set_zero(ao3);
        fp2_copy(&Q->x, &P->x);
        fp2_sqr(t, &P->z);
        fp2_sqr(t, t);
    }
    fp2_copy(&Q->y, &P->y);
    fp2_copy(&Q->z, &P->z);
}
void
jac_from_ws(jac_point_t *Q, const jac_point_t *P, const fp2_t *ao3, const ec_curve_t *curve)
{ // Inverse of jac_to_ws: map (U:V:W) back to Jacobian coordinates on the Montgomery curve:
  //     X = U - (A*W^2)/3, Y = V, Z = W.
  // ao3 must hold A/3 as produced by jac_to_ws; it is only read when curve->A is non-zero.
  // Cost of 1M + 1S when A != 0.
  //
  // Q->x is written on every path: when A == 0 the shift vanishes and X = U, which must
  // still be copied for the case where Q does not alias P.
    fp2_t t;

    if (!fp2_is_zero(&(curve->A))) {
        fp2_sqr(&t, &P->z);        // t = W^2
        fp2_mul(&t, &t, ao3);      // t = (A/3)*W^2
        fp2_sub(&Q->x, &P->x, &t); // X = U - (A/3)*W^2
    } else {
        fp2_copy(&Q->x, &P->x);    // X = U
    }
    fp2_copy(&Q->y, &P->y);
    fp2_copy(&Q->z, &P->z);
}
void
copy_jac_point(jac_point_t *P, const jac_point_t *Q)
{ // P <- Q, copying the three Jacobian coordinates one by one (note: destination comes first)
    fp2_copy(&P->z, &Q->z);
    fp2_copy(&P->y, &Q->y);
    fp2_copy(&P->x, &Q->x);
}
void
jac_neg(jac_point_t *Q, const jac_point_t *P)
{ // Q <- -P: the Y coordinate is negated, X and Z are carried over unchanged
    fp2_neg(&Q->y, &P->y);
    fp2_copy(&Q->x, &P->x);
    fp2_copy(&Q->z, &P->z);
}
void
DBL(jac_point_t *Q, const jac_point_t *P, const ec_curve_t *AC)
{ // Cost of 6M + 6S.
  // Doubling on a Montgomery curve, representation in Jacobian coordinates (X:Y:Z) corresponding to
  // (X/Z^2,Y/Z^3) This version receives the coefficient value A (read from AC->A).
  //
  // The identity (X = Z = 0) is handled by a final masked select that keeps P's X and Z.
  // The mask is the value returned by fp2_is_zero, passed to fp2_select unchanged exactly as
  // jac_to_xz and ADD do; negating it would turn an all-ones mask into 1.
    fp2_t t0, t1, t2, t3;
    uint32_t flag = fp2_is_zero(&P->x) & fp2_is_zero(&P->z);

    fp2_sqr(&t0, &P->x); // t0 = x1^2
    fp2_add(&t1, &t0, &t0);
    fp2_add(&t0, &t0, &t1); // t0 = 3x1^2
    fp2_sqr(&t1, &P->z);    // t1 = z1^2
    fp2_mul(&t2, &P->x, &AC->A);
    fp2_add(&t2, &t2, &t2); // t2 = 2Ax1
    fp2_add(&t2, &t1, &t2); // t2 = 2Ax1+z1^2
    fp2_mul(&t2, &t1, &t2); // t2 = z1^2(2Ax1+z1^2)
    fp2_add(&t2, &t0, &t2); // t2 = alpha = 3x1^2 + z1^2(2Ax1+z1^2)

    fp2_mul(&Q->z, &P->y, &P->z);
    fp2_add(&Q->z, &Q->z, &Q->z); // z2 = 2y1z1

    fp2_sqr(&t0, &Q->z);
    fp2_mul(&t0, &t0, &AC->A); // t0 = 4Ay1^2z1^2
    fp2_sqr(&t1, &P->y);
    fp2_add(&t1, &t1, &t1);     // t1 = 2y1^2
    fp2_add(&t3, &P->x, &P->x); // t3 = 2x1
    fp2_mul(&t3, &t1, &t3);     // t3 = 4x1y1^2

    fp2_sqr(&Q->x, &t2);        // x2 = alpha^2
    fp2_sub(&Q->x, &Q->x, &t0); // x2 = alpha^2 - 4Ay1^2z1^2
    fp2_sub(&Q->x, &Q->x, &t3);
    fp2_sub(&Q->x, &Q->x, &t3); // x2 = alpha^2 - 4Ay1^2z1^2 - 8x1y1^2

    fp2_sub(&Q->y, &t3, &Q->x); // y2 = 4x1y1^2 - x2
    fp2_mul(&Q->y, &Q->y, &t2); // y2 = alpha(4x1y1^2 - x2)
    fp2_sqr(&t1, &t1);          // t1 = 4y1^4
    fp2_sub(&Q->y, &Q->y, &t1);
    fp2_sub(&Q->y, &Q->y, &t1); // y2 = alpha(4x1y1^2 - x2) - 8y1^4

    // If P was the identity, keep its X and Z
    fp2_select(&Q->x, &Q->x, &P->x, flag);
    fp2_select(&Q->z, &Q->z, &P->z, flag);
}
void
DBLW(jac_point_t *Q, fp2_t *u, const jac_point_t *P, const fp2_t *t)
{ // Cost of 3M + 5S.
  // Doubling on a Weierstrass curve, representation in modified Jacobian coordinates
  // (X:Y:Z:T=a*Z^4) corresponding to (X/Z^2,Y/Z^3), where a is the curve coefficient.
  // Formula from https://hyperelliptic.org/EFD/g1p/auto-shortw-modified.html
  // Input:  P = (X:Y:Z) and *t = T1 = a*Z^4.
  // Output: Q = 2P = (X3:Y3:Z3) and *u = T3 = a*Z3^4.
  //
  // The identity (X = Z = 0) is handled by a final masked select that keeps P's X and Z.
  // The mask is the value returned by fp2_is_zero, passed to fp2_select unchanged exactly as
  // jac_to_xz and ADD do; negating it would turn an all-ones mask into 1.
    uint32_t flag = fp2_is_zero(&P->x) & fp2_is_zero(&P->z);
    fp2_t xx, c, cc, r, s, m;

    // XX = X^2
    fp2_sqr(&xx, &P->x);
    // A = 2*Y^2
    fp2_sqr(&c, &P->y);
    fp2_add(&c, &c, &c);
    // AA = A^2
    fp2_sqr(&cc, &c);
    // R = 2*AA
    fp2_add(&r, &cc, &cc);
    // S = (X+A)^2-XX-AA
    fp2_add(&s, &P->x, &c);
    fp2_sqr(&s, &s);
    fp2_sub(&s, &s, &xx);
    fp2_sub(&s, &s, &cc);
    // M = 3*XX+T1
    fp2_add(&m, &xx, &xx);
    fp2_add(&m, &m, &xx);
    fp2_add(&m, &m, t);
    // X3 = M^2-2*S
    fp2_sqr(&Q->x, &m);
    fp2_sub(&Q->x, &Q->x, &s);
    fp2_sub(&Q->x, &Q->x, &s);
    // Z3 = 2*Y*Z
    fp2_mul(&Q->z, &P->y, &P->z);
    fp2_add(&Q->z, &Q->z, &Q->z);
    // Y3 = M*(S-X3)-R
    fp2_sub(&Q->y, &s, &Q->x);
    fp2_mul(&Q->y, &Q->y, &m);
    fp2_sub(&Q->y, &Q->y, &r);
    // T3 = 2*R*T1
    fp2_mul(u, t, &r);
    fp2_add(u, u, u);

    // If P was the identity, keep its X and Z
    fp2_select(&Q->x, &Q->x, &P->x, flag);
    fp2_select(&Q->z, &Q->z, &P->z, flag);
}
void
select_jac_point(jac_point_t *Q, const jac_point_t *P1, const jac_point_t *P2, const digit_t option)
{ // Masked point selection, coordinate by coordinate:
  //   option = 0         -> Q receives P1
  //   option = 0xFF...FF -> Q receives P2
    fp2_select(&Q->x, &P1->x, &P2->x, option);
    fp2_select(&Q->y, &P1->y, &P2->y, option);
    fp2_select(&Q->z, &P1->z, &P2->z, option);
}
void
ADD(jac_point_t *R, const jac_point_t *P, const jac_point_t *Q, const ec_curve_t *AC)
{
// Addition on a Montgomery curve, representation in Jacobian coordinates (X:Y:Z) corresponding
// to (x,y) = (X/Z^2,Y/Z^3) This version receives the coefficient value A
//
// Complete routine, to handle all edge cases:
// if ZP == 0: # P == inf
// return Q
// if ZQ == 0: # Q == inf
// return P
// dy <- YQ*ZP**3 - YP*ZQ**3
// dx <- XQ*ZP**2 - XP*ZQ**2
// if dx == 0: # x1 == x2
// if dy == 0: # ... and y1 == y2: doubling case
// dy <- ZP*ZQ * (3*XP^2 + ZP^2 * (2*A*XP + ZP^2))
// dx <- 2*YP*ZP
// else: # ... but y1 != y2, thus P = -Q
// return inf
// XR <- dy**2 - dx**2 * (A*ZP^2*ZQ^2 + XP*ZQ^2 + XQ*ZP^2)
// YR <- dy * (XP*ZQ^2 * dx^2 - XR) - YP*ZQ^3 * dx^3
// ZR <- dx * ZP * ZQ
// Note: in the doubling case the code below uses dx = 2*YP and dy = ZQ * (3*XP^2 + ...),
// i.e. both values of the pseudo-code above divided by ZP; the ratio dy/dx is the same.
// Constant time processing:
// - The case for P == 0 or Q == 0 is handled at the end with conditional select
// - dy and dx are computed for both the normal and doubling cases, we switch when
// dx == dy == 0 for the normal case.
// - If we have that P = -Q then dx = 0 and so ZR will be zero, giving us the point
// at infinity for "free".
//
// These current formula are expensive and I'm probably missing some tricks...
// Thought I'd get the ball rolling.
// Cost 17M + 6S + 13a
// NOTE(review): P and Q are read again by the final selects after R has been written, so
// passing R equal to P or Q looks unsafe -- confirm against callers.
fp2_t t0, t1, t2, t3, u1, u2, v1, dx, dy;
/* If P is zero or Q is zero we will conditionally swap before returning. */
uint32_t ctl1 = fp2_is_zero(&P->z);
uint32_t ctl2 = fp2_is_zero(&Q->z);
/* Precompute some values */
fp2_sqr(&t0, &P->z); // t0 = z1^2
fp2_sqr(&t1, &Q->z); // t1 = z2^2
/* Compute dy and dx for ordinary case */
fp2_mul(&v1, &t1, &Q->z); // v1 = z2^3
fp2_mul(&t2, &t0, &P->z); // t2 = z1^3
fp2_mul(&v1, &v1, &P->y); // v1 = y1z2^3
fp2_mul(&t2, &t2, &Q->y); // t2 = y2z1^3
fp2_sub(&dy, &t2, &v1); // dy = y2z1^3 - y1z2^3
fp2_mul(&u2, &t0, &Q->x); // u2 = x2z1^2
fp2_mul(&u1, &t1, &P->x); // u1 = x1z2^2
fp2_sub(&dx, &u2, &u1); // dx = x2z1^2 - x1z2^2
/* Compute dy and dx for doubling case */
fp2_add(&t1, &P->y, &P->y); // dx_dbl = t1 = 2y1
fp2_add(&t2, &AC->A, &AC->A); // t2 = 2A
fp2_mul(&t2, &t2, &P->x); // t2 = 2Ax1
fp2_add(&t2, &t2, &t0); // t2 = 2Ax1 + z1^2
fp2_mul(&t2, &t2, &t0); // t2 = z1^2 * (2Ax1 + z1^2)
fp2_sqr(&t0, &P->x); // t0 = x1^2
fp2_add(&t2, &t2, &t0); // t2 = x1^2 + z1^2 * (2Ax1 + z1^2)
fp2_add(&t2, &t2, &t0); // t2 = 2*x1^2 + z1^2 * (2Ax1 + z1^2)
fp2_add(&t2, &t2, &t0); // t2 = 3*x1^2 + z1^2 * (2Ax1 + z1^2)
fp2_mul(&t2, &t2, &Q->z); // dy_dbl = t2 = z2 * (3*x1^2 + z1^2 * (2Ax1 + z1^2))
/* If dx is zero and dy is zero swap with double variables */
uint32_t ctl = fp2_is_zero(&dx) & fp2_is_zero(&dy);
fp2_select(&dx, &dx, &t1, ctl);
fp2_select(&dy, &dy, &t2, ctl);
/* Some more precomputations */
fp2_mul(&t0, &P->z, &Q->z); // t0 = z1z2
fp2_sqr(&t1, &t0); // t1 = (z1z2)^2
fp2_sqr(&t2, &dx); // t2 = dx^2
fp2_sqr(&t3, &dy); // t3 = dy^2
/* Compute x3 = dy**2 - dx**2 * (A*ZP^2*ZQ^2 + XP*ZQ^2 + XQ*ZP^2) */
fp2_mul(&R->x, &AC->A, &t1); // x3 = A*(z1z2)^2
fp2_add(&R->x, &R->x, &u1); // x3 = A*(z1z2)^2 + u1
fp2_add(&R->x, &R->x, &u2); // x3 = A*(z1z2)^2 + u1 + u2
fp2_mul(&R->x, &R->x, &t2); // x3 = dx^2 * (A*(z1z2)^2 + u1 + u2)
fp2_sub(&R->x, &t3, &R->x); // x3 = dy^2 - dx^2 * (A*(z1z2)^2 + u1 + u2)
/* Compute y3 = dy * (XP*ZQ^2 * dx^2 - XR) - YP*ZQ^3 * dx^3*/
fp2_mul(&R->y, &u1, &t2); // y3 = u1 * dx^2
fp2_sub(&R->y, &R->y, &R->x); // y3 = u1 * dx^2 - x3
fp2_mul(&R->y, &R->y, &dy); // y3 = dy * (u1 * dx^2 - x3)
fp2_mul(&t3, &t2, &dx); // t3 = dx^3
fp2_mul(&t3, &t3, &v1); // t3 = v1 * dx^3
fp2_sub(&R->y, &R->y, &t3); // y3 = dy * (u1 * dx^2 - x3) - v1 * dx^3
/* Compute z3 = dx * z1 * z2 */
fp2_mul(&R->z, &dx, &t0);
/* Finally, we need to set R = P if Q.Z = 0 and R = Q if P.Z = 0 */
select_jac_point(R, R, Q, ctl1);
select_jac_point(R, R, P, ctl2);
}
void
jac_to_xz_add_components(add_components_t *add_comp, const jac_point_t *P, const jac_point_t *Q, const ec_curve_t *AC)
{
// Take P and Q in E distinct, two jac_point_t, return three components u,v and w in Fp2 such
// that the xz coordinates of P+Q are (u-v:w) and of P-Q are (u+v:w)
// Only AC->A is read from the curve. The results are stored in add_comp->u, add_comp->v and
// add_comp->w; no inversion is performed.
fp2_t t0, t1, t2, t3, t4, t5, t6;
fp2_sqr(&t0, &P->z); // t0 = z1^2
fp2_sqr(&t1, &Q->z); // t1 = z2^2
fp2_mul(&t2, &P->x, &t1); // t2 = x1z2^2
fp2_mul(&t3, &t0, &Q->x); // t3 = z1^2x2
fp2_mul(&t4, &P->y, &Q->z); // t4 = y1z2
fp2_mul(&t4, &t4, &t1); // t4 = y1z2^3
fp2_mul(&t5, &P->z, &Q->y); // t5 = z1y2
fp2_mul(&t5, &t5, &t0); // t5 = z1^3y2
fp2_mul(&t0, &t0, &t1); // t0 = (z1z2)^2
fp2_mul(&t6, &t4, &t5); // t6 = (z1z2)^3y1y2
fp2_add(&add_comp->v, &t6, &t6); // v = 2(z1z2)^3y1y2
fp2_sqr(&t4, &t4); // t4 = y1^2z2^6
fp2_sqr(&t5, &t5); // t5 = z1^6y2^2
fp2_add(&t4, &t4, &t5); // t4 = z1^6y2^2 + y1^2z2^6
fp2_add(&t5, &t2, &t3); // t5 = x1z2^2 + z1^2x2
fp2_add(&t6, &t3, &t3); // t6 = 2z1^2x2
fp2_sub(&t6, &t5, &t6); // t6 = lambda = x1z2^2 - z1^2x2
fp2_sqr(&t6, &t6); // t6 = lambda^2 = (x1z2^2 - z1^2x2)^2
fp2_mul(&t1, &AC->A, &t0); // t1 = A*(z1z2)^2
fp2_add(&t1, &t5, &t1); // t1 = gamma = A*(z1z2)^2 + x1z2^2 + z1^2x2
fp2_mul(&t1, &t1, &t6); // t1 = gamma*lambda^2
fp2_sub(&add_comp->u, &t4, &t1); // u = z1^6y2^2 + y1^2z2^6 - gamma*lambda^2
fp2_mul(&add_comp->w, &t6, &t0); // w = (z1z2)^2(lambda)^2
}

View File

@@ -0,0 +1,241 @@
#include "isog.h"
#include <assert.h>
// Compute a 2^isog_len-isogeny with the given kernel generator as a chain of 4-isogenies
// (plus one final 2-isogeny when isog_len is odd), pushing the `len_points` entries of
// `points` through every step and replacing *curve by the image curve in (A:C) form.
// Since we use degree 4 isogeny steps, we need to handle the odd case with care.
// Returns 0 on success and (uint32_t)-1 if the kernel point does not have the expected
// order or if a "special" isogeny (kernel above the point (0:1)) would be required.
// On failure *curve has already been normalized and `points` may be partially updated.
static uint32_t
ec_eval_even_strategy(ec_curve_t *curve,
ec_point_t *points,
unsigned len_points,
const ec_point_t *kernel,
const int isog_len)
{
ec_curve_normalize_A24(curve);
ec_point_t A24;
copy_point(&A24, &curve->A24);
// Stack depth: one more than the number of times 1 must be doubled to reach isog_len
int space = 1;
for (int i = 1; i < isog_len; i *= 2)
++space;
// Stack of remaining kernel points and their associated orders
// (todo[i] is the log2 of the order of splits[i])
ec_point_t splits[space];
uint16_t todo[space];
splits[0] = *kernel;
todo[0] = isog_len;
int current = 0; // Pointer to current top of stack
// Chain of 4-isogenies
for (int j = 0; j < isog_len / 2; ++j) {
assert(current >= 0);
assert(todo[current] >= 1);
// Get the next point of order 4
while (todo[current] != 2) {
assert(todo[current] >= 3);
// A new split will be added
++current;
assert(current < space);
// We set the seed of the new split to be computed and saved
copy_point(&splits[current], &splits[current - 1]);
// Roughly half of the remaining order is removed, keeping the new order even:
// if we copied from the very first element, then we perform one additional doubling
unsigned num_dbls = todo[current - 1] / 4 * 2 + todo[current - 1] % 2;
todo[current] = todo[current - 1] - num_dbls;
while (num_dbls--)
xDBL_A24(&splits[current], &splits[current], &A24, false);
}
if (j == 0) {
// First step only: validate the order of the kernel on the (still normalized) input curve
assert(fp2_is_one(&A24.z));
if (!ec_is_four_torsion(&splits[current], curve))
return -1;
ec_point_t T;
xDBL_A24(&T, &splits[current], &A24, false);
if (fp2_is_zero(&T.x))
return -1; // special isogenies not allowed
} else {
assert(todo[current] == 2);
#ifndef NDEBUG
if (fp2_is_zero(&splits[current].z))
debug_print("splitting point z coordinate is unexpectedly zero");
ec_point_t test;
xDBL_A24(&test, &splits[current], &A24, false);
if (fp2_is_zero(&test.z))
debug_print("z coordinate is unexpectedly zero before doubling");
xDBL_A24(&test, &test, &A24, false);
if (!fp2_is_zero(&test.z))
debug_print("z coordinate is unexpectedly not zero after doubling");
#endif
}
// Evaluate 4-isogeny
// (the top of the stack is the kernel; the splits below it and the caller's points are pushed through)
ec_kps4_t kps4;
xisog_4(&kps4, &A24, splits[current]);
xeval_4(splits, splits, current, &kps4);
for (int i = 0; i < current; ++i)
todo[i] -= 2;
xeval_4(points, points, len_points, &kps4);
--current;
}
assert(isog_len % 2 ? !current : current == -1);
// Final 2-isogeny
if (isog_len % 2) {
#ifndef NDEBUG
if (fp2_is_zero(&splits[0].z))
debug_print("splitting point z coordinate is unexpectedly zero");
ec_point_t test;
copy_point(&test, &splits[0]);
xDBL_A24(&test, &test, &A24, false);
if (!fp2_is_zero(&test.z))
debug_print("z coordinate is unexpectedly not zero after doubling");
#endif
// We need to check the order of this point in case there were no 4-isogenies
if (isog_len == 1 && !ec_is_two_torsion(&splits[0], curve))
return -1;
if (fp2_is_zero(&splits[0].x)) {
// special isogenies not allowed
// this case can only happen if isog_len == 1; otherwise the
// previous 4-isogenies we computed ensure that $T=(0:1)$ is put
// as the kernel of the dual isogeny
return -1;
}
ec_kps2_t kps2;
xisog_2(&kps2, &A24, splits[0]);
xeval_2(points, points, len_points, &kps2);
}
// Output curve in the form (A:C)
// (the stored A24 is marked stale, since it still describes the domain curve)
A24_to_AC(curve, &A24);
curve->is_A24_computed_and_normalized = false;
return 0;
}
uint32_t
ec_eval_even(ec_curve_t *image, ec_isog_even_t *phi, ec_point_t *points, unsigned len_points)
{ // Evaluate the even-degree isogeny phi on `points` (in place) and write the codomain to *image.
  // The domain curve is first copied into *image, which the strategy then transforms in place.
  // Returns the status of ec_eval_even_strategy (0 on success).
    uint32_t status;

    copy_curve(image, &phi->curve);
    status = ec_eval_even_strategy(image, points, len_points, &phi->kernel, phi->length);
    return status;
}
// Naive evaluation of a chain of `len` 2-isogenies with the given kernel generator: at every
// step the kernel is doubled down to a point of order 2 and a single 2-isogeny is applied.
// `points` (len_points entries) is updated in place and *curve is replaced by the image curve.
// Returns 0 on success, (uint32_t)-1 if the kernel does not have the expected order or if a
// special isogeny (kernel point with x = 0) is needed while `special` is false.
uint32_t
ec_eval_small_chain(ec_curve_t *curve,
                    const ec_point_t *kernel,
                    int len,
                    ec_point_t *points,
                    unsigned len_points,
                    bool special) // do we allow special isogenies?
{
    ec_point_t A24, B24;
    ec_point_t order2_K, full_K;
    ec_kps2_t kps;

    AC_to_A24(&A24, curve);
    copy_point(&full_K, kernel);

    for (int step = 0; step < len; step++) {
        // Bring the remaining kernel generator down to order 2
        copy_point(&order2_K, &full_K);
        for (int dbl = len - step - 1; dbl > 0; dbl--) {
            xDBL_A24(&order2_K, &order2_K, &A24, false);
        }

        // Check the order of the point before the first isogeny step
        if (step == 0 && !ec_is_two_torsion(&order2_K, curve))
            return (uint32_t)-1;

        // Regular step: kernel point with non-zero x
        if (!fp2_is_zero(&order2_K.x)) {
            xisog_2(&kps, &A24, order2_K);
            xeval_2(&full_K, &full_K, 1, &kps);
            xeval_2(points, points, len_points, &kps);
            continue;
        }

        // Singular step: kernel point with x = 0, only when explicitly allowed
        if (!special)
            return (uint32_t)-1;
        xisog_2_singular(&kps, &B24, A24);
        xeval_2_singular(&full_K, &full_K, 1, &kps);
        xeval_2_singular(points, points, len_points, &kps);
        copy_point(&A24, &B24);
    }

    A24_to_AC(curve, &A24);
    curve->is_A24_computed_and_normalized = false;
    return 0;
}
uint32_t
ec_isomorphism(ec_isom_t *isom, const ec_curve_t *from, const ec_curve_t *to)
{
// Compute the coefficients (Nx, Nz, D) of an isomorphism between the Montgomery curves
// `from` and `to`, both given by projective coefficients (A:C). ec_iso_eval applies the
// result to an x-only point as (X:Z) -> (Nx*X + Nz*Z : D*Z).
// Returns a non-zero value when Nx or D is zero (the computed map is then degenerate),
// and 0 otherwise.
// NOTE(review): this routine does not check that the two curves are actually isomorphic.
fp2_t t0, t1, t2, t3, t4;
fp2_mul(&t0, &from->A, &from->C); // fromA*fromC
fp2_mul(&t1, &to->A, &to->C); // toA*toC
fp2_mul(&t2, &t1, &to->C); // toA*toC^2
fp2_add(&t3, &t2, &t2);
fp2_add(&t3, &t3, &t3);
fp2_add(&t3, &t3, &t3);
fp2_add(&t2, &t2, &t3); // 9*toA*toC^2
fp2_sqr(&t3, &to->A);
fp2_mul(&t3, &t3, &to->A); // toA^3
fp2_add(&t3, &t3, &t3);
fp2_sub(&isom->Nx, &t3, &t2); // 2*toA^3-9*toA*toC^2
fp2_mul(&t2, &t0, &from->A); // fromA^2*fromC
fp2_sqr(&t3, &from->C);
fp2_mul(&t3, &t3, &from->C); // fromC^3
fp2_add(&t4, &t3, &t3);
fp2_add(&t3, &t4, &t3); // 3*fromC^3
fp2_sub(&t3, &t3, &t2); // 3*fromC^3-fromA^2*fromC
fp2_mul(&isom->Nx, &isom->Nx, &t3); // lambda_x = (2*toA^3-9*toA*toC^2)*(3*fromC^3-fromA^2*fromC)
fp2_mul(&t2, &t0, &from->C); // fromA*fromC^2
fp2_add(&t3, &t2, &t2);
fp2_add(&t3, &t3, &t3);
fp2_add(&t3, &t3, &t3);
fp2_add(&t2, &t2, &t3); // 9*fromA*fromC^2
fp2_sqr(&t3, &from->A);
fp2_mul(&t3, &t3, &from->A); // fromA^3
fp2_add(&t3, &t3, &t3);
fp2_sub(&isom->D, &t3, &t2); // 2*fromA^3-9*fromA*fromC^2
fp2_mul(&t2, &t1, &to->A); // toA^2*toC
fp2_sqr(&t3, &to->C);
fp2_mul(&t3, &t3, &to->C); // toC^3
fp2_add(&t4, &t3, &t3);
fp2_add(&t3, &t4, &t3); // 3*toC^3
fp2_sub(&t3, &t3, &t2); // 3*toC^3-toA^2*toC
fp2_mul(&isom->D, &isom->D, &t3); // lambda_z = (2*fromA^3-9*fromA*fromC^2)*(3*toC^3-toA^2*toC)
// Mont -> SW -> SW -> Mont
fp2_mul(&t0, &to->C, &from->A);
fp2_mul(&t0, &t0, &isom->Nx); // lambda_x*toC*fromA
fp2_mul(&t1, &from->C, &to->A);
fp2_mul(&t1, &t1, &isom->D); // lambda_z*fromC*toA
fp2_sub(&isom->Nz, &t0, &t1); // lambda_x*toC*fromA - lambda_z*fromC*toA
fp2_mul(&t0, &from->C, &to->C);
fp2_add(&t1, &t0, &t0);
fp2_add(&t0, &t0, &t1); // 3*fromC*toC
fp2_mul(&isom->D, &isom->D, &t0); // 3*lambda_z*fromC*toC
fp2_mul(&isom->Nx, &isom->Nx, &t0); // 3*lambda_x*fromC*toC
// Non-zero result signals a degenerate map
return (fp2_is_zero(&isom->Nx) | fp2_is_zero(&isom->D));
}
void
ec_iso_eval(ec_point_t *P, ec_isom_t *isom)
{ // Apply the isomorphism in place: (X:Z) -> (Nx*X + Nz*Z : D*Z)
    fp2_t z_term;

    fp2_mul(&z_term, &P->z, &isom->Nz); // Nz*Z, taken before Z is overwritten
    fp2_mul(&P->x, &P->x, &isom->Nx);   // Nx*X
    fp2_add(&P->x, &P->x, &z_term);     // X' = Nx*X + Nz*Z
    fp2_mul(&P->z, &P->z, &isom->D);    // Z' = D*Z
}

View File

@@ -0,0 +1,143 @@
#include <bench.h>
#include <assert.h>
#include <stdio.h>
#include <inttypes.h>
#include <ec.h>
#define STRINGIFY2(x) #x
#define STRINGIFY(x) STRINGIFY2(x)
/******************************
Util functions
******************************/
int
cmp_u64(const void *v1, const void *v2)
{ // qsort-compatible three-way comparison of two uint64_t values:
  // returns -1, 0 or 1 when *v1 is below, equal to or above *v2.
    const uint64_t lhs = *(const uint64_t *)v1;
    const uint64_t rhs = *(const uint64_t *)v2;

    // Each comparison yields 0 or 1, so the difference is exactly -1, 0 or 1
    return (lhs > rhs) - (lhs < rhs);
}
void
bench_basis_generation(unsigned int n, int iterations)
{ // Benchmark ec_curve_to_basis_2f_to_hint (E[2^n] basis generation without a hint) on a fixed curve.
  // n:          exponent of the 2-power torsion to generate.
  // iterations: number of calls timed per run; must be positive.
  // The first WARMUP_RUNS runs are discarded, the remaining TIMED_RUNS runs are sorted and the
  // lower median, divided by `iterations`, is printed as the cycle count.
    enum { WARMUP_RUNS = 10, TIMED_RUNS = 10 };
    int i, j;
    uint64_t cycles1, cycles2;
    uint64_t cycle_runs[WARMUP_RUNS + TIMED_RUNS];
    uint64_t *timed_runs = cycle_runs + WARMUP_RUNS;
    ec_basis_t basis;
    ec_curve_t curve;

    ec_curve_init(&curve);
    // Set a supersingular elliptic curve
    // E : y^2 = x^3 + 6*x^2 + x
    fp2_set_small(&(curve.A), 6);
    fp2_set_one(&(curve.C));
    ec_curve_normalize_A24(&curve);

    // Full even torsion generation without hints
    for (i = 0; i < WARMUP_RUNS + TIMED_RUNS; i++) {
        cycles1 = cpucycles();
        for (j = 0; j < iterations; j++) {
            (void)ec_curve_to_basis_2f_to_hint(&basis, &curve, n);
        }
        cycles2 = cpucycles();
        cycle_runs[i] = cycles2 - cycles1;
    }

    // Sort only the timed runs and report their median; the reported index has to lie inside
    // the sorted range, not among the unsorted warm-up runs.
    qsort(timed_runs, TIMED_RUNS, sizeof timed_runs[0], cmp_u64);
    printf(" 2^%u torsion generation takes .................................... %" PRIu64 " cycles\n",
           n,
           timed_runs[(TIMED_RUNS - 1) / 2] / (iterations));
}
void
bench_basis_generation_from_hint(unsigned int n, int iterations)
{ // Benchmark ec_curve_to_basis_2f_from_hint (E[2^n] basis generation from a hint) on a fixed curve.
  // n:          exponent of the 2-power torsion to generate.
  // iterations: number of calls timed per run; must be positive.
  // The hint is produced once, outside the timed region. The first WARMUP_RUNS runs are
  // discarded, the remaining TIMED_RUNS runs are sorted and the lower median, divided by
  // `iterations`, is printed as the cycle count.
    enum { WARMUP_RUNS = 10, TIMED_RUNS = 10 };
    int i, j;
    uint64_t cycles1, cycles2;
    uint64_t cycle_runs[WARMUP_RUNS + TIMED_RUNS];
    uint64_t *timed_runs = cycle_runs + WARMUP_RUNS;
    ec_basis_t basis;
    ec_curve_t curve;

    ec_curve_init(&curve);
    // Set a supersingular elliptic curve
    // E : y^2 = x^3 + 6*x^2 + x
    fp2_set_small(&(curve.A), 6);
    fp2_set_one(&(curve.C));
    ec_curve_normalize_A24(&curve);

    uint8_t hint = ec_curve_to_basis_2f_to_hint(&basis, &curve, n);

    // Full even torsion generation from the precomputed hint
    for (i = 0; i < WARMUP_RUNS + TIMED_RUNS; i++) {
        cycles1 = cpucycles();
        for (j = 0; j < iterations; j++) {
            ec_curve_to_basis_2f_from_hint(&basis, &curve, n, hint);
        }
        cycles2 = cpucycles();
        cycle_runs[i] = cycles2 - cycles1;
    }

    // Sort only the timed runs and report their median; the reported index has to lie inside
    // the sorted range, not among the unsorted warm-up runs.
    qsort(timed_runs, TIMED_RUNS, sizeof timed_runs[0], cmp_u64);
    printf(" 2^%u torsion generation takes .................................... %" PRIu64 " cycles\n",
           n,
           timed_runs[(TIMED_RUNS - 1) / 2] / (iterations));
}
void
bench_basis(int iterations)
{ // Run both basis-generation benchmarks (without and with a hint), each for the full even
  // torsion exponent TORSION_EVEN_POWER and for exponent 128, printing a heading before each group.
    const unsigned int exponents[2] = { TORSION_EVEN_POWER, 128 };
    int idx;

    printf("\n-------------------------------------------------------------------------------------"
           "-------------------\n\n");

    printf("Benchmarking E[2^n] basis generation for " STRINGIFY(SQISIGN_VARIANT) ": \n\n");
    for (idx = 0; idx < 2; idx++) {
        bench_basis_generation(exponents[idx], iterations);
    }

    printf("\nBenchmarking E[2^n] basis generation with hint for " STRINGIFY(SQISIGN_VARIANT) ": \n\n");
    for (idx = 0; idx < 2; idx++) {
        bench_basis_generation_from_hint(exponents[idx], iterations);
    }
}
int
main(int argc, char *argv[])
{ // Entry point of the basis-generation benchmark.
  // Options: --help                 print usage and exit
  //          --iterations=<number>  calls timed per run (must be positive)
  // Returns 0 after a successful run, 1 when usage is printed (help requested or a
  // non-positive iteration count).
    // Kept separately so the usage text reports the real default even after
    // --iterations has overridden the working value.
    const int default_iterations = 100 * SQISIGN_TEST_REPS;
    int iterations = default_iterations;
    int help = 0;

#ifndef NDEBUG
    fprintf(stderr,
            "\x1b[31mIt looks like SQIsign was compiled with assertions enabled.\n"
            "This will severely impact performance measurements.\x1b[0m\n");
#endif

    for (int i = 1; i < argc; i++) {
        if (!help && strcmp(argv[i], "--help") == 0) {
            help = 1;
            continue;
        }
        // Unrecognized arguments are ignored; sscanf leaves `iterations` untouched on mismatch
        if (sscanf(argv[i], "--iterations=%d", &iterations) == 1) {
            continue;
        }
    }

    if (help || iterations <= 0) {
        printf("Usage: %s [--iterations=<iterations>]\n", argv[0]);
        printf("Where <iterations> is the number of iterations used for benchmarking; if not "
               "present, uses the default: %d\n",
               default_iterations);
        return 1;
    }

    cpucycles_init();
    bench_basis(iterations);
    return 0;
}

View File

@@ -0,0 +1,195 @@
#include <assert.h>
#include <stdio.h>
#include <inttypes.h>
#include <ec.h>
/******************************
Test functions
******************************/
// Checks that (basis->P, basis->Q) behaves like a basis of E[2^n] on `curve`.
//
// Both points are doubled n - 1 times; the results must be nonzero, distinct,
// and the multiple of Q must have x = 0. One further doubling must send both
// to the identity. Every failed check prints a diagnostic.
//
// Returns 1 when all checks pass and 0 otherwise (including n == 0).
int
inner_test_generated_basis(ec_basis_t *basis, ec_curve_t *curve, unsigned int n)
{
    unsigned int i;
    int passed = 1;
    ec_point_t P, Q;
    // n is unsigned, so n - 1 would wrap around for n == 0 and request about
    // 2^32 doublings; reject that input up front.
    if (n == 0) {
        printf("Test failed with n = %u\n", n);
        return 0;
    }
    copy_point(&P, &basis->P);
    copy_point(&Q, &basis->Q);
    // Double points to get point of order 2
    for (i = 0; i < n - 1; i++) {
        xDBL_A24(&P, &P, &curve->A24, curve->is_A24_computed_and_normalized);
        xDBL_A24(&Q, &Q, &curve->A24, curve->is_A24_computed_and_normalized);
    }
    if (ec_is_zero(&P)) {
        printf("Point P generated does not have full order\n");
        passed = 0;
    }
    if (ec_is_zero(&Q)) {
        printf("Point Q generated does not have full order\n");
        passed = 0;
    }
    // Equal order-2 multiples mean P and Q do not span the full torsion
    if (ec_is_equal(&P, &Q)) {
        printf("Points P, Q are linearly dependent\n");
        passed = 0;
    }
    // [2^(n-1)]Q is expected to be the point with x = 0
    if (!fp2_is_zero(&Q.x)) {
        printf("Points Q is not above the Montgomery point\n");
        passed = 0;
    }
    // This should give the identity
    xDBL_A24(&P, &P, &curve->A24, curve->is_A24_computed_and_normalized);
    xDBL_A24(&Q, &Q, &curve->A24, curve->is_A24_computed_and_normalized);
    if (!ec_is_zero(&P)) {
        printf("Point P generated does not have order exactly 2^n\n");
        passed = 0;
    }
    if (!ec_is_zero(&Q)) {
        printf("Point Q generated does not have order exactly 2^n\n");
        passed = 0;
    }
    if (passed == 0) {
        printf("Test failed with n = %u\n", n);
    }
    return passed;
}
// Compares a freshly generated basis with the one rebuilt from its hint.
// P, Q and PmQ are compared in turn and every mismatch is reported.
// Returns 1 when all three points agree, 0 otherwise.
int
inner_test_hint_basis(ec_basis_t *basis, ec_basis_t *basis_hint)
{
    int mismatches = 0;

    if (ec_is_equal(&basis->P, &basis_hint->P) == 0) {
        printf("The points P do not match using the hint\n");
        mismatches++;
    }
    if (ec_is_equal(&basis->Q, &basis_hint->Q) == 0) {
        printf("The points Q do not match using the hint\n");
        mismatches++;
    }
    if (ec_is_equal(&basis->PmQ, &basis_hint->PmQ) == 0) {
        printf("The points PmQ do not match using the hint\n");
        mismatches++;
    }

    if (mismatches != 0) {
        printf("Test failed\n");
        return 0;
    }
    return 1;
}
/******************************
Test wrapper functions
******************************/
// Generates a basis of E[2^n] on the special curve with A = 0 and validates it.
// Returns the verdict of inner_test_generated_basis (1 = pass, 0 = fail).
int
test_basis_generation_E0(unsigned int n)
{
    ec_curve_t E0;
    ec_basis_t generated;

    // E0 : y^2 = x^3 + x, i.e. (A : C) = (0 : 1)
    ec_curve_init(&E0);
    fp2_set_small(&E0.A, 0);
    fp2_set_one(&E0.C);
    ec_curve_normalize_A24(&E0);

    // The returned hint is not needed for this test
    (void)ec_curve_to_basis_2f_to_hint(&generated, &E0, n);

    return inner_test_generated_basis(&generated, &E0, n);
}
// Generates a basis of E[2^n] on the curve with A = 6 and validates it.
// Returns the verdict of inner_test_generated_basis (1 = pass, 0 = fail).
int
test_basis_generation(unsigned int n)
{
    ec_curve_t E6;
    ec_basis_t generated;

    // E6 : y^2 = x^3 + 6*x^2 + x, i.e. (A : C) = (6 : 1)
    ec_curve_init(&E6);
    fp2_set_small(&E6.A, 6);
    fp2_set_one(&E6.C);
    ec_curve_normalize_A24(&E6);

    // The returned hint is not needed for this test
    (void)ec_curve_to_basis_2f_to_hint(&generated, &E6, n);

    return inner_test_generated_basis(&generated, &E6, n);
}
// Generates a basis of E[2^n] on the curve with A = 6, validates it, rebuilds
// it from the hint that was produced alongside it, and checks that both bases
// coincide. Both checks always run; returns 1 only when both succeed.
int
test_basis_generation_with_hints(unsigned int n)
{
    ec_curve_t E6;
    ec_basis_t from_scratch, from_hint;

    // E6 : y^2 = x^3 + 6*x^2 + x, i.e. (A : C) = (6 : 1)
    ec_curve_init(&E6);
    fp2_set_small(&E6.A, 6);
    fp2_set_one(&E6.C);
    ec_curve_normalize_A24(&E6);

    // First pass: generate the basis and record its hint
    const uint8_t hint = ec_curve_to_basis_2f_to_hint(&from_scratch, &E6, n);
    const int generated_ok = inner_test_generated_basis(&from_scratch, &E6, n);

    // Second pass: rebuild from the hint and compare with the first basis
    ec_curve_to_basis_2f_from_hint(&from_hint, &E6, n, hint);
    const int matches_ok = inner_test_hint_basis(&from_scratch, &from_hint);

    return generated_ok && matches_ok;
}
// Runs every basis-generation test for the full even torsion and for the
// partial 2^128 torsion: first on the A = 6 curve (with and without hints),
// then on the special A = 0 curve. All tests run even after a failure.
// Returns 1 when everything passed, 0 otherwise.
int
test_basis(void)
{
    // Full order first, then partial order
    const unsigned int exponents[2] = { TORSION_EVEN_POWER, 128 };
    int all_ok = 1;
    int k;

    for (k = 0; k < 2; k++) {
        all_ok &= test_basis_generation(exponents[k]);
        all_ok &= test_basis_generation_with_hints(exponents[k]);
    }

    // Special case when we have A = 0
    for (k = 0; k < 2; k++) {
        all_ok &= test_basis_generation_E0(exponents[k]);
    }

    return all_ok;
}
// Entry point of the basis-generation tests.
// Exit status is 0 when all tests pass and 1 otherwise.
int
main(void)
{
    const bool all_passed = test_basis();

    if (all_passed) {
        printf("All basis generation tests passed.\n");
        return 0;
    }

    printf("Tests failed!\n");
    return 1;
}

View File

@@ -0,0 +1,113 @@
#include <time.h>
#include <assert.h>
#include <stdio.h>
#include <inttypes.h>
#include <tools.h>
#include <mp.h>
#include "biextension.h"
#include <rng.h>
#include "bench.h"
#define STRINGIFY2(x) #x
#define STRINGIFY(x) STRINGIFY2(x)
// Benchmarks the biextension-based primitives on E6 : y^2 = x^3 + 6*x^2 + x.
//
// Four operations are timed over `bench` repetitions each: e-fold doubling,
// the Weil pairing, and the Weil- and Tate-based two-dimensional discrete
// logs, where e = TORSION_EVEN_POWER. The average cycle count of each is
// printed. `bench` must be nonzero because it is used as a divisor.
void
biextension_bench(uint64_t bench)
{
    const uint32_t e = TORSION_EVEN_POWER;
    uint64_t start, stop, run;
    ec_curve_t E6;
    ec_basis_t BPQ, BRS;
    ec_point_t doubled;
    fp2_t pairing;
    digit_t r1[NWORDS_ORDER], r2[NWORDS_ORDER];
    digit_t s1[NWORDS_ORDER], s2[NWORDS_ORDER];

    // Curve constants for E6
    ec_curve_init(&E6);
    fp2_set_small(&E6.A, 6);
    fp2_set_one(&E6.C);
    ec_curve_normalize_A24(&E6);

    // 2^e-torsion basis; the dlog benchmarks solve it against a copy of itself
    (void)ec_curve_to_basis_2f_to_hint(&BPQ, &E6, e);
    copy_basis(&BRS, &BPQ);

    // Doubling on the curve
    printf("\n\nBenchmarking doublings\n");
    start = cpucycles();
    for (run = 0; run < bench; run++) {
        ec_dbl_iter(&doubled, e, &BPQ.P, &E6);
    }
    stop = cpucycles();
    printf("\x1b[34mAvg doubling: %'" PRIu64 " cycles\x1b[0m\n", (stop - start) / bench);

    // Weil pairing
    printf("\n\nBenchmarking (Weil) pairings\n");
    start = cpucycles();
    for (run = 0; run < bench; run++) {
        weil(&pairing, e, &BPQ.P, &BPQ.Q, &BPQ.PmQ, &E6);
    }
    stop = cpucycles();
    printf("\x1b[34mAvg pairing: %'" PRIu64 " cycles\x1b[0m\n", (stop - start) / bench);

    // Discrete log via the Weil pairing
    printf("\n\nBenchmarking (Weil) dlogs\n");
    start = cpucycles();
    for (run = 0; run < bench; run++) {
        ec_dlog_2_weil(r1, r2, s1, s2, &BPQ, &BRS, &E6, e);
    }
    stop = cpucycles();
    printf("\x1b[34mAvg pairing dlog: %'" PRIu64 " cycles\x1b[0m\n", (stop - start) / bench);

    // Discrete log via the Tate pairing
    printf("\n\nBenchmarking (Tate) dlogs\n");
    start = cpucycles();
    for (run = 0; run < bench; run++) {
        ec_dlog_2_tate(r1, r2, s1, s2, &BPQ, &BRS, &E6, e);
    }
    stop = cpucycles();
    printf("\x1b[34mAvg Tate dlog: %'" PRIu64 " cycles\x1b[0m\n", (stop - start) / bench);
}
// Entry point of the biextension benchmark.
//
// Recognised arguments:
//   --help              print the usage text
//   --iterations=<n>    number of repetitions per benchmark (must be positive)
//
// Returns 1 after printing the usage text (for --help or a non-positive
// iteration count) and 0 once the benchmarks have run.
int
main(int argc, char *argv[])
{
    // Keep the built-in default apart from the parsed value so the usage text
    // reports the real default even after a bad --iterations value was parsed.
    const int default_iterations = 1000 * SQISIGN_TEST_REPS;
    int iterations = default_iterations;
    int help = 0;
#ifndef NDEBUG
    fprintf(stderr,
            "\x1b[31mIt looks like SQIsign was compiled with assertions enabled.\n"
            "This will severely impact performance measurements.\x1b[0m\n");
#endif
    for (int i = 1; i < argc; i++) {
        if (!help && strcmp(argv[i], "--help") == 0) {
            help = 1;
            continue;
        }
        if (sscanf(argv[i], "--iterations=%d", &iterations) == 1) {
            continue;
        }
    }
    if (help || iterations <= 0) {
        printf("Usage: %s [--iterations=<iterations>]\n", argv[0]);
        printf("Where <iterations> is the number of iterations used for benchmarking; if not "
               "present, uses the default: %d)\n",
               default_iterations);
        return 1;
    }
    cpucycles_init();
    printf("Running biextension benchmarks for " STRINGIFY(SQISIGN_VARIANT) ":\n\n");
    // iterations is known to be positive here, so the widening is lossless
    biextension_bench(iterations);
    return 0;
}

View File

@@ -0,0 +1,259 @@
#include <time.h>
#include <assert.h>
#include <stdio.h>
#include <inttypes.h>
#include <tools.h>
#include <mp.h>
#include "biextension.h"
#include <rng.h>
#include <bench_test_arguments.h>
// Sets r = x^(2^e) by copying x into r and squaring it e times.
// For e == 0 the result is simply a copy of x.
void
fp2_exp_2e(fp2_t *r, uint32_t e, const fp2_t *x)
{
    uint32_t squarings_left = e;

    fp2_copy(r, x);
    while (squarings_left > 0) {
        fp2_sqr(r, r);
        squarings_left--;
    }
}
// Unit test for the pairing and discrete-log routines on the 2^e-torsion of
// E6 : y^2 = x^3 + 6*x^2 + x, with e = TORSION_EVEN_POWER.
//
// Checks, in order: the order of the generated basis points, the order of the
// Weil and Tate pairing values, bilinearity of the Weil pairing for multiples
// 2 and 3, and the Weil- and Tate-based discrete logs for the full torsion and
// for a partial 2^126 torsion.
//
// Every check is an assert(): a failure aborts the program, and nothing is
// verified when the file is compiled with NDEBUG defined.
void
biextension_test()
{
clock_t t;
ec_curve_t curve;
ec_basis_t even_torsion;
uint32_t e = TORSION_EVEN_POWER;
fp2_t one, r1, rr1, rrr1, r2, r3, tp;
ec_point_t P, Q, PmQ, A24;
ec_point_t tmp, tmp2, PQ, PP, QQ, PPQ, PQQ, PPP, QQQ, PPPQ, PQQQ;
// Get constants from curve E6 : y^2 = x^3 + 6*x^2 + x
ec_curve_init(&curve);
fp2_set_small(&(curve.A), 6);
fp2_set_one(&(curve.C));
ec_curve_normalize_A24(&curve);
copy_point(&A24, &curve.A24);
// Compute 2^e torsion on curve
(void)ec_curve_to_basis_2f_to_hint(&even_torsion, &curve, e);
copy_point(&P, &even_torsion.P);
copy_point(&Q, &even_torsion.Q);
copy_point(&PmQ, &even_torsion.PmQ);
// e doublings must send P, Q and P - Q to the identity
printf("Testing order of points\n");
t = tic();
ec_dbl_iter(&tmp, e, &P, &curve);
TOC_clock(t, "Doublings");
assert(ec_is_zero(&tmp));
ec_dbl_iter(&tmp, e, &Q, &curve);
assert(ec_is_zero(&tmp));
ec_dbl_iter(&tmp, e, &PmQ, &curve);
assert(ec_is_zero(&tmp));
printf("Computing Weil pairing\n");
// PQ = P + Q, obtained from P, Q and their difference
xADD(&PQ, &P, &Q, &PmQ);
t = tic();
weil(&r1, e, &P, &Q, &PQ, &curve);
TOC_clock(t, "Weil pairing");
printf("Computing Tate pairing\n");
t = tic();
reduced_tate(&tp, e, &P, &Q, &PQ, &curve);
TOC_clock(t, "Tate pairing");
// The pairing value must have order exactly 2^e:
// r1^(2^(e-1)) != 1 and r1^(2^e) == 1
printf("Testing order of Weil pairing\n");
fp2_set_one(&one);
fp2_exp_2e(&r2, e - 1, &r1);
assert(!fp2_is_equal(&r2, &one));
fp2_exp_2e(&r2, e, &r1);
assert(fp2_is_equal(&r2, &one));
printf("Testing order of Tate pairing\n");
fp2_set_one(&one);
fp2_exp_2e(&r2, e - 1, &tp);
assert(!fp2_is_equal(&r2, &one));
fp2_exp_2e(&r2, e, &tp);
assert(fp2_is_equal(&r2, &one));
printf("Bilinearity tests\n");
// Passing P - Q instead of P + Q as third point must give the inverse value
weil(&r2, e, &P, &Q, &PmQ, &curve);
fp2_inv(&r2);
assert(fp2_is_equal(&r1, &r2));
// e(2P, Q) == e(P, 2Q) == e(P, Q)^2
xDBL_A24(&PP, &P, &A24, false);
xDBL_A24(&QQ, &Q, &A24, false);
xADD(&PPQ, &PQ, &P, &Q);
xADD(&PQQ, &PQ, &Q, &P);
weil(&r2, e, &PP, &Q, &PPQ, &curve);
weil(&r3, e, &P, &QQ, &PQQ, &curve);
assert(fp2_is_equal(&r2, &r3));
fp2_sqr(&rr1, &r1);
assert(fp2_is_equal(&rr1, &r2));
// e(3P, Q) == e(P, 3Q) == e(P, Q)^3
xADD(&PPP, &PP, &P, &P);
xADD(&QQQ, &QQ, &Q, &Q);
xADD(&PPPQ, &PPQ, &P, &PQ);
xADD(&PQQQ, &PQQ, &Q, &PQ);
weil(&r2, e, &PPP, &Q, &PPPQ, &curve);
weil(&r3, e, &P, &QQQ, &PQQQ, &curve);
assert(fp2_is_equal(&r2, &r3));
fp2_mul(&rrr1, &rr1, &r1);
assert(fp2_is_equal(&rrr1, &r2));
printf("dlog tests\n");
ec_basis_t BPQ, BRS;
digit_t scal_r1[NWORDS_ORDER] = { 0 };
digit_t scal_r2[NWORDS_ORDER] = { 0 };
digit_t scal_s1[NWORDS_ORDER] = { 0 };
digit_t scal_s2[NWORDS_ORDER] = { 0 };
digit_t scal_d1[NWORDS_ORDER] = { 0 };
digit_t scal_d2[NWORDS_ORDER] = { 0 };
// original even torsion
BPQ = even_torsion;
BRS = even_torsion;
// alternative torsion, just mix the points up a little...
// not filling top word so the addition below can overflow into it
// so the scalars are "random enough" but we still keep the difference
// scal_d1 and scal_d2 required to compute the right multiple of RmS
randombytes((unsigned char *)scal_d1, (NWORDS_ORDER - 1) * sizeof(digit_t));
randombytes((unsigned char *)scal_d2, (NWORDS_ORDER - 1) * sizeof(digit_t));
randombytes((unsigned char *)scal_s1, (NWORDS_ORDER - 1) * sizeof(digit_t));
randombytes((unsigned char *)scal_s2, (NWORDS_ORDER - 1) * sizeof(digit_t));
// Ensure that r1*s2 - r2*s1 is odd such that the matrix
// [[r1, r2], [s1, s2]] is invertible
// (each line clears the lowest bit and then sets it again where an odd value is needed)
scal_s1[0] = (scal_s1[0] & ((digit_t)(-1) - 1)) + 1; // s1 needs to be odd
scal_d1[0] = (scal_d1[0] & ((digit_t)(-1) - 1)); // d1 needs to be even to make r1 odd
scal_s2[0] = (scal_s2[0] & ((digit_t)(-1) - 1)) + 1; // s2 needs to be odd
scal_d2[0] = (scal_d2[0] & ((digit_t)(-1) - 1)) + 1; // d2 needs to be odd to make r2 even
// Compute r1 and r2 from the difference di = ri - si
mp_add(scal_r1, scal_d1, scal_s1, NWORDS_ORDER);
mp_add(scal_r2, scal_d2, scal_s2, NWORDS_ORDER);
// R = [r1]P + [r2]Q, S = [s1]P + [s2]Q, R - S = [d1]P + [d2]Q
ec_biscalar_mul(&BRS.P, scal_r1, scal_r2, e, &BPQ, &curve);
ec_biscalar_mul(&BRS.Q, scal_s1, scal_s2, e, &BPQ, &curve);
ec_biscalar_mul(&BRS.PmQ, scal_d1, scal_d2, e, &BPQ, &curve);
printf("mixed\n");
// Now solve the discrete log
// (the scalar arrays are reused as outputs, so the known values are overwritten)
ec_dlog_2_weil(scal_r1, scal_r2, scal_s1, scal_s2, &BPQ, &BRS, &curve, e);
// assert everything matches
// R = [r1]P + [r2]Q
ec_biscalar_mul(&tmp, scal_r1, scal_r2, e, &BPQ, &curve);
assert(ec_is_equal(&tmp, &BRS.P));
// S = [s1]P + [s2]Q
ec_biscalar_mul(&tmp, scal_s1, scal_s2, e, &BPQ, &curve);
assert(ec_is_equal(&tmp, &BRS.Q));
printf("weil solved\n");
// now repeat using the tate pairing
ec_dlog_2_tate(scal_r1, scal_r2, scal_s1, scal_s2, &BPQ, &BRS, &curve, e);
// assert everything matches
// R = [r1]P + [r2]Q
ec_biscalar_mul(&tmp, scal_r1, scal_r2, e, &BPQ, &curve);
assert(ec_is_equal(&tmp, &BRS.P));
// S = [s1]P + [s2]Q
ec_biscalar_mul(&tmp, scal_s1, scal_s2, e, &BPQ, &curve);
assert(ec_is_equal(&tmp, &BRS.Q));
printf("tate solved\n");
// now we try with bases for partial torsion E[2^e] with e < e_full
int e_full = TORSION_EVEN_POWER;
int e_partial = 126;
// Multiply R, S and R - S by 2^(e_full - e_partial); BPQ keeps its full order
ec_dbl_iter(&BRS.P, e_full - e_partial, &BRS.P, &curve);
ec_dbl_iter(&BRS.Q, e_full - e_partial, &BRS.Q, &curve);
ec_dbl_iter(&BRS.PmQ, e_full - e_partial, &BRS.PmQ, &curve);
ec_dlog_2_tate(scal_r1, scal_r2, scal_s1, scal_s2, &BPQ, &BRS, &curve, e_partial);
// R = [r1]P + [r2]Q
// then R = [2^e_diff] R
ec_biscalar_mul(&tmp, scal_r1, scal_r2, e, &BPQ, &curve);
ec_dbl_iter(&tmp, e_full - e_partial, &tmp, &curve);
assert(ec_is_equal(&tmp, &BRS.P));
// S = [s1]P + [s2]Q
// then S = [2^e_diff] S
ec_biscalar_mul(&tmp, scal_s1, scal_s2, e, &BPQ, &curve);
ec_dbl_iter(&tmp, e_full - e_partial, &tmp, &curve);
assert(ec_is_equal(&tmp, &BRS.Q));
printf("tate from full basis solved\n");
// Opposite direction: invert the solved matrix and check that it expresses
// [2^e_diff]P and [2^e_diff]Q in terms of the partial-order basis (R, S)
ec_dlog_2_tate(scal_r1, scal_r2, scal_s1, scal_s2, &BPQ, &BRS, &curve, e_partial);
mp_invert_matrix(scal_r1, scal_r2, scal_s1, scal_s2, e_partial, NWORDS_ORDER);
// assert everything matches
ec_biscalar_mul(&tmp, scal_r1, scal_r2, e, &BRS, &curve);
ec_dbl_iter(&tmp2, e_full - e_partial, &BPQ.P, &curve);
assert(ec_is_equal(&tmp, &tmp2));
ec_biscalar_mul(&tmp, scal_s1, scal_s2, e, &BRS, &curve);
ec_dbl_iter(&tmp2, e_full - e_partial, &BPQ.Q, &curve);
assert(ec_is_equal(&tmp, &tmp2));
printf("tate to full basis solved\n");
}
// Entry point of the biextension unit tests.
//
// Recognised arguments:
//   --help           print the usage text and return 1
//   --seed=<seed>    seed for the test RNG; a random one is selected if absent
//
// The seed in use is always printed. Failures abort through the asserts in
// biextension_test, so reaching the end means every check passed.
int
main(int argc, char *argv[])
{
    uint32_t seed[12] = { 0 };
    int show_help = 0;
    int have_seed = 0;

    for (int i = 1; i < argc; i++) {
        if (!show_help && strcmp(argv[i], "--help") == 0) {
            show_help = 1;
        } else if (!have_seed && !parse_seed(argv[i], seed)) {
            // parse_seed returns 0 when the argument carried a seed
            have_seed = 1;
        }
    }

    if (show_help) {
        printf("Usage: %s [--seed=<seed>]\n", argv[0]);
        printf("Where <seed> is the random seed to be used; if not present, a random seed is "
               "generated\n");
        return 1;
    }

    if (!have_seed) {
        randombytes_select((unsigned char *)seed, sizeof(seed));
    }
    print_seed(seed);

#if defined(TARGET_BIG_ENDIAN)
    // Byte-swap each word before the seed is consumed as a byte string
    for (int i = 0; i < 12; i++) {
        seed[i] = BSWAP32(seed[i]);
    }
#endif

    randombytes_init((unsigned char *)seed, NULL, 256);

    printf("Running biextension unit tests\n");
    biextension_test();

    // Failures will be caught by asserts in biextension_test
    printf("\nAll tests passed!\n");
    return 0;
}

View File

@@ -0,0 +1,163 @@
#include <bench.h>
#include <bench_test_arguments.h>
#include <assert.h>
#include <stdio.h>
#include <inttypes.h>
#include "test_extras.h"
#include <ec.h>
#include <isog.h>
#include <rng.h>
#define STRINGIFY2(x) #x
#define STRINGIFY(x) STRINGIFY2(x)
// Benchmarks xDBL on Nbench independent random inputs.
//
// Each call gets its own random point and its own random coefficient pair, and
// only the xDBL loop is timed. Returns the total cycle count for all Nbench
// calls, or 0 (after printing a message) if the inputs cannot be allocated.
uint64_t
bench_xDBL(unsigned int Nbench)
{
    uint64_t cycles0, cycles1;
    unsigned int i;
    // Heap storage: Nbench comes from the command line, so stack arrays of
    // this size could overflow the stack.
    ec_point_t *P = malloc((size_t)Nbench * sizeof *P);
    ec_point_t *A24 = malloc((size_t)Nbench * sizeof *A24);
    if (P == NULL || A24 == NULL) {
        printf("Failed to allocate xDBL benchmark inputs\n");
        free(P);
        free(A24);
        return 0;
    }
    for (i = 0; i < Nbench; i++) {
        fp2_random_test(&(P[i].x));
        fp2_random_test(&(P[i].z));
        fp2_random_test(&(A24[i].x));
        fp2_random_test(&(A24[i].z));
    }
    cycles0 = cpucycles();
    for (i = 0; i < Nbench; i++) {
        xDBL(&P[i], &P[i], &A24[i]);
    }
    cycles1 = cpucycles();
    free(P);
    free(A24);
    return cycles1 - cycles0;
}
// Benchmarks xeval_4 (evaluation of a degree-4 isogeny at one point) on Nbench
// independent random inputs.
//
// The points and the three kernel-data points of each isogeny are filled with
// random field elements, and only the evaluation loop is timed. Returns the
// total cycle count for all Nbench calls, or 0 (after printing a message) if
// the inputs cannot be allocated.
uint64_t
bench_xEVAL4(unsigned int Nbench)
{
    uint64_t cycles0, cycles1;
    unsigned int i;
    // Heap storage: Nbench comes from the command line, so stack arrays of
    // this size could overflow the stack.
    ec_point_t *P = malloc((size_t)Nbench * sizeof *P);
    ec_kps4_t *KPS = malloc((size_t)Nbench * sizeof *KPS);
    if (P == NULL || KPS == NULL) {
        printf("Failed to allocate xEVAL4 benchmark inputs\n");
        free(P);
        free(KPS);
        return 0;
    }
    for (i = 0; i < Nbench; i++) {
        fp2_random_test(&(P[i].x));
        fp2_random_test(&(P[i].z));
        for (int j = 0; j < 3; j++) {
            fp2_random_test(&(KPS[i].K[j].x));
            fp2_random_test(&(KPS[i].K[j].z));
        }
    }
    cycles0 = cpucycles();
    for (i = 0; i < Nbench; i++) {
        // Evaluated in place: the image overwrites the input point
        xeval_4(&P[i], &P[i], 1, &KPS[i]);
    }
    cycles1 = cpucycles();
    free(P);
    free(KPS);
    return cycles1 - cycles0;
}
// Benchmarks ec_eval_even on isogenies of length TORSION_EVEN_POWER starting
// from the curve with A = 6.
//
// Nbench isogenies are prepared with kernels P + Q, P + 2Q, P + 3Q, ... built
// by differential addition from the 2^f-torsion basis. Returns the total cycle
// count of the timed loop, or 0 (after printing a message) if allocation or
// one of the isogeny evaluations fails.
//
// NOTE(review): the timed loop starts at i = 2, so only Nbench - 2 isogenies
// are evaluated although all Nbench are prepared — confirm this is intended,
// since main() divides the result by the full iteration count.
uint64_t
bench_isog_strategy(unsigned int Nbench)
{
    uint64_t cycles0, cycles1;
    unsigned int i;
    ec_curve_t E0;
    ec_isog_even_t *phi;
    ec_basis_t basis2;
    // Heap storage: Nbench comes from the command line, so a stack array of
    // this size could overflow the stack.
    phi = malloc((size_t)Nbench * sizeof *phi);
    if (phi == NULL) {
        printf("Failed to allocate isogeny benchmark inputs\n");
        return 0;
    }
    ec_curve_init(&E0);
    fp2_set_small(&(E0.A), 6);
    fp2_set_one(&(E0.C));
    (void)ec_curve_to_basis_2f_to_hint(&basis2, &E0, TORSION_EVEN_POWER);
    for (i = 0; i < Nbench; i++) {
        copy_curve(&phi[i].curve, &E0);
        phi[i].length = TORSION_EVEN_POWER;
        // kernel_0 = P + Q, kernel_1 = kernel_0 + Q, and afterwards
        // kernel_i = kernel_(i-1) + Q with difference kernel_(i-2)
        if (i == 0) {
            xADD(&phi[i].kernel, &basis2.P, &basis2.Q, &basis2.PmQ);
        }
        if (i == 1) {
            xADD(&phi[i].kernel, &phi[i - 1].kernel, &basis2.Q, &basis2.P);
        }
        if (i > 1) {
            xADD(&phi[i].kernel, &phi[i - 1].kernel, &basis2.Q, &phi[i - 2].kernel);
        }
    }
    cycles0 = cpucycles();
    for (i = 2; i < Nbench; i++) {
        if (ec_eval_even(&phi[i].curve, &phi[i], NULL, 0)) {
            printf("Failed isogeny strategy\n");
            free(phi);
            return 0;
        }
    }
    cycles1 = cpucycles();
    free(phi);
    return cycles1 - cycles0;
}
// Entry point of the elliptic-curve arithmetic benchmark.
//
// Recognised arguments:
//   --help              print the usage text
//   --iterations=<n>    number of iterations per benchmark (must be positive)
//   --seed=<seed>       seed for the test RNG; a random one is selected if absent
//
// Returns 1 after printing the usage text (for --help or a non-positive
// iteration count) and 0 once the benchmarks have run.
int
main(int argc, char *argv[])
{
    uint32_t seed[12] = { 0 };
    // Keep the built-in default apart from the parsed value so the usage text
    // reports the real default even after a bad --iterations value was parsed.
    const int default_iterations = 100 * SQISIGN_TEST_REPS;
    int iterations = default_iterations;
    int help = 0;
    int seed_set = 0;
#ifndef NDEBUG
    fprintf(stderr,
            "\x1b[31mIt looks like SQIsign was compiled with assertions enabled.\n"
            "This will severely impact performance measurements.\x1b[0m\n");
#endif
    for (int i = 1; i < argc; i++) {
        if (!help && strcmp(argv[i], "--help") == 0) {
            help = 1;
            continue;
        }
        // parse_seed returns 0 when the argument carried a seed
        if (!seed_set && !parse_seed(argv[i], seed)) {
            seed_set = 1;
            continue;
        }
        if (sscanf(argv[i], "--iterations=%d", &iterations) == 1) {
            continue;
        }
    }
    if (help || iterations <= 0) {
        printf("Usage: %s [--iterations=<iterations>] [--seed=<seed>]\n", argv[0]);
        printf("Where <iterations> is the number of iterations used for benchmarking; if not "
               "present, uses the default: %d)\n",
               default_iterations);
        printf("Where <seed> is the random seed to be used; if not present, a random seed is "
               "generated\n");
        return 1;
    }
    if (!seed_set) {
        randombytes_select((unsigned char *)seed, sizeof(seed));
    }
    print_seed(seed);
#if defined(TARGET_BIG_ENDIAN)
    // Byte-swap each word before the seed is consumed as a byte string
    for (int i = 0; i < 12; i++) {
        seed[i] = BSWAP32(seed[i]);
    }
#endif
    randombytes_init((unsigned char *)seed, NULL, 256);
    cpucycles_init();
    printf("Benchmarking elliptic curve arithmetic for " STRINGIFY(SQISIGN_VARIANT) ":\n\n");
    uint64_t cycles;
    // Each benchmark returns a total cycle count; print the per-call average.
    // xDBL is cheap, so it is run ten times as often as the others.
    cycles = bench_xDBL(10 * iterations);
    printf("Bench xDBL_A24:\t%" PRIu64 " cycles\n", cycles / (10 * iterations));
    cycles = bench_xEVAL4(iterations);
    printf("Bench xEVAL4:\t%" PRIu64 " cycles\n", cycles / iterations);
    cycles = bench_isog_strategy(iterations);
    printf("Bench isog strategy:\t%" PRIu64 " cycles\n", cycles / iterations);
    return 0;
}

View File

@@ -0,0 +1,404 @@
#include <assert.h>
#include <stdio.h>
#include <inttypes.h>
#include "test_extras.h"
#include <ec.h>
#include <isog.h>
#include <rng.h>
#include <bench_test_arguments.h>
/******************************
Test functions
******************************/
// Cross-checks x-only doubling against differential addition on random points.
//
// Each of the Ntest rounds draws random P and Q, derives P - Q, and verifies
//   2(P + Q) == 2P + 2Q   and   (P + Q) + (P - Q) == 2P.
// Returns 0 when every round passes and 1 at the first failure.
int
test_xDBL_xADD(const ec_curve_t *curve, unsigned int Ntest)
{
    ec_point_t P, Q, diff, lhs, rhs;
    unsigned int round = 0;

    while (round < Ntest) {
        ec_random_test(&P, curve);
        ec_random_test(&Q, curve);
        projective_difference_point(&diff, &P, &Q, curve);

        // Left side: add, then double
        xADD(&lhs, &P, &Q, &diff);
        ec_dbl(&lhs, &lhs, curve);

        // Right side: double all three inputs, then add
        ec_dbl(&P, &P, curve);
        ec_dbl(&Q, &Q, curve);
        ec_dbl(&diff, &diff, curve);
        xADD(&rhs, &P, &Q, &diff);

        if (ec_is_equal(&lhs, &rhs) == 0) {
            printf("Failed 2(P + Q) = 2P + 2Q\n");
            return 1;
        }

        // Second identity, on the doubled points: adding their sum and their
        // difference (whose own difference is the doubled Q) gives the doubled P
        xADD(&lhs, &P, &Q, &diff);
        ec_dbl(&Q, &Q, curve);
        xADD(&lhs, &lhs, &diff, &Q);
        ec_dbl(&P, &P, curve);
        ec_dbl(&diff, &diff, curve);

        if (ec_is_equal(&lhs, &P) == 0) {
            printf("Failed (P+Q) + (P-Q) = 2P\n");
            return 1;
        }

        round++;
    }

    return 0;
}
// Checks the combined double-and-add ladder step against the separate
// primitives: for random P and Q, xDBLADD must return 2P (as ec_dbl does) and
// P + Q (as xADD does). Returns 0 on success and 1 at the first failure.
int
test_xDBLADD(const ec_curve_t *curve, unsigned int Ntest)
{
    ec_point_t P, Q, diff, doubled, added;
    ec_point_t A24;
    unsigned int round;

    AC_to_A24(&A24, curve);

    for (round = 0; round != Ntest; round++) {
        ec_random_test(&P, curve);
        ec_random_test(&Q, curve);
        projective_difference_point(&diff, &P, &Q, curve);

        xDBLADD(&doubled, &added, &P, &Q, &diff, &A24, false);

        // Reference sum, written over the difference point
        xADD(&diff, &P, &Q, &diff);
        if (ec_is_equal(&added, &diff) == 0) {
            printf("Failed addition in xDBLADD\n");
            return 1;
        }

        // Reference doubling, written over P
        ec_dbl(&P, &P, curve);
        if (ec_is_equal(&doubled, &P) == 0) {
            printf("Failed doubling in xDBLADD\n");
            return 1;
        }
    }

    return 0;
}
// Checks that all doubling variants agree on random points:
//   xDBL      using the curve reinterpreted as a point (its leading fields),
//   xDBL_A24  using a randomly rescaled, non-normalized A24,
//   xDBL_A24  using the normalized A24,
//   xDBL_E0   which takes no curve argument.
// Returns 0 on success and 1 at the first mismatch.
int
test_xDBL_variants(ec_curve_t *curve, unsigned int Ntest)
{
    ec_curve_t rescaled;
    ec_point_t P, via_AC, via_A24, via_A24norm, via_E0;
    ec_point_t A24, A24norm;
    fp2_t scale;
    unsigned int round = 0;

    AC_to_A24(&A24, curve);
    copy_point(&A24norm, &A24);
    ec_normalize_point(&A24norm);

    // Same curve with A24 multiplied through by a random factor
    copy_curve(&rescaled, curve);
    fp2_random_test(&scale);
    fp2_mul(&rescaled.A24.x, &A24.x, &scale);
    fp2_mul(&rescaled.A24.z, &A24.z, &scale);
    rescaled.is_A24_computed_and_normalized = false;

    while (round < Ntest) {
        ec_random_test(&P, curve);

        xDBL(&via_AC, &P, (const ec_point_t *)curve);
        xDBL_A24(&via_A24, &P, &rescaled.A24, false);
        xDBL_A24(&via_A24norm, &P, &A24norm, true);
        xDBL_E0(&via_E0, &P);

        if (ec_is_equal(&via_AC, &via_A24) == 0) {
            printf("xDBL and xDBL_A24 dont match\n");
            return 1;
        }
        if (ec_is_equal(&via_AC, &via_A24norm) == 0) {
            printf("xDBL and xDBL_A24 normalized dont match\n");
            return 1;
        }
        if (ec_is_equal(&via_AC, &via_E0) == 0) {
            printf("xDBL and xDBL_E0 dont match\n");
            return 1;
        }

        round++;
    }

    return 0;
}
// Checks how the x-only arithmetic treats the identity (1 : 0):
//   0 + 0 = 0, P - P = 0, 2*0 = 0, P + 0 = P and 0 + P = P,
// for xADD / ec_dbl and for the combined xDBLADD step.
//
// Every failure message names the exact operand order that was exercised, so a
// failing case can be located from the output alone.
// Returns 0 on success and 1 at the first failure.
int
test_zero_identities(ec_curve_t *curve, unsigned int Ntest)
{
    unsigned int i;
    ec_point_t P, Q, R, ec_zero;
    fp2_set_one(&(P.x));
    fp2_set_zero(&(P.z));
    fp2_set_one(&(ec_zero.x));
    fp2_set_zero(&(ec_zero.z));
    // Sanity check: (1 : 0) must be recognised as the identity
    assert(ec_is_zero(&P));
    for (i = 0; i < Ntest; i++) {
        ec_random_test(&P, curve);
        xADD(&R, &ec_zero, &ec_zero, &ec_zero);
        if (!ec_is_zero(&R)) {
            printf("Failed 0 + 0 = 0\n");
            return 1;
        }
        // Adding P and P with 2P supplied as their "difference" must yield the
        // other combination, P - P = 0
        ec_dbl(&R, &P, curve);
        xADD(&R, &P, &P, &R);
        if (!ec_is_zero(&R)) {
            printf("Failed P - P = 0\n");
            return 1;
        }
        ec_dbl(&R, &ec_zero, curve);
        if (!ec_is_zero(&R)) {
            printf("Failed 2*0 = 0\n");
            return 1;
        }
        xADD(&R, &P, &ec_zero, &P);
        if (!ec_is_equal(&R, &P)) {
            printf("Failed P + 0 = P\n");
            return 1;
        }
        xADD(&R, &ec_zero, &P, &P);
        if (!ec_is_equal(&R, &P)) {
            printf("Failed 0 + P = P\n");
            return 1;
        }
        // xDBLADD doubles its first input into R and adds both inputs into Q
        xDBLADD(&R, &Q, &P, &ec_zero, &P, &curve->A24, false);
        if (!ec_is_equal(&Q, &P)) {
            printf("Failed P + 0 = P in xDBLADD\n");
            return 1;
        }
        xDBLADD(&R, &Q, &ec_zero, &P, &P, &curve->A24, false);
        if (!ec_is_equal(&Q, &P)) {
            printf("Failed 0 + P = P in xDBLADD\n");
            return 1;
        }
        // The doubled input of the last call was the identity
        if (!ec_is_zero(&R)) {
            printf("Failed 2*0 = 0 in xDBLADD\n");
            return 1;
        }
    }
    return 0;
}
// Exercises Jacobian-coordinate arithmetic (ADD, DBL, jac_neg) and the
// conversion to and from Weierstrass form (jac_to_ws, jac_from_ws, DBLW).
//
// Each of the Ntest rounds lifts two random normalized x-only points to
// Jacobian points S and T (recovering y, with z = 1) and checks identity,
// inverse, doubling, commutativity and associativity laws, followed by
// round-trips through the Weierstrass model.
// Returns 0 on success and 1 at the first failure.
int
test_jacobian(const ec_curve_t *curve, unsigned int Ntest)
{
    unsigned int i;
    ec_point_t P, Q;
    jac_point_t R, S, T, U, jac_zero;
    fp2_t t0, t1;
    jac_init(&jac_zero);
    for (i = 0; i < Ntest; i++) {
        ec_random_test(&P, curve);
        ec_normalize_point(&P);
        ec_random_test(&Q, curve);
        ec_normalize_point(&Q);
        /* Convert to Jacobian coordinates. */
        fp2_copy(&(S.x), &(P.x));
        ec_recover_y(&(S.y), &(S.x), curve);
        fp2_set_one(&(S.z));
        fp2_copy(&(T.x), &(Q.x));
        ec_recover_y(&(T.y), &(T.x), curve);
        fp2_set_one(&(T.z));
        ADD(&R, &jac_zero, &jac_zero, curve);
        if (!jac_is_equal(&R, &jac_zero)) {
            printf("Failed 0 + 0 = 0 in jac\n");
            return 1;
        }
        DBL(&R, &jac_zero, curve);
        if (!jac_is_equal(&R, &jac_zero)) {
            printf("Failed 2*0 = 0 in jac\n");
            return 1;
        }
        jac_neg(&R, &S);
        ADD(&R, &S, &R, curve);
        if (!jac_is_equal(&R, &jac_zero)) {
            printf("Failed P - P = 0 in jac\n");
            return 1;
        }
        // Identity as the second operand
        ADD(&R, &S, &jac_zero, curve);
        if (!jac_is_equal(&R, &S)) {
            printf("Failed P + 0 = P in jac\n");
            return 1;
        }
        // Identity as the first operand
        ADD(&R, &jac_zero, &S, curve);
        if (!jac_is_equal(&R, &S)) {
            printf("Failed 0 + P = P in jac\n");
            return 1;
        }
        // Identity as the second operand again
        ADD(&R, &S, &jac_zero, curve);
        if (!jac_is_equal(&R, &S)) {
            printf("Failed P + 0 = P in jac\n");
            return 1;
        }
        DBL(&R, &S, curve);
        ADD(&U, &S, &S, curve);
        if (!jac_is_equal(&R, &U)) {
            printf("Failed P + P = 2*P in jac\n");
            return 1;
        }
        // Commutativity with the output aliasing an input; note that T is
        // overwritten with S + T here and keeps that value below
        ADD(&R, &T, &S, curve);
        ADD(&T, &S, &T, curve);
        if (!jac_is_equal(&R, &T)) {
            printf("Failed P + Q = Q + P in jac\n");
            return 1;
        }
        // Commutativity again, this time with separate outputs
        ADD(&R, &T, &S, curve);
        ADD(&U, &S, &T, curve);
        if (!jac_is_equal(&R, &U)) {
            printf("Failed P + Q = Q + P in jac\n");
            return 1;
        }
        // Double R to make it different than (T + S).
        DBL(&R, &R, curve);
        ADD(&U, &S, &T, curve);
        ADD(&U, &U, &R, curve);
        ADD(&R, &R, &T, curve);
        ADD(&R, &R, &S, curve);
        if (!jac_is_equal(&R, &U)) {
            printf("Failed (P + Q) + R = P + (Q + R) in jac\n");
            return 1;
        }
        // Round-trips through the Weierstrass model: identity, S, then 2S
        jac_to_ws(&R, &t0, &t1, &jac_zero, curve);
        jac_from_ws(&R, &R, &t1, curve);
        if (!jac_is_equal(&R, &jac_zero)) {
            printf("Failed converting to Weierstrass\n");
            return 1;
        }
        jac_to_ws(&R, &t0, &t1, &S, curve);
        jac_from_ws(&R, &R, &t1, curve);
        if (!jac_is_equal(&S, &R)) {
            printf("Failed converting to Weierstrass\n");
            return 1;
        }
        DBL(&S, &S, curve);
        jac_to_ws(&R, &t0, &t1, &S, curve);
        jac_from_ws(&R, &R, &t1, curve);
        if (!jac_is_equal(&S, &R)) {
            printf("Failed converting to Weierstrass\n");
            return 1;
        }
        // Doubling inside the Weierstrass model must agree with DBL
        jac_to_ws(&R, &t0, &t1, &jac_zero, curve);
        DBLW(&R, &t0, &R, &t0);
        jac_from_ws(&R, &R, &t1, curve);
        if (!jac_is_equal(&R, &jac_zero)) {
            printf("Failed 2*0 = 0 in Weierstrass\n");
            return 1;
        }
        jac_to_ws(&R, &t0, &t1, &S, curve);
        DBLW(&R, &t0, &R, &t0);
        jac_from_ws(&R, &R, &t1, curve);
        DBL(&S, &S, curve);
        if (!jac_is_equal(&S, &R)) {
            printf("Failed doubling in Weierstrass\n");
            return 1;
        }
    }
    return 0;
}
// Entry point of the elliptic-curve arithmetic tests.
//
// Recognised arguments:
//   --help              print the usage text
//   --iterations=<n>    number of rounds per test (must be positive)
//   --seed=<seed>       seed for the test RNG; a random one is selected if absent
//
// The whole suite runs twice on the curve with A = 0: once with C = 1 and once
// with a random C. Returns 1 after printing the usage text; otherwise returns
// 0 when every test passed and nonzero when any test failed.
int
main(int argc, char *argv[])
{
    uint32_t seed[12] = { 0 };
    // Keep the built-in default apart from the parsed value so the usage text
    // reports the real default even after a bad --iterations value was parsed.
    const int default_iterations = 100 * SQISIGN_TEST_REPS;
    int iterations = default_iterations;
    int help = 0;
    int seed_set = 0;
    int res = 0;
    for (int i = 1; i < argc; i++) {
        if (!help && strcmp(argv[i], "--help") == 0) {
            help = 1;
            continue;
        }
        // parse_seed returns 0 when the argument carried a seed
        if (!seed_set && !parse_seed(argv[i], seed)) {
            seed_set = 1;
            continue;
        }
        if (sscanf(argv[i], "--iterations=%d", &iterations) == 1) {
            continue;
        }
    }
    if (help || iterations <= 0) {
        printf("Usage: %s [--iterations=<iterations>] [--seed=<seed>]\n", argv[0]);
        printf("Where <iterations> is the number of iterations used for testing; if not "
               "present, uses the default: %d)\n",
               default_iterations);
        printf("Where <seed> is the random seed to be used; if not present, a random seed is "
               "generated\n");
        return 1;
    }
    if (!seed_set) {
        randombytes_select((unsigned char *)seed, sizeof(seed));
    }
    print_seed(seed);
#if defined(TARGET_BIG_ENDIAN)
    // Byte-swap each word before the seed is consumed as a byte string
    for (int i = 0; i < 12; i++) {
        seed[i] = BSWAP32(seed[i]);
    }
#endif
    randombytes_init((unsigned char *)seed, NULL, 256);
    // Curve A=0 with C=1 (test_xDBL_variants compares against xDBL_E0)
    ec_curve_t curve;
    ec_curve_init(&curve);
    fp2_set_small(&(curve.A), 0);
    fp2_set_small(&(curve.C), 1);
    ec_curve_normalize_A24(&curve);
    res |= test_xDBL_xADD(&curve, iterations);
    res |= test_xDBLADD(&curve, iterations);
    res |= test_xDBL_variants(&curve, iterations);
    res |= test_zero_identities(&curve, iterations);
    res |= test_jacobian(&curve, iterations);
    // Second pass with a random C. A was 0 and stays 0 after the
    // multiplication, so only the projective representation (A : C) changes.
    fp2_random_test(&(curve.C));
    fp2_mul(&(curve.A), &(curve.A), &(curve.C));
    ec_curve_normalize_A24(&curve);
    res |= test_xDBL_xADD(&curve, iterations);
    res |= test_xDBLADD(&curve, iterations);
    res |= test_xDBL_variants(&curve, iterations);
    res |= test_zero_identities(&curve, iterations);
    res |= test_jacobian(&curve, iterations);
    if (res) {
        printf("Tests failed!\n");
    } else {
        printf("All ec arithmetic tests passed.\n");
    }
    return res;
}

View File

@@ -0,0 +1,116 @@
#include "test_extras.h"
#include "rng.h"
// Fills *a with one random-ish base-field element (for tests only!):
// FP_ENCODED_BYTES random bytes are drawn and reduced into the field.
void
fp_random_test(fp_t *a)
{
    uint8_t raw[FP_ENCODED_BYTES];

    randombytes(raw, sizeof raw);
    fp_decode_reduce(a, raw, sizeof raw);
}
// Fills *a with a random-ish quadratic-extension element by drawing its real
// part and then its imaginary part with fp_random_test (for tests only!).
void
fp2_random_test(fp2_t *a)
{
    fp_t *const real_part = &a->re;
    fp_t *const imag_part = &a->im;

    fp_random_test(real_part);
    fp_random_test(imag_part);
}
// Given a projective x-coordinate (x : z), decides whether it belongs to a
// point of the curve by testing whether
//     x*z*(C^2*x^2 + A*C*x*z + C^2*z^2)
// is a square or zero. Returns nonzero in that case and 0 otherwise.
// NOTE(review): this helper was documented as assuming C = 1, yet C is
// multiplied in explicitly below — confirm whether a general C is intended.
static uint32_t
projective_is_on_curve(const ec_point_t *P, const ec_curve_t *curve)
{
    fp2_t cx, cz, cross, rhs;

    fp2_mul(&cx, &curve->C, &P->x);      // C*x
    fp2_mul(&cross, &cx, &P->z);         // C*x*z
    fp2_mul(&cross, &cross, &curve->A);  // A*C*x*z
    fp2_mul(&cz, &curve->C, &P->z);      // C*z
    fp2_sqr(&cx, &cx);                   // C^2*x^2
    fp2_sqr(&cz, &cz);                   // C^2*z^2

    fp2_add(&rhs, &cx, &cross);
    fp2_add(&rhs, &rhs, &cz);
    fp2_mul(&rhs, &rhs, &P->x);
    fp2_mul(&rhs, &rhs, &P->z);

    return fp2_is_square(&rhs) || fp2_is_zero(&rhs);
}
// Draws a random normalized x-only point (z = 1) by rejection sampling:
// random x values are tried until projective_is_on_curve accepts one.
void
ec_random_normalized_test(ec_point_t *P, const ec_curve_t *curve)
{
    fp2_set_one(&P->z);
    do {
        fp2_random_test(&P->x);
    } while (!projective_is_on_curve(P, curve));
}
// Draws a random x-only point with a randomized projective representation:
// a normalized point is sampled first, then z is replaced by a random value
// and x is multiplied by that same value so the point itself is unchanged.
void
ec_random_test(ec_point_t *P, const ec_curve_t *curve)
{
    fp2_t *const x = &P->x;
    fp2_t *const z = &P->z;

    ec_random_normalized_test(P, curve);
    fp2_random_test(z);
    fp2_mul(x, x, z);
}
// Computes an x-only representation of P - Q from the x-only points P and Q.
//
// PQ    - output; receives (Bxz + sqrt(Bxz^2 - Bxx*Bzz) : Bzz)
// P, Q  - projective x-only input points
// curve - supplies the coefficients A and C
//
// Bxx, Bxz and Bzz are the coefficients of the quadratic built below; one of
// its roots is written to PQ. The coefficients are rescaled before the square
// root is taken so that the choice of root is deterministic (see the
// normalization comment in the body).
void
projective_difference_point(ec_point_t *PQ, const ec_point_t *P, const ec_point_t *Q, const ec_curve_t *curve)
{
// Given P,Q in projective x-only, computes a deterministic choice for (P-Q)
// Based on Proposition 3 of https://eprint.iacr.org/2017/518.pdf
fp2_t Bxx, Bxz, Bzz, t0, t1;
fp2_mul(&t0, &P->x, &Q->x);
fp2_mul(&t1, &P->z, &Q->z);
fp2_sub(&Bxx, &t0, &t1);
fp2_sqr(&Bxx, &Bxx);
fp2_mul(&Bxx, &Bxx, &curve->C); // C*(P.x*Q.x-P.z*Q.z)^2
fp2_add(&Bxz, &t0, &t1);
// From here on t0 = P.x*Q.z and t1 = P.z*Q.x
fp2_mul(&t0, &P->x, &Q->z);
fp2_mul(&t1, &P->z, &Q->x);
fp2_add(&Bzz, &t0, &t1);
fp2_mul(&Bxz, &Bxz, &Bzz); // (P.x*Q.x+P.z*Q.z)(P.x*Q.z+P.z*Q.x)
fp2_sub(&Bzz, &t0, &t1);
fp2_sqr(&Bzz, &Bzz);
fp2_mul(&Bzz, &Bzz, &curve->C); // C*(P.x*Q.z-P.z*Q.x)^2
fp2_mul(&Bxz, &Bxz, &curve->C); // C*(P.x*Q.x+P.z*Q.z)(P.x*Q.z+P.z*Q.x)
fp2_mul(&t0, &t0, &t1);
fp2_mul(&t0, &t0, &curve->A);
fp2_add(&t0, &t0, &t0);
fp2_add(&Bxz, &Bxz, &t0); // C*(P.x*Q.x+P.z*Q.z)(P.x*Q.z+P.z*Q.x) + 2*A*P.x*Q.z*P.z*Q.x
// Normalization: our squareroot always has the same sign as long as P.z, Q.z, and C
// are in Fp and C is a square, so the B's should be scaled by C*C_bar^2*P.z_bar^2*Q.Z_bar^2
// t0 = conj(C)^2 * C
fp_copy(&t0.re, &curve->C.re);
fp_neg(&t0.im, &curve->C.im);
fp2_sqr(&t0, &t0);
fp2_mul(&t0, &t0, &curve->C);
// t0 *= conj(P.z)^2
fp_copy(&t1.re, &P->z.re);
fp_neg(&t1.im, &P->z.im);
fp2_sqr(&t1, &t1);
fp2_mul(&t0, &t0, &t1);
// t0 *= conj(Q.z)^2
fp_copy(&t1.re, &Q->z.re);
fp_neg(&t1.im, &Q->z.im);
fp2_sqr(&t1, &t1);
fp2_mul(&t0, &t0, &t1);
// Apply the common scaling factor to all three coefficients
fp2_mul(&Bxx, &Bxx, &t0);
fp2_mul(&Bxz, &Bxz, &t0);
fp2_mul(&Bzz, &Bzz, &t0);
// Solving quadratic equation
// t0 = sqrt(Bxz^2 - Bxx*Bzz), then (x : z) = (Bxz + t0 : Bzz)
fp2_sqr(&t0, &Bxz);
fp2_mul(&t1, &Bxx, &Bzz);
fp2_sub(&t0, &t0, &t1);
fp2_sqrt(&t0);
fp2_add(&PQ->x, &Bxz, &t0);
fp2_copy(&PQ->z, &Bzz);
}

View File

@@ -0,0 +1,43 @@
// Helpers shared by the elliptic-curve tests and benchmarks: random field
// elements and points, a deterministic difference point, and declarations of
// the doubling / double-and-add primitives the tests call directly.
#ifndef TEST_EXTRAS_H
#define TEST_EXTRAS_H
#include <assert.h>
#include <time.h>
#include <stdlib.h>
#include <encoded_sizes.h>
#include <ec.h>
#include <fp.h>
#include <fp2.h>
// Result codes: zero means success, nonzero means failure
#define PASSED 0
#define FAILED 1
// Generating a pseudo-random field element: FP_ENCODED_BYTES random bytes
// reduced into the field (for tests only)
void fp_random_test(fp_t *a);
// Generating a pseudo-random element in GF(p^2): real and imaginary parts are
// drawn independently with fp_random_test
void fp2_random_test(fp2_t *a);
// Generating a random projective x-only point on the curve, with a random z
void ec_random_test(ec_point_t *P, const ec_curve_t *curve);
// Generating a random x-only point on the curve in normalized form (z = 1)
void ec_random_normalized_test(ec_point_t *P, const ec_curve_t *curve);
// Point difference: deterministic choice of an x-only representation of P - Q
void projective_difference_point(ec_point_t *PQ, const ec_point_t *P, const ec_point_t *Q, const ec_curve_t *curve);
// xDBL: x-only doubling, Q = 2P
// NOTE(review): AC presumably holds the curve coefficients (A : C); the tests
// pass a curve pointer cast to a point — confirm against the definition
void xDBL(ec_point_t *Q, const ec_point_t *P, const ec_point_t *AC);
// Double-and-add: the tests expect R = 2P and S = P + Q, where PQ is the
// difference of P and Q; A24_normalized tells whether A24 is in normalized form
extern void xDBLADD(ec_point_t *R,
ec_point_t *S,
const ec_point_t *P,
const ec_point_t *Q,
const ec_point_t *PQ,
const ec_point_t *A24,
const bool A24_normalized);
#endif

64
src/ec/ref/lvlx/xeval.c Normal file
View File

@@ -0,0 +1,64 @@
#include "isog.h"
#include "ec.h"
#include <assert.h>
// -----------------------------------------------------------------------------------------
// -----------------------------------------------------------------------------------------
// Degree-2 isogeny evaluation with kernel generated by P != (0, 0).
// Maps each of the lenQ points Q[j] to R[j] using the kernel data in kps.
// For each index the inputs are fully read before the output is written.
void
xeval_2(ec_point_t *R, ec_point_t *const Q, const int lenQ, const ec_kps2_t *kps)
{
    fp2_t sum, diff, scaled;
    int j = 0;

    while (j < lenQ) {
        const ec_point_t *in = &Q[j];
        ec_point_t *out = &R[j];

        fp2_add(&sum, &in->x, &in->z);
        fp2_sub(&diff, &in->x, &in->z);
        fp2_mul(&scaled, &kps->K.x, &diff); // K.x * (x - z)
        fp2_mul(&diff, &kps->K.z, &sum);    // K.z * (x + z)
        fp2_add(&sum, &scaled, &diff);
        fp2_sub(&diff, &scaled, &diff);
        fp2_mul(&out->x, &in->x, &sum);
        fp2_mul(&out->z, &in->z, &diff);

        j++;
    }
}
// Degree-2 isogeny evaluation for the kernel data produced by
// xisog_2_singular. Maps each of the lenQ points Q[i] to R[i] with
//     R.x = x^2 + K.x*x*z + z^2,   R.z = K.z*x*z.
void
xeval_2_singular(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_kps2_t *kps)
{
    fp2_t xz, acc;
    int i = 0;

    while (i < lenQ) {
        const ec_point_t *in = &Q[i];
        ec_point_t *out = &R[i];

        fp2_mul(&xz, &in->x, &in->z);       // x*z
        fp2_mul(&acc, &kps->K.x, &in->z);   // K.x*z
        fp2_add(&acc, &acc, &in->x);        // x + K.x*z
        fp2_mul(&acc, &acc, &in->x);        // x^2 + K.x*x*z
        fp2_sqr(&out->x, &in->z);           // z^2
        fp2_add(&out->x, &out->x, &acc);
        fp2_mul(&out->z, &xz, &kps->K.z);

        i++;
    }
}
// Degree-4 isogeny evaluation with kernel generated by P such that [2]P != (0, 0).
// Maps each of the lenQ points Q[i] to R[i] using the three x-values stored in
// kps->K. The input point is read before anything is written to the output.
void
xeval_4(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_kps4_t *kps)
{
    const ec_point_t *K = kps->K;
    fp2_t sum, diff, prod, a, b;

    for (int i = 0; i < lenQ; i++) {
        fp2_add(&sum, &Q[i].x, &Q[i].z);  // x + z
        fp2_sub(&diff, &Q[i].x, &Q[i].z); // x - z
        fp2_mul(&a, &sum, &K[1].x);
        fp2_mul(&b, &diff, &K[2].x);
        fp2_mul(&prod, &sum, &diff);
        fp2_mul(&prod, &prod, &K[0].x);   // K[0].x * (x + z) * (x - z)
        fp2_add(&sum, &a, &b);
        fp2_sub(&diff, &a, &b);
        fp2_sqr(&sum, &sum);              // (a + b)^2
        fp2_sqr(&diff, &diff);            // (a - b)^2
        fp2_add(&a, &prod, &sum);
        fp2_sub(&prod, &prod, &diff);
        fp2_mul(&(R[i].x), &a, &sum);
        fp2_mul(&(R[i].z), &diff, &prod);
    }
}

61
src/ec/ref/lvlx/xisog.c Normal file
View File

@@ -0,0 +1,61 @@
#include "isog.h"
#include "ec.h"
#include <assert.h>
// -------------------------------------------------------------------------
// -------------------------------------------------------------------------
// Degree-2 isogeny with kernel generated by P != (0 ,0)
// Outputs the curve coefficient in the form A24=(A+2C:4C)
void
xisog_2(ec_kps2_t *kps, ec_point_t *B, const ec_point_t P)
{
    // Codomain coefficient: B = (P.z^2 - P.x^2 : P.z^2)
    fp2_t x_sq;
    fp2_sqr(&x_sq, &P.x);
    fp2_sqr(&B->z, &P.z);
    fp2_sub(&B->x, &B->z, &x_sq);

    // Kernel data consumed by xeval_2: K = (P.x + P.z : P.x - P.z)
    fp2_add(&kps->K.x, &P.x, &P.z);
    fp2_sub(&kps->K.z, &P.x, &P.z);
}
// Degree-2 isogeny computed from the curve coefficient A24 alone (no kernel point argument).
// Writing a = 2*(2*A24.x - A24.z)/A24.z and s = sqrt(a^2 - 4), the function stores
//   kps->K = (a : -s)   and   B24 = (2a + 2s : 4s).
// NOTE(review): if A24 is in the (A+2C : 4C) form used by xisog_2, then a = A/C -- confirm with callers.
// A24 is received by value, so the in-place inversion below does not touch the caller's copy.
void
xisog_2_singular(ec_kps2_t *kps, ec_point_t *B24, ec_point_t A24)
{
// No need to check the square root, only used for signing.
fp2_t t0, four;
fp2_set_small(&four, 4);
// t0 = 2*(2*A24.x - A24.z)
fp2_add(&t0, &A24.x, &A24.x);
fp2_sub(&t0, &t0, &A24.z);
fp2_add(&t0, &t0, &t0);
// t0 = a = t0 / A24.z (A24.z is replaced by its inverse)
fp2_inv(&A24.z);
fp2_mul(&t0, &t0, &A24.z);
fp2_copy(&kps->K.x, &t0);
// B24->x temporarily holds 2a
fp2_add(&B24->x, &t0, &t0);
// t0 = s = sqrt(a^2 - 4), computed in place
fp2_sqr(&t0, &t0);
fp2_sub(&t0, &t0, &four);
fp2_sqrt(&t0);
fp2_neg(&kps->K.z, &t0);
// B24 = (2a + 2s : 4s)
fp2_add(&B24->z, &t0, &t0);
fp2_add(&B24->x, &B24->x, &B24->z);
fp2_add(&B24->z, &B24->z, &B24->z);
}
// Degree-4 isogeny with kernel generated by P such that [2]P != (0 ,0)
// Outputs the curve coefficient in the form A24=(A+2C:4C)
void
xisog_4(ec_kps4_t *kps, ec_point_t *B, const ec_point_t P)
{
    ec_point_t *k0 = &kps->K[0];
    ec_point_t *k1 = &kps->K[1];
    ec_point_t *k2 = &kps->K[2];

    // Codomain coefficient: B = ((P.z^2 + P.x^2)(P.z^2 - P.x^2) : P.z^4)
    fp2_sqr(&k0->x, &P.x);
    fp2_sqr(&k0->z, &P.z);
    fp2_add(&k1->x, &k0->z, &k0->x);
    fp2_sub(&k1->z, &k0->z, &k0->x);
    fp2_mul(&B->x, &k1->x, &k1->z);
    fp2_sqr(&B->z, &k0->z);

    // Constants for xeval_4:
    //   K[2].x = P.x + P.z,  K[1].x = P.x - P.z,  K[0].x = 4*P.z^2
    fp2_add(&k2->x, &P.x, &P.z);
    fp2_sub(&k1->x, &P.x, &P.z);
    fp2_add(&k0->x, &k0->z, &k0->z);
    fp2_add(&k0->x, &k0->x, &k0->x);
}

View File

@@ -0,0 +1,32 @@
# Tests and benchmarks for the elliptic-curve module of one SQIsign variant.
# Expects SVARIANT_LOWER / SVARIANT_UPPER, LVLX_DIR, the INC_* include lists and
# LIB_EC_<VARIANT> to be defined by the including scope.
#
# Link dependencies carry an explicit PRIVATE keyword (executables have no consumers),
# and tests are registered with the add_test(NAME ... COMMAND ...) signature so that
# the command is resolved from the executable target rather than by file-name lookup.

# --- Unit tests ---------------------------------------------------------------
add_executable(curve-arith.test_${SVARIANT_LOWER} ${LVLX_DIR}/test/curve-arith-test.c ${LVLX_DIR}/test/test_extras.c)
target_include_directories(curve-arith.test_${SVARIANT_LOWER} PUBLIC ${INC_COMMON} ${INC_MP} ${INC_GF} ${INC_GF_${SVARIANT_UPPER}} ${INC_PRECOMP_${SVARIANT_UPPER}} ${INC_PUBLIC} ../include ${INC_EC} .)
target_link_libraries(curve-arith.test_${SVARIANT_LOWER} PRIVATE ${LIB_EC_${SVARIANT_UPPER}} sqisign_common_test)

add_executable(biextension.test_${SVARIANT_LOWER} ${LVLX_DIR}/test/biextension-test.c)
target_include_directories(biextension.test_${SVARIANT_LOWER} PUBLIC ${INC_COMMON} ${INC_MP} ${INC_GF} ${INC_GF_${SVARIANT_UPPER}} ${INC_PRECOMP_${SVARIANT_UPPER}} ${INC_PUBLIC} ../include ${INC_EC} .)
target_link_libraries(biextension.test_${SVARIANT_LOWER} PRIVATE ${LIB_EC_${SVARIANT_UPPER}} sqisign_common_test)

add_executable(basis-gen.test_${SVARIANT_LOWER} ${LVLX_DIR}/test/basis-gen-test.c)
target_include_directories(basis-gen.test_${SVARIANT_LOWER} PUBLIC ${INC_COMMON} ${INC_MP} ${LVLX_DIR}/test ${INC_GF} ${INC_GF_${SVARIANT_UPPER}} ${INC_PRECOMP_${SVARIANT_UPPER}} ${INC_PUBLIC} ../include ${INC_EC} .)
target_link_libraries(basis-gen.test_${SVARIANT_LOWER} PRIVATE ${LIB_EC_${SVARIANT_UPPER}})

# Test names are unchanged; only the registration signature differs.
add_test(NAME curve_arith.test_${SVARIANT_LOWER} COMMAND curve-arith.test_${SVARIANT_LOWER})
add_test(NAME ec_biextension.test_${SVARIANT_LOWER} COMMAND biextension.test_${SVARIANT_LOWER})
add_test(NAME ec_basis_gen.test_${SVARIANT_LOWER} COMMAND basis-gen.test_${SVARIANT_LOWER})

# --- Benchmarks ---------------------------------------------------------------
add_executable(curve-arith.bench_${SVARIANT_LOWER} ${LVLX_DIR}/test/curve-arith-bench.c ${LVLX_DIR}/test/test_extras.c)
target_include_directories(curve-arith.bench_${SVARIANT_LOWER} PUBLIC ${INC_COMMON} ${INC_MP} ${INC_GF} ${INC_GF_${SVARIANT_UPPER}} ${INC_PRECOMP_${SVARIANT_UPPER}} ${INC_PUBLIC} ../include ${INC_EC} .)
target_link_libraries(curve-arith.bench_${SVARIANT_LOWER} PRIVATE ${LIB_EC_${SVARIANT_UPPER}} sqisign_common_sys)

add_executable(biextension.bench_${SVARIANT_LOWER} ${LVLX_DIR}/test/biextension-bench.c)
target_include_directories(biextension.bench_${SVARIANT_LOWER} PUBLIC ${INC_COMMON} ${INC_MP} ${INC_GF} ${INC_GF_${SVARIANT_UPPER}} ${INC_PRECOMP_${SVARIANT_UPPER}} ${INC_PUBLIC} ../include ${INC_EC} .)
target_link_libraries(biextension.bench_${SVARIANT_LOWER} PRIVATE ${LIB_EC_${SVARIANT_UPPER}} sqisign_common_sys)

add_executable(basis-gen.bench_${SVARIANT_LOWER} ${LVLX_DIR}/test/basis-gen-bench.c)
target_include_directories(basis-gen.bench_${SVARIANT_LOWER} PUBLIC ${INC_COMMON} ${INC_MP} ${LVLX_DIR}/test ${INC_GF} ${INC_GF_${SVARIANT_UPPER}} ${INC_PRECOMP_${SVARIANT_UPPER}} ${INC_PUBLIC} ../include ${INC_EC} .)
target_link_libraries(basis-gen.bench_${SVARIANT_LOWER} PRIVATE ${LIB_EC_${SVARIANT_UPPER}})

# Append this variant's benchmark executables to the cached BM_BINS list.
set(BM_BINS ${BM_BINS}
curve-arith.bench_${SVARIANT_LOWER} basis-gen.bench_${SVARIANT_LOWER} biextension.bench_${SVARIANT_LOWER}
CACHE INTERNAL "List of benchmark executables")

View File

@@ -1 +1,3 @@
# LVLX_DIR: absolute path of the `lvlx` subdirectory located next to this file.
set(LVLX_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lvlx)
# Run the script whose path is held in SELECT_SQISIGN_VARIANT (defined outside this file).
include(${SELECT_SQISIGN_VARIANT})

View File

@@ -0,0 +1,22 @@
// On Apple targets, rename the symbols used by the assembly so that each carries a
// leading underscore (presumably to match the platform's C symbol naming -- confirm).
#ifdef __APPLE__
// Two-level paste: the arguments are macro-expanded by CAT before _CAT applies ##.
#define CAT(A, B) _CAT(A, B)
#define _CAT(A, B) A##B
// Remove any earlier macro definitions of these names before redefining them below
// (presumably the ones installed by sqisign_namespace.h -- TODO confirm).
#undef fp_add
#undef fp_sub
#undef fp_mul
#undef fp_sqr
#undef fp2_mul_c0
#undef fp2_mul_c1
#undef fp2_sq_c0
#undef fp2_sq_c1
// Data symbols: p2 -> _p2, p -> _p.
#define p2 CAT(_, p2)
#define p CAT(_, p)
// Routines: underscore followed by the SQISIGN_NAMESPACE()-decorated name.
#define fp_add CAT(_, SQISIGN_NAMESPACE(fp_add))
#define fp_sub CAT(_, SQISIGN_NAMESPACE(fp_sub))
#define fp_mul CAT(_, SQISIGN_NAMESPACE(fp_mul))
#define fp_sqr CAT(_, SQISIGN_NAMESPACE(fp_sqr))
#define fp2_mul_c0 CAT(_, SQISIGN_NAMESPACE(fp2_mul_c0))
#define fp2_mul_c1 CAT(_, SQISIGN_NAMESPACE(fp2_mul_c1))
#define fp2_sq_c0 CAT(_, SQISIGN_NAMESPACE(fp2_sq_c0))
#define fp2_sq_c1 CAT(_, SQISIGN_NAMESPACE(fp2_sq_c1))
#endif

View File

@@ -0,0 +1,162 @@
#ifndef FP2X_H
#define FP2X_H
#include <sqisign_namespace.h>
#include "fp.h"
#include <stdio.h>
// Structure for representing elements in GF(p^2)
typedef struct fp2_t
{
// The element is re + im*i, with i^2 = -1 (as the arithmetic in fp2_mul/fp2_sqr shows).
fp_t re, im;
} fp2_t;
static inline void
fp2_set_small(fp2_t *x, const uint32_t val)
{
    // x <- val + 0*i
    fp_set_zero(&x->im);
    fp_set_small(&x->re, val);
}
static inline void
fp2_mul_small(fp2_t *x, const fp2_t *y, uint32_t n)
{
    // x <- n * y, applied to the real and imaginary parts separately
    const fp_t *src_re = &y->re;
    const fp_t *src_im = &y->im;

    fp_mul_small(&x->re, src_re, n);
    fp_mul_small(&x->im, src_im, n);
}
static inline void
fp2_set_zero(fp2_t *x)
{
    // x <- 0 + 0*i
    fp_set_zero(&x->im);
    fp_set_zero(&x->re);
}
static inline void
fp2_set_one(fp2_t *x)
{
    // x <- 1 + 0*i
    fp_set_zero(&x->im);
    fp_set_one(&x->re);
}
static inline uint32_t
fp2_is_equal(const fp2_t *a, const fp2_t *b)
{
    // Constant-time comparison of two GF(p^2) elements: the result is the
    // bitwise AND of the component-wise fp_is_equal() results, so it signals
    // "equal" only when both the real and the imaginary parts match.
    const uint32_t re_match = fp_is_equal(&a->re, &b->re);
    const uint32_t im_match = fp_is_equal(&a->im, &b->im);
    return re_match & im_match;
}
static inline uint32_t
fp2_is_zero(const fp2_t *a)
{
    // Tests a == 0 in GF(p^2): bitwise AND of fp_is_zero() on both components.
    const uint32_t re_zero = fp_is_zero(&a->re);
    const uint32_t im_zero = fp_is_zero(&a->im);
    return re_zero & im_zero;
}
static inline uint32_t
fp2_is_one(const fp2_t *a)
{
    // Tests a == 1 in GF(p^2): the real part must equal ONE and the imaginary
    // part must be zero; the two checks are combined with a bitwise AND.
    const uint32_t re_is_one = fp_is_equal(&a->re, &ONE);
    const uint32_t im_is_zero = fp_is_zero(&a->im);
    return re_is_one & im_is_zero;
}
static inline void
fp2_half(fp2_t *x, const fp2_t *y)
{
    // x <- y / 2, applying fp_half() to each component
    fp_half(&x->re, &y->re);
    fp_half(&x->im, &y->im);
}
static inline void
fp2_add(fp2_t *x, const fp2_t *y, const fp2_t *z)
{
    // x <- y + z, component-wise
    fp_add(&x->re, &y->re, &z->re);
    fp_add(&x->im, &y->im, &z->im);
}
static inline void
fp2_add_one(fp2_t *x, const fp2_t *y)
{
    // x <- y + 1: only the real part changes, the imaginary part is copied over
    fp_copy(&x->im, &y->im);
    fp_add(&x->re, &y->re, &ONE);
}
static inline void
fp2_sub(fp2_t *x, const fp2_t *y, const fp2_t *z)
{
    // x <- y - z, component-wise
    fp_sub(&x->re, &y->re, &z->re);
    fp_sub(&x->im, &y->im, &z->im);
}
static inline void
fp2_neg(fp2_t *x, const fp2_t *y)
{
    // x <- -y, negating both components
    fp_neg(&x->re, &y->re);
    fp_neg(&x->im, &y->im);
}
#ifndef NO_FP2X_MUL
static inline void
fp2_mul(fp2_t *x, const fp2_t *y, const fp2_t *z)
{
    // x <- y * z with i^2 = -1, using three base-field multiplications:
    //   re = y.re*z.re - y.im*z.im
    //   im = (y.re + y.im)(z.re + z.im) - y.im*z.im - y.re*z.re
    fp_t cross, imim;

    fp_add(&cross, &y->re, &y->im);  // y.re + y.im
    fp_add(&imim, &z->re, &z->im);   // z.re + z.im
    fp_mul(&cross, &cross, &imim);   // (y.re + y.im)(z.re + z.im)
    fp_mul(&imim, &y->im, &z->im);   // y.im * z.im
    fp_mul(&x->re, &y->re, &z->re);  // y.re * z.re (last read of y and z)
    fp_sub(&x->im, &cross, &imim);
    fp_sub(&x->im, &x->im, &x->re);
    fp_sub(&x->re, &x->re, &imim);
}
#endif
#ifndef NO_FP2X_SQR
static inline void
fp2_sqr(fp2_t *x, const fp2_t *y)
{
    // x <- y^2 with i^2 = -1:
    //   re = (y.re + y.im)(y.re - y.im),  im = 2*y.re*y.im
    fp_t re_plus_im, re_minus_im;

    // Both combinations are taken before x is written, as in the original ordering
    fp_add(&re_plus_im, &y->re, &y->im);
    fp_sub(&re_minus_im, &y->re, &y->im);
    fp_mul(&x->im, &y->re, &y->im);
    fp_add(&x->im, &x->im, &x->im);
    fp_mul(&x->re, &re_plus_im, &re_minus_im);
}
#endif
static inline void
fp2_select(fp2_t *d, const fp2_t *a0, const fp2_t *a1, uint32_t ctl)
{
    // Component-wise fp_select(): d receives a0 or a1 according to ctl, with the
    // same ctl forwarded for both parts (selection convention is that of fp_select).
    fp_select(&d->re, &a0->re, &a1->re, ctl);
    fp_select(&d->im, &a0->im, &a1->im, ctl);
}
static inline void
fp2_cswap(fp2_t *a, fp2_t *b, uint32_t ctl)
{
    // Component-wise fp_cswap() of a and b, driven by the same ctl for both
    // parts (swap convention is that of fp_cswap).
    fp_cswap(&a->re, &b->re, ctl);
    fp_cswap(&a->im, &b->im, ctl);
}
// x <- y: copies the whole structure (both components) by plain struct assignment.
static inline void
fp2_copy(fp2_t *x, const fp2_t *y)
{
*x = *y;
}
// New functions
void fp2_encode(void *dst, const fp2_t *a);
uint32_t fp2_decode(fp2_t *d, const void *src);
void fp2_inv(fp2_t *x);
uint32_t fp2_is_square(const fp2_t *x);
void fp2_sqrt(fp2_t *x);
uint32_t fp2_sqrt_verify(fp2_t *a);
void fp2_batched_inv(fp2_t *x, int len);
void fp2_pow_vartime(fp2_t *out, const fp2_t *x, const uint64_t *exp, const int size);
void fp2_print(const char *name, const fp2_t *a);
#endif

View File

@@ -1,10 +1,6 @@
set(SOURCE_FILES_GF_${SVARIANT_UPPER}_BROADWELL
fp_asm.S fp.c fp2.c
set(SOURCE_FILES_GF_SPECIFIC
gf5248.c
fp_asm.S
)
add_library(${LIB_GF_${SVARIANT_UPPER}} ${SOURCE_FILES_GF_${SVARIANT_UPPER}_BROADWELL})
target_include_directories(${LIB_GF_${SVARIANT_UPPER}} PRIVATE common ${INC_COMMON} ${INC_PRECOMP_${SVARIANT_UPPER}} include ${PROJECT_SOURCE_DIR}/include ${INC_COMMON})
target_compile_options(${LIB_GF_${SVARIANT_UPPER}} PRIVATE ${C_OPT_FLAGS})
add_subdirectory(test)
include(../lvlx.cmake)

View File

@@ -1,46 +0,0 @@
CC=gcc
CFLAGS= -O3 -std=gnu11 -Wall -march=native -Wno-missing-braces -Wno-logical-not-parentheses
LDFLAGS=-lm
AR=ar rcs
RANLIB=ranlib
OBJECTS=objs/fp_p1913.o objs/fp.o objs/fp2.o objs/fp_asm.o objs/random.o
all: lib tests
objs/fp_p1913.o: fp_p1913.c
@mkdir -p $(@D)
$(CC) -c $(CFLAGS) fp_p1913.c -o objs/fp_p1913.o
objs/fp.o: fp.c
@mkdir -p $(@D)
$(CC) -c $(CFLAGS) fp.c -o objs/fp.o
objs/fp2.o: fp2.c
@mkdir -p $(@D)
$(CC) -c $(CFLAGS) fp2.c -o objs/fp2.o
objs/fp_asm.o: fp_asm.S
$(CC) -c $(CFLAGS) fp_asm.S -o objs/fp_asm.o
objs/random.o: ../../../common/generic/randombytes_system.c
$(CC) -c $(CFLAGS) ../../../common/generic/randombytes_system.c -o objs/random.o
lib: $(OBJECTS)
rm -rf lib
mkdir lib
$(AR) lib/libtest.a $^
$(RANLIB) lib/libtest.a
tests: lib
$(CC) $(CFLAGS) -L./lib test/test_fp.c test/test_extras.c -ltest $(LDFLAGS) -o test_fp -lgmp
$(CC) $(CFLAGS) -L./lib test/test_fp2.c test/test_extras.c -ltest $(LDFLAGS) -o test_fp2 -lgmp
check: tests
.PHONY: clean
clean:
rm -rf *.req objs lib test_fp*

View File

@@ -1,192 +1,95 @@
#include "include/fp.h"
#include <assert.h>
#include "fp.h"
const uint64_t p[NWORDS_FIELD] = { 0xffffffffffffffff, 0x252C9E49355147FF, 0x33A6A86587407437, 0x34E29E286B95D98C };
const uint64_t R2[NWORDS_FIELD] = { 0x233625AE400674D4, 0x20AFD6C1025A1C2E, 0x30A841AB0920655D, 0x0D72E7D67C30CD3D };
const uint64_t pp[NWORDS_FIELD] = { 0x01, 0x00, 0x00, 0x00 };
const digit_t p[NWORDS_FIELD] = { 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x04ffffffffffffff };
const digit_t p2[NWORDS_FIELD] = { 0xfffffffffffffffe, 0xffffffffffffffff, 0xffffffffffffffff, 0x09ffffffffffffff };
void fp_set(digit_t* x, const digit_t val)
{ // Set field element x = val, where val has wordsize
x[0] = val;
for (unsigned int i = 1; i < NWORDS_FIELD; i++) {
x[i] = 0;
}
}
void fp_mont_setone(digit_t* out1) {
out1[0] = 0x4;
out1[1] = UINT64_C(0x6b4d86db2abae000);
out1[2] = UINT64_C(0x31655e69e2fe2f23);
out1[3] = UINT64_C(0x2c75875e51a899cf);
}
bool fp_is_equal(const digit_t* a, const digit_t* b)
{ // Compare two field elements in constant time
// Returns 1 (true) if a=b, 0 (false) otherwise
digit_t r = 0;
for (unsigned int i = 0; i < NWORDS_FIELD; i++)
r |= a[i] ^ b[i];
return (bool)is_digit_zero_ct(r);
}
bool fp_is_zero(const digit_t* a)
{ // Is a field element zero?
// Returns 1 (true) if a=0, 0 (false) otherwise
digit_t r = 0;
for (unsigned int i = 0; i < NWORDS_FIELD; i++)
r |= a[i] ^ 0;
return (bool)is_digit_zero_ct(r);
}
void fp_copy(digit_t* out, const digit_t* a)
void
fp_sqrt(fp_t *x)
{
memcpy(out, a, NWORDS_FIELD*RADIX/8);
(void)gf5248_sqrt(x, x);
}
void fp_neg(digit_t* out, const digit_t* a)
{ // Modular negation, out = -a mod p
// Input: a in [0, p-1]
// Output: out in [0, p-1]
unsigned int i, borrow = 0;
for (i = 0; i < NWORDS_FIELD; i++) {
SUBC(out[i], borrow, ((digit_t*)p)[i], a[i], borrow);
}
fp_sub(out, out, (digit_t*)p);
uint32_t
fp_is_square(const fp_t *a)
{
    // gf5248_legendre() yields 0, 1 or -1. The result must be 0xFF..FF for
    // 0 and 1, and 0x00..00 for -1.
    // Shifting right by one maps 0 and 1 to 0 and -1 to -1 (all ones, relying
    // on the sign-propagating shift the original code also uses); the
    // complement of that word is the requested mask.
    const int32_t symbol = gf5248_legendre(a);
    const uint32_t non_square_mask = (uint32_t)(symbol >> 1);
    return ~non_square_mask;
}
void fp_tomont(digit_t* out, const digit_t* a)
{ // Conversion to Montgomery representation
// out = a*R^2*R^(-1) mod p = a*R mod p, where a in [0, p-1].
fp_mul(out, a, (digit_t*)&R2);
// In-place wrapper: passes x as both output and input of gf5248_invert()
// and discards that function's return value.
void
fp_inv(fp_t *x)
{
(void)gf5248_invert(x, x);
}
void fp_frommont(digit_t* out, const digit_t* a)
{ // Conversion from Montgomery representation to standard representation
// out = a*R^(-1) mod p, where a in [0, p-1].
digit_t one[NWORDS_FIELD] = {0};
one[0] = 1;
fp_mul(out, a, one);
// In-place fixed exponentiation: on return *a holds a**((p-3)/4),
// for the prime p = 5*2^248 - 1 used in the derivation below.
// The chain builds a**(2^k - 1) for k = 3, 6, 12, 15, 30, 60, 120, 123, 246;
// a itself is overwritten from the 2^12 - 1 step onwards.
void
fp_exp3div4(fp_t *a)
{
//
// We optimise this by using the shape of the prime
// to avoid almost all multiplications:
//
// We write:
// (p - 3) / 4 = (5*2^248 - 4) / 4
// = 5*2^246 - 1
// = 5*(2^246 - 1) + 4
// Then we first compute:
// a246 = a**(2^246 - 1)
// Then from this we get the desired result as:
// a**((p-3)/4) = a246**5 * a**4
// We can compute this with 12 multiplications and 247 squares.
fp_t z4, t3, t6, tmp;
// Compute a**3 and a**4
fp_sqr(&z4, a);
fp_mul(&tmp, a, &z4);
fp_sqr(&z4, &z4);
// Compute a**(2^3 - 1) = a**7
fp_mul(&t3, &tmp, &z4);
// Compute a**(2^6 - 1)
// (three squarings of t3 in total, then one multiplication by t3)
fp_sqr(&t6, &t3);
for (int i = 1; i < 3; i++)
fp_sqr(&t6, &t6);
fp_mul(&t6, &t6, &t3);
// Compute a**(2^12 - 1)
fp_sqr(a, &t6);
for (int i = 1; i < 6; i++)
fp_sqr(a, a);
fp_mul(a, a, &t6);
// Compute a**(2^15 - 1)
for (int i = 0; i < 3; i++)
fp_sqr(a, a);
fp_mul(a, a, &t3);
// Compute a**(2^30 - 1)
fp_sqr(&tmp, a);
for (int i = 1; i < 15; i++)
fp_sqr(&tmp, &tmp);
fp_mul(a, a, &tmp);
// Compute a**(2^60 - 1)
fp_sqr(&tmp, a);
for (int i = 1; i < 30; i++)
fp_sqr(&tmp, &tmp);
fp_mul(a, a, &tmp);
// Compute a**(2^120 - 1)
fp_sqr(&tmp, a);
for (int i = 1; i < 60; i++)
fp_sqr(&tmp, &tmp);
fp_mul(a, a, &tmp);
// Compute a**(2^123 - 1)
for (int i = 0; i < 3; i++)
fp_sqr(a, a);
fp_mul(a, a, &t3);
// Compute a**(2^246 - 1)
fp_sqr(&tmp, a);
for (int i = 1; i < 123; i++)
fp_sqr(&tmp, &tmp);
fp_mul(a, a, &tmp);
// Compute a**(5*(2^246 - 1))
// (tmp = a**4 after two squarings, so a * tmp = a**5)
fp_sqr(&tmp, a);
fp_sqr(&tmp, &tmp);
fp_mul(a, a, &tmp);
// Compute a**(5*(2^246 - 1) + 4)
// (z4 still holds the fourth power of the original input)
fp_mul(a, a, &z4);
}
void MUL(digit_t* out, const digit_t a, const digit_t b)
{ // Digit multiplication, digit*digit -> 2-digit result
// Inputs: a, b in [0, 2^w-1], where w is the computer wordsize
// Output: 0 < out < 2^(2w)-1
register digit_t al, ah, bl, bh, temp;
digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry;
digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t)*4), mask_high = (digit_t)(-1) << (sizeof(digit_t)*4);
al = a & mask_low; // Low part
ah = a >> (sizeof(digit_t)*4); // High part
bl = b & mask_low;
bh = b >> (sizeof(digit_t)*4);
albl = al * bl;
albh = al * bh;
ahbl = ah * bl;
ahbh = ah * bh;
out[0] = albl & mask_low; // out00
res1 = albl >> (sizeof(digit_t)*4);
res2 = ahbl & mask_low;
res3 = albh & mask_low;
temp = res1 + res2 + res3;
carry = temp >> (sizeof(digit_t)*4);
out[0] ^= temp << (sizeof(digit_t)*4); // out01
res1 = ahbl >> (sizeof(digit_t)*4);
res2 = albh >> (sizeof(digit_t)*4);
res3 = ahbh & mask_low;
temp = res1 + res2 + res3 + carry;
out[1] = temp & mask_low; // out10
carry = temp & mask_high;
out[1] ^= (ahbh & mask_high) + carry; // out11
}
digit_t mp_shiftr(digit_t* x, const unsigned int shift, const unsigned int nwords)
{ // Multiprecision right shift
digit_t bit_out = x[0] & 1;
for (unsigned int i = 0; i < nwords-1; i++) {
SHIFTR(x[i+1], x[i], shift, x[i], RADIX);
}
x[nwords-1] >>= shift;
return bit_out;
}
void mp_shiftl(digit_t* x, const unsigned int shift, const unsigned int nwords)
{ // Multiprecision left shift
for (int i = nwords-1; i > 0; i--) {
SHIFTL(x[i], x[i-1], shift, x[i], RADIX);
}
x[0] <<= shift;
}
static void fp_exp3div4(digit_t* out, const digit_t* a)
{ // Fixed exponentiation out = a^((p-3)/4) mod p
// Input: a in [0, p-1]
// Output: out in [0, p-1]
// Requirement: p = 3(mod 4)
fp_t p_t, acc;
digit_t bit;
memcpy((digit_t*)p_t, (digit_t*)p, NWORDS_FIELD*RADIX/8);
memcpy((digit_t*)acc, (digit_t*)a, NWORDS_FIELD*RADIX/8);
mp_shiftr(p_t, 1, NWORDS_FIELD);
mp_shiftr(p_t, 1, NWORDS_FIELD);
fp_set(out, 1);
fp_tomont(out, out);
for (int i = 0; i < NWORDS_FIELD*RADIX-2; i++) {
bit = p_t[0] & 1;
mp_shiftr(p_t, 1, NWORDS_FIELD);
if (bit == 1) {
fp_mul(out, out, acc);
}
fp_sqr(acc, acc);
}
}
void fp_inv(digit_t* a)
{ // Modular inversion, out = x^-1*R mod p, where R = 2^(w*nwords), w is the computer wordsize and nwords is the number of words to represent p
// Input: a=xR in [0, p-1]
// Output: out in [0, p-1]. It outputs 0 if the input does not have an inverse
// Requirement: Ceiling(Log(p)) < w*nwords
fp_t t;
fp_exp3div4(t, a);
fp_sqr(t, t);
fp_sqr(t, t);
fp_mul(a, t, a); // a^(p-2)
}
bool fp_is_square(const digit_t* a)
{ // Is field element a square?
// Output: out = 0 (false), 1 (true)
fp_t t, one;
fp_exp3div4(t, a);
fp_sqr(t, t);
fp_mul(t, t, a); // a^((p-1)/2)
fp_frommont(t, t);
fp_set(one, 1);
return fp_is_equal(t, one);
}
void fp_sqrt(digit_t* a)
{ // Square root computation, out = a^((p+1)/4) mod p
fp_t t;
fp_exp3div4(t, a);
fp_mul(a, t, a); // a^((p+1)/4)
}

View File

@@ -1,190 +0,0 @@
#include <fp2.h>
extern const digit_t R[NWORDS_FIELD];
extern void fp2_sq_c0(fp2_t *out, const fp2_t *in);
extern void fp2_sq_c1(fp_t *out, const fp2_t *in);
extern void fp2_mul_c0(fp_t *out, const fp2_t *in0, const fp2_t *in1);
extern void fp2_mul_c1(fp_t *out, const fp2_t *in0, const fp2_t *in1);
/* Arithmetic modulo X^2 + 1 */
void fp2_set(fp2_t* x, const digit_t val)
{
fp_set(x->re, val);
fp_set(x->im, 0);
}
bool fp2_is_zero(const fp2_t* a)
{ // Is a GF(p^2) element zero?
// Returns 1 (true) if a=0, 0 (false) otherwise
return fp_is_zero(a->re) & fp_is_zero(a->im);
}
bool fp2_is_equal(const fp2_t* a, const fp2_t* b)
{ // Compare two GF(p^2) elements in constant time
// Returns 1 (true) if a=b, 0 (false) otherwise
return fp_is_equal(a->re, b->re) & fp_is_equal(a->im, b->im);
}
void fp2_copy(fp2_t* x, const fp2_t* y)
{
fp_copy(x->re, y->re);
fp_copy(x->im, y->im);
}
fp2_t fp2_non_residue()
{ // 2 + i is a quadratic non-residue for p1913
fp_t one = {0};
fp2_t res;
one[0] = 1;
fp_tomont(one, one);
fp_add(res.re, one, one);
fp_copy(res.im, one);
return res;
}
void fp2_add(fp2_t* x, const fp2_t* y, const fp2_t* z)
{
fp_add(x->re, y->re, z->re);
fp_add(x->im, y->im, z->im);
}
void fp2_sub(fp2_t* x, const fp2_t* y, const fp2_t* z)
{
fp_sub(x->re, y->re, z->re);
fp_sub(x->im, y->im, z->im);
}
void fp2_neg(fp2_t* x, const fp2_t* y)
{
fp_neg(x->re, y->re);
fp_neg(x->im, y->im);
}
void fp2_mul(fp2_t* x, const fp2_t* y, const fp2_t* z)
{
fp_t t;
fp2_mul_c0(&t, y, z); // c0 = a0*b0 - a1*b1
fp2_mul_c1(&x->im, y, z); // c1 = a0*b1 + a1*b0
x->re[0] = t[0]; x->re[1] = t[1]; x->re[2] = t[2]; x->re[3] = t[3];
}
void fp2_sqr(fp2_t* x, const fp2_t* y) {
fp2_t t;
fp2_sq_c0(&t, y); // c0 = (a0+a1)(a0-a1)
fp2_sq_c1(&x->im, y); // c1 = 2a0*a1
x->re[0] = t.re[0]; x->re[1] = t.re[1]; x->re[2] = t.re[2]; x->re[3] = t.re[3];
}
void fp2_inv(fp2_t* x)
{
fp_t t0, t1;
fp_sqr(t0, x->re);
fp_sqr(t1, x->im);
fp_add(t0, t0, t1);
fp_inv(t0);
fp_mul(x->re, x->re, t0);
fp_mul(x->im, x->im, t0);
fp_neg(x->im, x->im);
}
bool fp2_is_square(const fp2_t* x)
{
fp_t t0, t1;
fp_sqr(t0, x->re);
fp_sqr(t1, x->im);
fp_add(t0, t0, t1);
return fp_is_square(t0);
}
void fp2_frob(fp2_t* x, const fp2_t* y)
{
memcpy((digit_t*)x->re, (digit_t*)y->re, NWORDS_FIELD*RADIX/8);
fp_neg(x->im, y->im);
}
void fp2_tomont(fp2_t* x, const fp2_t* y)
{
fp_tomont(x->re, y->re);
fp_tomont(x->im, y->im);
}
void fp2_frommont(fp2_t* x, const fp2_t* y)
{
fp_frommont(x->re, y->re);
fp_frommont(x->im, y->im);
}
// NOTE: old, non-constant-time implementation. Could be optimized
void fp2_sqrt(fp2_t* x)
{
fp_t sdelta, re, tmp1, tmp2, inv2, im;
if (fp_is_zero(x->im)) {
if (fp_is_square(x->re)) {
fp_sqrt(x->re);
return;
} else {
fp_neg(x->im, x->re);
fp_sqrt(x->im);
fp_set(x->re, 0);
return;
}
}
// sdelta = sqrt(re^2 + im^2)
fp_sqr(sdelta, x->re);
fp_sqr(tmp1, x->im);
fp_add(sdelta, sdelta, tmp1);
fp_sqrt(sdelta);
fp_set(inv2, 2);
fp_tomont(inv2, inv2); // inv2 <- 2
fp_inv(inv2);
fp_add(re, x->re, sdelta);
fp_mul(re, re, inv2);
memcpy((digit_t*)tmp2, (digit_t*)re, NWORDS_FIELD*RADIX/8);
if (!fp_is_square(tmp2)) {
fp_sub(re, x->re, sdelta);
fp_mul(re, re, inv2);
}
fp_sqrt(re);
memcpy((digit_t*)im, (digit_t*)re, NWORDS_FIELD*RADIX/8);
fp_inv(im);
fp_mul(im, im, inv2);
fp_mul(x->im, im, x->im);
memcpy((digit_t*)x->re, (digit_t*)re, NWORDS_FIELD*RADIX/8);
}
// Lexicographic comparison of two field elements. Returns +1 if x > y, -1 if x < y, 0 if x = y
int fp2_cmp(fp2_t* x, fp2_t* y){
fp2_t a, b;
fp2_frommont(&a, x);
fp2_frommont(&b, y);
for(int i = NWORDS_FIELD-1; i >= 0; i--){
if(a.re[i] > b.re[i])
return 1;
if(a.re[i] < b.re[i])
return -1;
}
for(int i = NWORDS_FIELD-1; i >= 0; i--){
if(a.im[i] > b.im[i])
return 1;
if(a.im[i] < b.im[i])
return -1;
}
return 0;
}

237
src/gf/broadwell/lvl1/fp_asm.S Normal file → Executable file
View File

@@ -1,17 +1,27 @@
#include <sqisign_namespace.h>
.intel_syntax noprefix
.set pbytes,32
.set plimbs,4
.global p_plus_1
p_plus_1: .quad 0x0000000000000000, 0x252C9E4935514800, 0x33A6A86587407437, 0x34E29E286B95D98C
#ifdef __APPLE__
.section __TEXT,__const
#else
.section .rodata
#endif
p_plus_1: .quad 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0500000000000000
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",@progbits
#endif
#include <asm_preamble.h>
.text
.p2align 4,,15
.global fp_add
fp_add:
push r12
xor rax, rax
mov r8, [rsi]
mov r9, [rsi+8]
@@ -20,36 +30,34 @@ fp_add:
add r8, [rdx]
adc r9, [rdx+8]
adc r10, [rdx+16]
adc r11, [rdx+24]
mov r12, [rip+p]
sub r8, r12
mov rcx, [rip+p+8]
sbb r9, rcx
mov rsi, [rip+p+16]
sbb r10, rsi
adc r11, [rdx+24]
mov rax, r11
shr rax, 59
neg rax
mov rdx, [rip+p+24]
sbb r11, rdx
sbb rax, 0
and r12, rax
and rcx, rax
and rsi, rax
and rdx, rax
sub r8, rax
sbb r9, rax
sbb r10, rax
sbb r11, rdx
add r8, r12
adc r9, rcx
adc r10, rsi
adc r11, rdx
mov rax, r11
shr rax, 59
neg rax
mov rdx, [rip+p+24]
and rdx, rax
sub r8, rax
sbb r9, rax
sbb r10, rax
sbb r11, rdx
mov [rdi], r8
mov [rdi+8], r9
mov [rdi+16], r10
mov [rdi+24], r11
pop r12
ret
.global fp_sub
fp_sub:
push r12
xor rax, rax
mov r8, [rsi]
mov r9, [rsi+8]
@@ -61,23 +69,26 @@ fp_sub:
sbb r11, [rdx+24]
sbb rax, 0
mov r12, [rip+p]
mov rcx, [rip+p+8]
mov rsi, [rip+p+16]
mov rdx, [rip+p+24]
and r12, rax
and rcx, rax
and rsi, rax
and rdx, rax
add r8, r12
adc r9, rcx
adc r10, rsi
add r8, rax
adc r9, rax
adc r10, rax
adc r11, rdx
mov rax, r11
sar rax, 59
mov rdx, [rip+p+24]
and rdx, rax
add r8, rax
adc r9, rax
adc r10, rax
adc r11, rdx
mov [rdi], r8
mov [rdi+8], r9
mov [rdi+16], r10
mov [rdi+24], r11
pop r12
ret
///////////////////////////////////////////////////////////////// MACROS
@@ -105,18 +116,11 @@ fp_sub:
adc \Z4, 0
.endm
.macro MULADD64x192 M1, Z0, Z1, Z2, Z3, T0, T1
.macro MULADD64x64 M1, Z0, Z1, Z2, Z3, T0, T1
mulx \T0, \T1, \M1 // A0*B0
xor rax, rax
adox \Z0, \T1
adox \Z1, \T0
mulx \T0, \T1, 8\M1 // A0*B1
adcx \Z1, \T1
adox \Z2, \T0
mulx \T0, \T1, 16\M1 // A0*B2
adcx \Z2, \T1
adox \Z2, \T1
adox \Z3, \T0
adc \Z3, 0
.endm
//***********************************************************************
@@ -133,13 +137,13 @@ fp2_mul_c0:
push r14
mov rcx, rdx
// [rdi0:3] <- p - b1
mov r8, [rip+p]
mov r9, [rip+p+8]
mov r10, [rip+p+16]
mov r11, [rip+p+24]
// [rdi0:3] <- 2p - b1
mov r8, [rip+p2]
mov r9, [rip+p2+8]
mov r10, r9
mov r11, [rip+p2+24]
mov rax, [rcx+32]
mov rdx, [rcx+40]
mov rdx, [rcx+40]
sub r8, rax
sbb r9, rdx
mov rax, [rcx+48]
@@ -167,7 +171,7 @@ fp2_mul_c0:
MULADD64x256 [rsi+32], r8, r9, r10, r11, r12, r13, r14, rax
// [r9:r12] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, r8 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], r9, r10, r11, r12, r13, r14
MULADD64x64 [rip+p_plus_1+24], r9, r10, r11, r12, r13, r14
// [r9:r12, r8] <- z = a0 x b01 - a1 x b11 + z
mov rdx, [rcx+8]
@@ -176,7 +180,7 @@ fp2_mul_c0:
MULADD64x256 [rsi+32], r9, r10, r11, r12, r8, r13, r14, rax
// [r10:r12, r8] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, r9 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], r10, r11, r12, r8, r13, r14
MULADD64x64 [rip+p_plus_1+24], r10, r11, r12, r8, r13, r14
// [r10:r12, r8:r9] <- z = a0 x b02 - a1 x b12 + z
mov rdx, [rcx+16]
@@ -185,7 +189,7 @@ fp2_mul_c0:
MULADD64x256 [rsi+32], r10, r11, r12, r8, r9, r13, r14, rax
// [r11:r12, r8:r9] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, r10 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], r11, r12, r8, r9, r13, r14
MULADD64x64 [rip+p_plus_1+24], r11, r12, r8, r9, r13, r14
// [r11:r12, r8:r10] <- z = a0 x b03 - a1 x b13 + z
mov rdx, [rcx+24]
@@ -194,27 +198,8 @@ fp2_mul_c0:
MULADD64x256 [rsi+32], r11, r12, r8, r9, r10, r13, r14, rax
// [r12, r8:r10] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, r11 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], r12, r8, r9, r10, r13, r14
MULADD64x64 [rip+p_plus_1+24], r12, r8, r9, r10, r13, r14
// Final correction
mov rsi, [rip+p]
mov rcx, [rip+p+8]
mov rdx, [rip+p+16]
mov r11, [rip+p+24]
sub r12, rsi
sbb r8, rcx
sbb r9, rdx
sbb r10, r11
sbb rax, 0
and rsi, rax
and rcx, rax
and rdx, rax
and r11, rax
add r12, rsi
adc r8, rcx
adc r9, rdx
adc r10, r11
mov [rdi], r12
mov [rdi+8], r8
mov [rdi+16], r9
@@ -254,7 +239,7 @@ fp2_mul_c1:
MULADD64x256 [rsi+32], r8, r9, r10, r11, r12, r13, r14, rax
// [r9:r12] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, r8 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], r9, r10, r11, r12, r13, r14
MULADD64x64 [rip+p_plus_1+24], r9, r10, r11, r12, r13, r14
// [r9:r12, r8] <- z = a0 x b01 - a1 x b11 + z
mov rdx, [rcx+40]
@@ -263,7 +248,7 @@ fp2_mul_c1:
MULADD64x256 [rsi+32], r9, r10, r11, r12, r8, r13, r14, rax
// [r10:r12, r8] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, r9 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], r10, r11, r12, r8, r13, r14
MULADD64x64 [rip+p_plus_1+24], r10, r11, r12, r8, r13, r14
// [r10:r12, r8:r9] <- z = a0 x b02 - a1 x b12 + z
mov rdx, [rcx+48]
@@ -272,7 +257,7 @@ fp2_mul_c1:
MULADD64x256 [rsi+32], r10, r11, r12, r8, r9, r13, r14, rax
// [r11:r12, r8:r9] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, r10 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], r11, r12, r8, r9, r13, r14
MULADD64x64 [rip+p_plus_1+24], r11, r12, r8, r9, r13, r14
// [r11:r12, r8:r10] <- z = a0 x b03 - a1 x b13 + z
mov rdx, [rcx+56]
@@ -281,27 +266,8 @@ fp2_mul_c1:
MULADD64x256 [rsi+32], r11, r12, r8, r9, r10, r13, r14, rax
// [r12, r8:r10] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, r11 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], r12, r8, r9, r10, r13, r14
MULADD64x64 [rip+p_plus_1+24], r12, r8, r9, r10, r13, r14
// Final correction
mov rsi, [rip+p]
mov rcx, [rip+p+8]
mov rdx, [rip+p+16]
mov r11, [rip+p+24]
sub r12, rsi
sbb r8, rcx
sbb r9, rdx
sbb r10, r11
sbb rax, 0
and rsi, rax
and rcx, rax
and rdx, rax
and r11, rax
add r12, rsi
adc r8, rcx
adc r9, rdx
adc r10, r11
mov [rdi], r12
mov [rdi+8], r8
mov [rdi+16], r9
@@ -322,28 +288,28 @@ fp2_mul_c1:
.macro FPMUL256x256 M0, M1, Z0, Z1, Z2, Z3, Z4, T0, T1
// [Z1:Z4] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, \Z0 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], \Z1, \Z2, \Z3, \Z4, \T0, \T1
MULADD64x64 [rip+p_plus_1+24], \Z1, \Z2, \Z3, \Z4, \T0, \T1
// [Z1:Z4, Z0] <- z = a01 x a1 + z
mov rdx, 8\M0
MULADD64x256 \M1, \Z1, \Z2, \Z3, \Z4, \Z0, \T0, \T1, \Z0
// [Z2:Z4, Z0] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, \Z1 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], \Z2, \Z3, \Z4, \Z0, \T0, \T1
MULADD64x64 [rip+p_plus_1+24], \Z2, \Z3, \Z4, \Z0, \T0, \T1
// [Z2:Z4, Z0:Z1] <- z = a02 x a1 + z
mov rdx, 16\M0
MULADD64x256 \M1, \Z2, \Z3, \Z4, \Z0, \Z1, \T0, \T1, \Z1
// [Z3:Z4, Z0:Z1] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, \Z2 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], \Z3, \Z4, \Z0, \Z1, \T0, \T1
MULADD64x64 [rip+p_plus_1+24], \Z3, \Z4, \Z0, \Z1, \T0, \T1
// [Z3:Z4, Z0:Z2] <- z = a03 x a1 + z
mov rdx, 24\M0
MULADD64x256 \M1, \Z3, \Z4, \Z0, \Z1, \Z2, \T0, \T1, \Z2
// [Z4, Z0:Z2] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, \Z3 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], \Z4, \Z0, \Z1, \Z2, \T0, \T1
MULADD64x64 [rip+p_plus_1+24], \Z4, \Z0, \Z1, \Z2, \T0, \T1
.endm
//***********************************************************************
@@ -371,19 +337,21 @@ fp2_sq_c0:
mov [rdi+16], r10
mov [rdi+24], r11
// a0 - a1 + p
// a0 - a1 + 2p
mov r8, [rsi]
mov r10, [rsi+8]
mov r12, [rsi+16]
mov r13, [rsi+24]
sub r8, [rsi+32]
sbb r10, [rsi+40]
sbb r12, [rsi+48]
sbb r12, [rsi+48]
sbb r13, [rsi+56]
add r8, [rip+p]
adc r10, [rip+p+8]
adc r12, [rip+p+16]
adc r13, [rip+p+24]
mov rax, [rip+p2]
add r8, rax
mov rax, [rip+p2+8]
adc r10, rax
adc r12, rax
adc r13, [rip+p2+24]
mov [rdi+32], r8
mov [rdi+40], r10
mov [rdi+48], r12
@@ -402,25 +370,6 @@ fp2_sq_c0:
FPMUL256x256 [rdi], [rdi+32], r8, r9, r10, r11, r12, r13, rcx
// Final correction
mov rsi, [rip+p]
mov rcx, [rip+p+8]
mov rdx, [rip+p+16]
mov r11, [rip+p+24]
sub r12, rsi
sbb r8, rcx
sbb r9, rdx
sbb r10, r11
sbb rax, 0
and rsi, rax
and rcx, rax
and rdx, rax
and r11, rax
add r12, rsi
adc r8, rcx
adc r9, rdx
adc r10, r11
mov [rdi], r12
mov [rdi+8], r8
mov [rdi+16], r9
@@ -465,27 +414,8 @@ fp2_sq_c1:
adox r12, rax
FPMUL256x256 [rsp], [rsi+32], r8, r9, r10, r11, r12, r13, rcx
add rsp, 32
add rsp, 32
// Final correction
mov rsi, [rip+p]
mov rcx, [rip+p+8]
mov rdx, [rip+p+16]
mov r11, [rip+p+24]
sub r12, rsi
sbb r8, rcx
sbb r9, rdx
sbb r10, r11
sbb rax, 0
and rsi, rax
and rcx, rax
and rdx, rax
and r11, rax
add r12, rsi
adc r8, rcx
adc r9, rdx
adc r10, r11
mov [rdi], r12
mov [rdi+8], r8
mov [rdi+16], r9
@@ -521,26 +451,7 @@ fp_mul:
FPMUL256x256 [rcx], [rsi], r8, r9, r10, r11, r12, r13, r14
// Final correction
mov rsi, [rip+p]
mov rcx, [rip+p+8]
mov rdx, [rip+p+16]
mov r11, [rip+p+24]
sub r12, rsi
sbb r8, rcx
sbb r9, rdx
sbb r10, r11
sbb rax, 0
and rsi, rax
and rcx, rax
and rdx, rax
and r11, rax
add r12, rsi
adc r8, rcx
adc r9, rdx
adc r10, r11
mov [rdi], r12
mov [rdi], r12
mov [rdi+8], r8
mov [rdi+16], r9
mov [rdi+24], r10
@@ -552,4 +463,4 @@ fp_mul:
.global fp_sqr
fp_sqr:
mov rdx, rsi
jmp fp_mul
jmp fp_mul

View File

@@ -0,0 +1,767 @@
/*
* This code is derived from discussions with Thomas Pornin
*/
#include "gf5248.h"
// see gf5248.h
const gf5248 gf5248_ZERO = { 0, 0, 0, 0 };
// see gf5248.h
const gf5248 gf5248_ONE = { 0x0000000000000033, 0x0000000000000000, 0x0000000000000000, 0x0100000000000000 };
// see gf5248.h
const gf5248 gf5248_MINUS_ONE = { 0xFFFFFFFFFFFFFFCC, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x03FFFFFFFFFFFFFF };
// Montgomery representation of 2^256.
static const gf5248 R2 = { 0x3333333333333d70, 0x3333333333333333, 0x3333333333333333, 0x0333333333333333 };
// The modulus itself (this is also a valid representation of zero).
static const gf5248 MODULUS = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x04FFFFFFFFFFFFFF };
// 1/2^244 (in Montgomery representation).
static const gf5248 INVT244 = { 0x0000000000001000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 };
static const gf5248 PM1O3 = { 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa, 0xaaaaaaaaaaaaaaaa, 0x01aaaaaaaaaaaaaa };
// Normalize value *a into *d.
static inline void
inner_gf5248_normalize(gf5248 *d, const gf5248 *a)
{
    uint64_t r0, r1, r2, r3, mask;
    unsigned char c;

    // r = a - q, limb by limb, with the borrow threaded through c
    c = inner_gf5248_sbb(0, a->v0, 0xFFFFFFFFFFFFFFFF, &r0);
    c = inner_gf5248_sbb(c, a->v1, 0xFFFFFFFFFFFFFFFF, &r1);
    c = inner_gf5248_sbb(c, a->v2, 0xFFFFFFFFFFFFFFFF, &r2);
    c = inner_gf5248_sbb(c, a->v3, 0x04FFFFFFFFFFFFFF, &r3);

    // mask = 0 - 0 - borrow: set when the subtraction went negative
    (void)inner_gf5248_sbb(c, 0, 0, &mask);

    // r += (q & mask). The three low limbs of q are all ones, so for them
    // the masked limb is mask itself; only the top limb needs the explicit AND.
    c = inner_gf5248_adc(0, r0, mask, &r0);
    c = inner_gf5248_adc(c, r1, mask, &r1);
    c = inner_gf5248_adc(c, r2, mask, &r2);
    (void)inner_gf5248_adc(c, r3, mask & 0x04FFFFFFFFFFFFFF, &r3);

    d->v0 = r0;
    d->v1 = r1;
    d->v2 = r2;
    d->v3 = r3;
}
// Expand the most significant bit of x into a full-width 64-bit word
// (0x0000000000000000 or 0xFFFFFFFFFFFFFFFF).
//
// Implemented with unsigned arithmetic only: (x >> 63) is 0 or 1, and
// 0 - 1 wraps to all-ones. This avoids reinterpreting x as a signed value
// and right-shifting a negative number, whose result is
// implementation-defined in C.
static inline uint64_t
sgnw(uint64_t x)
{
    return (uint64_t)0 - (x >> 63);
}
// d <- u*f + v*g (in the field)
// Coefficients f and g are provided as unsigned integers, but they
// really are signed values which must be less than 2^62 (in absolute value).
//
// The signs of f and g are moved onto u and v (by conditional negation),
// so that the multi-limb product below only handles non-negative
// multipliers. The result is only partially reduced: it is folded once
// using 5*2^248 = 1 mod q, not normalized.
static void
gf5248_lin(gf5248 *d, const gf5248 *u, const gf5248 *v, uint64_t f, uint64_t g)
{
// f <- abs(f), keeping the sign in sf, and negating u accordingly
uint64_t sf = sgnw(f);
// (f ^ sf) - sf is the two's complement negation when sf is all-ones,
// and the identity when sf is zero.
f = (f ^ sf) - sf;
gf5248 tu;
gf5248_neg(&tu, u);
// NOTE(review): presumably selects u when the control word is 0 and the
// negated value when it is all-ones - confirm against gf5248_select().
gf5248_select(&tu, u, &tu, (uint32_t)sf);
// g <- abs(g), keeping the sign in sg, and negating v accordingly
uint64_t sg = sgnw(g);
g = (g ^ sg) - sg;
gf5248 tv;
gf5248_neg(&tv, v);
gf5248_select(&tv, v, &tv, (uint32_t)sg);
// Linear combination over plain integers.
// Each step produces one output limb and passes the running high word t
// on to the next limb.
uint64_t d0, d1, d2, d3, t;
inner_gf5248_umul_x2(d0, t, tu.v0, f, tv.v0, g);
inner_gf5248_umul_x2_add(d1, t, tu.v1, f, tv.v1, g, t);
inner_gf5248_umul_x2_add(d2, t, tu.v2, f, tv.v2, g, t);
inner_gf5248_umul_x2_add(d3, t, tu.v3, f, tv.v3, g, t);
// Reduction: split into low part (248 bits) and high part
// (71 bits, since t can be up to 63 bits). If the high
// part is h, then:
// h*2^248 = (h mod 5)*2^248 + floor(h/5) mod q
// h0 holds the low 64 bits of h, h1 the remaining high bits.
uint64_t h0 = (d3 >> 56) | (t << 8);
uint64_t h1 = t >> 56;
d3 &= 0x00FFFFFFFFFFFFFF;
uint64_t z0, z1, quo0, rem0, quo1, rem1;
// Division of h0 by 5 through multiplication by 0xCCCCCCCCCCCCCCCD and
// a right shift of the high word by 2; only the high word z1 is used.
inner_gf5248_umul(z0, z1, h0, 0xCCCCCCCCCCCCCCCD);
(void)z0;
quo0 = z1 >> 2;
rem0 = h0 - (5 * quo0);
// Same division by 5 for the small value h1, with a narrower constant.
quo1 = (h1 * 0xCD) >> 10;
rem1 = h1 - (5 * quo1);
// h = rem0 + 5*quo0 + (rem1 + 5*quo1)*2^64
// = rem0 + rem1 + 5*(quo0 + quo1*2^64 + rem1*((2^64 - 1)/5))
// We add rem0 and rem1 modulo 5, with an extra carry that
// goes into the folded part (multiple of 5).
uint64_t e, f0, f1;
unsigned char cc;
// Biasing rem0 by 2^64 - 6 makes the 64-bit addition overflow exactly
// when rem0 + rem1 >= 6; the bias is removed from e again below.
cc = inner_gf5248_adc(0, rem0 + 0xFFFFFFFFFFFFFFFA, rem1, &e);
cc = inner_gf5248_adc(cc, quo0, rem1 * 0x3333333333333333, &f0);
(void)inner_gf5248_adc(cc, quo1, 0, &f1);
e -= 0xFFFFFFFFFFFFFFFA;
// Now we only have to add e*2^248 + f0:f1 to the low part.
cc = inner_gf5248_adc(0, d0, f0, &d0);
cc = inner_gf5248_adc(cc, d1, f1, &d1);
cc = inner_gf5248_adc(cc, d2, 0, &d2);
(void)inner_gf5248_adc(cc, d3, e << 56, &d3);
d->v0 = d0;
d->v1 = d1;
d->v2 = d2;
d->v3 = d3;
}
// d <- abs(floor((a*f + b*g) / 2^31))
// Coefficients f and g are provided as unsigned integers, but they really
// are signed values, which MUST be at most 2^31 in absolute value.
// The computation is performed over the integers, not modulo q. The low
// 31 bits are dropped (in practice, callers provide appropriate coefficients
// f and g such that a*f + b*g is a multiple of 2^31).
//
// If a*f + b*g is negative, then the absolute value is computed, and the
// function returns 0xFFFFFFFFFFFFFFFF; otherwise, the function returns
// 0x0000000000000000.
static uint64_t
lindiv31abs(gf5248 *d, const gf5248 *a, const gf5248 *b, uint64_t f, uint64_t g)
{
// f <- abs(f), keeping the sign in sf
uint64_t sf = sgnw(f);
f = (f ^ sf) - sf;
// g <- abs(g), keeping the sign in sg
uint64_t sg = sgnw(g);
g = (g ^ sg) - sg;
// Apply the signs of f and g to the source operands.
// (x ^ s) - s over all limbs negates the multi-limb value when s is
// all-ones; the fifth word a4/b4 receives the sign extension.
uint64_t a0, a1, a2, a3, a4;
uint64_t b0, b1, b2, b3, b4;
unsigned char cc;
cc = inner_gf5248_sbb(0, a->v0 ^ sf, sf, &a0);
cc = inner_gf5248_sbb(cc, a->v1 ^ sf, sf, &a1);
cc = inner_gf5248_sbb(cc, a->v2 ^ sf, sf, &a2);
cc = inner_gf5248_sbb(cc, a->v3 ^ sf, sf, &a3);
(void)inner_gf5248_sbb(cc, 0, 0, &a4);
cc = inner_gf5248_sbb(0, b->v0 ^ sg, sg, &b0);
cc = inner_gf5248_sbb(cc, b->v1 ^ sg, sg, &b1);
cc = inner_gf5248_sbb(cc, b->v2 ^ sg, sg, &b2);
cc = inner_gf5248_sbb(cc, b->v3 ^ sg, sg, &b3);
(void)inner_gf5248_sbb(cc, 0, 0, &b4);
// Compute a*f + b*g into d0:d1:d2:d3:d4. Since f and g are at
// most 2^31, we can add two 128-bit products with no overflow.
// Note: a4 and b4 are both in {0, -1}.
uint64_t d0, d1, d2, d3, d4, t;
inner_gf5248_umul_x2(d0, t, a0, f, b0, g);
inner_gf5248_umul_x2_add(d1, t, a1, f, b1, g, t);
inner_gf5248_umul_x2_add(d2, t, a2, f, b2, g, t);
inner_gf5248_umul_x2_add(d3, t, a3, f, b3, g, t);
// The top words a4, b4 are 0 or -1, so their products with f and g
// are 0 or -f / -g: subtract f (resp. g) under the mask.
d4 = t - (a4 & f) - (b4 & g);
// Right-shift the value by 31 bits.
d0 = (d0 >> 31) | (d1 << 33);
d1 = (d1 >> 31) | (d2 << 33);
d2 = (d2 >> 31) | (d3 << 33);
d3 = (d3 >> 31) | (d4 << 33);
// If the result is negative, negate it.
// The sign is taken from the top bit of d4; t doubles as return value.
t = sgnw(d4);
cc = inner_gf5248_sbb(0, d0 ^ t, t, &d0);
cc = inner_gf5248_sbb(cc, d1 ^ t, t, &d1);
cc = inner_gf5248_sbb(cc, d2 ^ t, t, &d2);
(void)inner_gf5248_sbb(cc, d3 ^ t, t, &d3);
d->v0 = d0;
d->v1 = d1;
d->v2 = d2;
d->v3 = d3;
return t;
}
// lzcnt(x) is the number of zero bits above the highest set bit of x.
// x == 0 is supported and yields 64.
#if defined __LZCNT__
static inline uint64_t
lzcnt(uint64_t x)
{
    // The compiler advertises the LZCNT instruction: use the intrinsic.
    return _lzcnt_u64(x);
}
#else
static inline uint64_t
lzcnt(uint64_t x)
{
    uint64_t count = 0;

    // Branch-free binary search with halving window widths 32, 16, 8, 4, 2.
    // At each step, if the upper half of the current window is empty, the
    // width is added to the count and the lower half is kept; otherwise
    // the upper half is kept. The trip count is fixed.
    for (uint64_t width = 32; width >= 2; width >>= 1) {
        uint64_t upper = x >> width;
        uint64_t empty = sgnw(upper - 1); // all-ones iff upper == 0
        count |= empty & width;
        x = upper ^ (empty & (x ^ upper));
    }

    // x now fits on 2 bits. Remaining zeros to count:
    //   x = 0 -> 2,  x = 1 -> 1,  x = 2 -> 0,  x = 3 -> 0
    count += (2 - x) & ((x - 3) >> 2);
    return count;
}
#endif
// see gf5248.h
// Computes d <- x/y in the field with a constant-iteration-count extended
// binary GCD. The return value is the bitwise complement of
// gf5248_iszero(y).
// NOTE(review): presumably 0xFFFFFFFF when y is invertible and 0 when
// y == 0 - confirm against the return convention of gf5248_iszero().
uint32_t
gf5248_div(gf5248 *d, const gf5248 *x, const gf5248 *y)
{
// Extended binary GCD:
//
// a <- y
// b <- q (modulus)
// u <- x (self)
// v <- 0
//
// Value a is normalized (in the 0..q-1 range). Values a and b are
// then considered as (signed) integers. Values u and v are field
// elements.
//
// Invariants:
// a*x = y*u mod q
// b*x = y*v mod q
// b is always odd
//
// At each step:
// if a is even, then:
// a <- a/2, u <- u/2 mod q
// else:
// if a < b:
// (a, u, b, v) <- (b, v, a, u)
// a <- (a-b)/2, u <- (u-v)/2 mod q
//
// What we implement below is the optimized version of this
// algorithm, as described in https://eprint.iacr.org/2020/972
gf5248 a, b, u, v;
uint64_t xa, xb, f0, g0, f1, g1;
uint32_t r;
r = ~gf5248_iszero(y);
inner_gf5248_normalize(&a, y);
b = MODULUS;
u = *x;
v = gf5248_ZERO;
// Generic loop does 15*31 = 465 inner iterations.
for (int i = 0; i < 15; i++) {
// Get approximations of a and b over 64 bits:
// - If len(a) <= 64 and len(b) <= 64, then we just use
// their values (low limbs).
// - Otherwise, with n = max(len(a), len(b)), we use:
// (a mod 2^31) + 2^31*floor(a / 2^(n - 33))
// (b mod 2^31) + 2^31*floor(b / 2^(n - 33))
uint64_t m3 = a.v3 | b.v3;
uint64_t m2 = a.v2 | b.v2;
uint64_t m1 = a.v1 | b.v1;
// sgnw(m | -m) is all-ones iff m != 0; exactly one of tnz3/tnz2/tnz1
// is set (or none), marking the highest limb index that is non-zero
// in a or b.
uint64_t tnz3 = sgnw(m3 | -m3);
uint64_t tnz2 = sgnw(m2 | -m2) & ~tnz3;
uint64_t tnz1 = sgnw(m1 | -m1) & ~tnz3 & ~tnz2;
uint64_t tnzm = (m3 & tnz3) | (m2 & tnz2) | (m1 & tnz1);
uint64_t tnza = (a.v3 & tnz3) | (a.v2 & tnz2) | (a.v1 & tnz1);
uint64_t tnzb = (b.v3 & tnz3) | (b.v2 & tnz2) | (b.v1 & tnz1);
uint64_t snza = (a.v2 & tnz3) | (a.v1 & tnz2) | (a.v0 & tnz1);
uint64_t snzb = (b.v2 & tnz3) | (b.v1 & tnz2) | (b.v0 & tnz1);
// If both len(a) <= 64 and len(b) <= 64, then:
// tnzm = 0
// tnza = 0, snza = 0, tnzb = 0, snzb = 0
// Otherwise:
// tnzm != 0
// tnza contains the top non-zero limb of a
// snza contains the limb right below tnza
// tnzb contains the top non-zero limb of b
// snzb contains the limb right below tnzb
//
// We count the number of leading zero bits in tnzm:
// - If s <= 31, then the top 31 bits can be extracted from
// tnza and tnzb alone.
// - If 32 <= s <= 63, then we need some bits from snza and
// snzb as well.
int64_t s = lzcnt(tnzm);
// sm is all-ones when s >= 32 (sign of 31 - s), zero otherwise.
uint64_t sm = (uint64_t)((31 - s) >> 63);
tnza ^= sm & (tnza ^ ((tnza << 32) | (snza >> 32)));
tnzb ^= sm & (tnzb ^ ((tnzb << 32) | (snzb >> 32)));
s -= 32 & sm;
tnza <<= s;
tnzb <<= s;
// At this point:
// - If len(a) <= 64 and len(b) <= 64, then:
// tnza = 0
// tnzb = 0
// tnz1 = tnz2 = tnz3 = 0
// we want to use the entire low words of a and b
// - Otherwise, we want to use the top 33 bits of tnza and
// tnzb, and the low 31 bits of the low words of a and b.
uint64_t tzx = ~(tnz1 | tnz2 | tnz3);
tnza |= a.v0 & tzx;
tnzb |= b.v0 & tzx;
xa = (a.v0 & 0x7FFFFFFF) | (tnza & 0xFFFFFFFF80000000);
xb = (b.v0 & 0x7FFFFFFF) | (tnzb & 0xFFFFFFFF80000000);
// Compute the 31 inner iterations on xa and xb.
// The update factors are kept packed two per word: fg0 = (f0, g0)
// and fg1 = (f1, g1), with f in the low 32 bits and g in the high
// 32 bits; they start as (1, 0) and (0, 1).
uint64_t fg0 = (uint64_t)1;
uint64_t fg1 = (uint64_t)1 << 32;
for (int j = 0; j < 31; j++) {
uint64_t a_odd, swap, t0, t1, t2;
unsigned char cc;
a_odd = -(xa & 1);
// swap <- all-ones iff xa < xb (borrow of xa - xb) and xa is odd.
cc = inner_gf5248_sbb(0, xa, xb, &t0);
(void)inner_gf5248_sbb(cc, 0, 0, &swap);
swap &= a_odd;
t1 = swap & (xa ^ xb);
xa ^= t1;
xb ^= t1;
t2 = swap & (fg0 ^ fg1);
fg0 ^= t2;
fg1 ^= t2;
xa -= a_odd & xb;
fg0 -= a_odd & fg1;
xa >>= 1;
fg1 <<= 1;
}
// Unpack the signed factors: adding the bias 0x7FFFFFFF to both
// halves makes each half non-negative so they can be split, then
// the bias is subtracted again from each 64-bit result.
fg0 += 0x7FFFFFFF7FFFFFFF;
fg1 += 0x7FFFFFFF7FFFFFFF;
f0 = (fg0 & 0xFFFFFFFF) - (uint64_t)0x7FFFFFFF;
g0 = (fg0 >> 32) - (uint64_t)0x7FFFFFFF;
f1 = (fg1 & 0xFFFFFFFF) - (uint64_t)0x7FFFFFFF;
g1 = (fg1 >> 32) - (uint64_t)0x7FFFFFFF;
// Propagate updates to a, b, u and v.
gf5248 na, nb, nu, nv;
uint64_t nega = lindiv31abs(&na, &a, &b, f0, g0);
uint64_t negb = lindiv31abs(&nb, &a, &b, f1, g1);
// If a new a (resp. b) came out negative and was replaced by its
// absolute value, negate the matching factors so that u (resp. v)
// follows the same sign change.
f0 = (f0 ^ nega) - nega;
g0 = (g0 ^ nega) - nega;
f1 = (f1 ^ negb) - negb;
g1 = (g1 ^ negb) - negb;
gf5248_lin(&nu, &u, &v, f0, g0);
gf5248_lin(&nv, &u, &v, f1, g1);
a = na;
b = nb;
u = nu;
v = nv;
}
// If y is invertible, then the final GCD is 1, and
// len(a) + len(b) <= 37, so we can end the computation with
// the low words directly. We only need 35 iterations to reach
// the point where b = 1.
//
// If y is zero, then v is unchanged (hence zero) and none of
// the subsequent iterations will change it either, so we get
// 0 on output, which is what we want.
xa = a.v0;
xb = b.v0;
// Here the factors are kept in separate full words (not packed).
f0 = 1;
g0 = 0;
f1 = 0;
g1 = 1;
for (int j = 0; j < 35; j++) {
uint64_t a_odd, swap, t0, t1, t2, t3;
unsigned char cc;
a_odd = -(xa & 1);
cc = inner_gf5248_sbb(0, xa, xb, &t0);
(void)inner_gf5248_sbb(cc, 0, 0, &swap);
swap &= a_odd;
t1 = swap & (xa ^ xb);
xa ^= t1;
xb ^= t1;
t2 = swap & (f0 ^ f1);
f0 ^= t2;
f1 ^= t2;
t3 = swap & (g0 ^ g1);
g0 ^= t3;
g1 ^= t3;
xa -= a_odd & xb;
f0 -= a_odd & f1;
g0 -= a_odd & g1;
xa >>= 1;
f1 <<= 1;
g1 <<= 1;
}
// Only the (f1, g1) row is needed: it produces the final v.
gf5248_lin(d, &u, &v, f1, g1);
// At this point:
// - Numerator and denominator were both in Montgomery representation,
// but the two factors R canceled each other.
// - We have injected 31*15+35 = 500 extra factors of 2, hence we
// must divide the result by 2^500.
// - However, we also want to obtain the result in Montgomery
// representation, i.e. multiply by 2^256. We thus want to
// divide the current result by 2^(500 - 256) = 2^244.
// - We do this division by using a Montgomery multiplication with
// the Montgomery representation of 1/2^244, i.e. the integer
// 2^256/2^244 = 4096.
gf5248_mul(d, d, &INVT244);
return r;
}
// see gf5248.h
// Inversion expressed as a division: d <- gf5248_ONE / a. The return value
// is that of gf5248_div() (derived from whether a is zero).
uint32_t
gf5248_invert(gf5248 *d, const gf5248 *a)
{
return gf5248_div(d, &gf5248_ONE, a);
}
// see gf5248.h
// Legendre symbol of x. The result is built as 1 - (ls & 2), i.e. +1 or -1,
// and then masked with the complement of gf5248_iszero(x).
// NOTE(review): the masking yields 0 for x == 0 provided gf5248_iszero()
// returns an all-ones word for zero - confirm in gf5248.h.
int32_t
gf5248_legendre(const gf5248 *x)
{
// Same algorithm as the binary GCD in gf5248_div(), with
// a few differences:
// - We do not keep track of the Bézout coefficients u and v.
// - In each inner iteration we adjust the running symbol value,
// which uses the low 3 bits of the values.
// - Since we need two extra bits of look-ahead, we can only run
// 29 inner iterations, and then need an extra recomputation
// for the last 2.
gf5248 a, b;
uint64_t xa, xb, f0, g0, f1, g1, ls;
inner_gf5248_normalize(&a, x);
b = MODULUS;
ls = 0; // running symbol information in bit 1.
// Outer loop
for (int i = 0; i < 15; i++) {
// Get approximations of a and b over 64 bits.
// (Same extraction as in gf5248_div(): low 31 bits of the low limbs
// combined with the top 33 bits of the values.)
uint64_t m3 = a.v3 | b.v3;
uint64_t m2 = a.v2 | b.v2;
uint64_t m1 = a.v1 | b.v1;
uint64_t tnz3 = sgnw(m3 | -m3);
uint64_t tnz2 = sgnw(m2 | -m2) & ~tnz3;
uint64_t tnz1 = sgnw(m1 | -m1) & ~tnz3 & ~tnz2;
uint64_t tnzm = (m3 & tnz3) | (m2 & tnz2) | (m1 & tnz1);
uint64_t tnza = (a.v3 & tnz3) | (a.v2 & tnz2) | (a.v1 & tnz1);
uint64_t tnzb = (b.v3 & tnz3) | (b.v2 & tnz2) | (b.v1 & tnz1);
uint64_t snza = (a.v2 & tnz3) | (a.v1 & tnz2) | (a.v0 & tnz1);
uint64_t snzb = (b.v2 & tnz3) | (b.v1 & tnz2) | (b.v0 & tnz1);
int64_t s = lzcnt(tnzm);
uint64_t sm = (uint64_t)((31 - s) >> 63);
tnza ^= sm & (tnza ^ ((tnza << 32) | (snza >> 32)));
tnzb ^= sm & (tnzb ^ ((tnzb << 32) | (snzb >> 32)));
s -= 32 & sm;
tnza <<= s;
tnzb <<= s;
uint64_t tzx = ~(tnz1 | tnz2 | tnz3);
tnza |= a.v0 & tzx;
tnzb |= b.v0 & tzx;
xa = (a.v0 & 0x7FFFFFFF) | (tnza & 0xFFFFFFFF80000000);
xb = (b.v0 & 0x7FFFFFFF) | (tnzb & 0xFFFFFFFF80000000);
// First 29 inner iterations.
// fg0 / fg1 pack the update factors (f in the low 32 bits, g in the
// high 32 bits), starting from (1, 0) and (0, 1).
uint64_t fg0 = (uint64_t)1;
uint64_t fg1 = (uint64_t)1 << 32;
for (int j = 0; j < 29; j++) {
uint64_t a_odd, swap, t0, t1, t2;
unsigned char cc;
a_odd = -(xa & 1);
cc = inner_gf5248_sbb(0, xa, xb, &t0);
(void)inner_gf5248_sbb(cc, 0, 0, &swap);
swap &= a_odd;
// Symbol update for the swap; only bit 1 of ls is meaningful.
ls ^= swap & xa & xb;
t1 = swap & (xa ^ xb);
xa ^= t1;
xb ^= t1;
t2 = swap & (fg0 ^ fg1);
fg0 ^= t2;
fg1 ^= t2;
xa -= a_odd & xb;
fg0 -= a_odd & fg1;
xa >>= 1;
fg1 <<= 1;
// Symbol update for the halving; depends on the low bits of xb.
ls ^= (xb + 2) >> 1;
}
// Compute the updated a and b (low words only) to get
// enough bits for the next two iterations.
// The factors are unpacked from copies (fg0z, fg1z) so that fg0 and
// fg1 stay packed for the two remaining iterations.
uint64_t fg0z = fg0 + 0x7FFFFFFF7FFFFFFF;
uint64_t fg1z = fg1 + 0x7FFFFFFF7FFFFFFF;
f0 = (fg0z & 0xFFFFFFFF) - (uint64_t)0x7FFFFFFF;
g0 = (fg0z >> 32) - (uint64_t)0x7FFFFFFF;
f1 = (fg1z & 0xFFFFFFFF) - (uint64_t)0x7FFFFFFF;
g1 = (fg1z >> 32) - (uint64_t)0x7FFFFFFF;
uint64_t a0 = (a.v0 * f0 + b.v0 * g0) >> 29;
uint64_t b0 = (a.v0 * f1 + b.v0 * g1) >> 29;
for (int j = 0; j < 2; j++) {
uint64_t a_odd, swap, t0, t1, t2, t3;
unsigned char cc;
a_odd = -(xa & 1);
cc = inner_gf5248_sbb(0, xa, xb, &t0);
(void)inner_gf5248_sbb(cc, 0, 0, &swap);
swap &= a_odd;
// Decisions still come from xa/xb, but the symbol updates use the
// recomputed low words a0/b0.
ls ^= swap & a0 & b0;
t1 = swap & (xa ^ xb);
xa ^= t1;
xb ^= t1;
t2 = swap & (fg0 ^ fg1);
fg0 ^= t2;
fg1 ^= t2;
t3 = swap & (a0 ^ b0);
a0 ^= t3;
b0 ^= t3;
xa -= a_odd & xb;
fg0 -= a_odd & fg1;
a0 -= a_odd & b0;
xa >>= 1;
fg1 <<= 1;
a0 >>= 1;
ls ^= (b0 + 2) >> 1;
}
// Propagate updates to a and b.
fg0 += 0x7FFFFFFF7FFFFFFF;
fg1 += 0x7FFFFFFF7FFFFFFF;
f0 = (fg0 & 0xFFFFFFFF) - (uint64_t)0x7FFFFFFF;
g0 = (fg0 >> 32) - (uint64_t)0x7FFFFFFF;
f1 = (fg1 & 0xFFFFFFFF) - (uint64_t)0x7FFFFFFF;
g1 = (fg1 >> 32) - (uint64_t)0x7FFFFFFF;
gf5248 na, nb;
uint64_t nega = lindiv31abs(&na, &a, &b, f0, g0);
(void)lindiv31abs(&nb, &a, &b, f1, g1);
// If the new a had to be negated, the symbol is adjusted using the
// low bits of the new b.
ls ^= nega & nb.v0;
a = na;
b = nb;
}
// Final iterations: values are at most 37 bits now. We do not
// need to keep track of update coefficients. Just like the GCD,
// we need only 35 iterations, because after 35 iterations,
// value a is 0 or 1, and b is 1, and no further modification to
// the Legendre symbol may happen.
xa = a.v0;
xb = b.v0;
for (int j = 0; j < 35; j++) {
uint64_t a_odd, swap, t0, t1;
unsigned char cc;
a_odd = -(xa & 1);
cc = inner_gf5248_sbb(0, xa, xb, &t0);
(void)inner_gf5248_sbb(cc, 0, 0, &swap);
swap &= a_odd;
ls ^= swap & xa & xb;
t1 = swap & (xa ^ xb);
xa ^= t1;
xb ^= t1;
xa -= a_odd & xb;
xa >>= 1;
ls ^= (xb + 2) >> 1;
}
// At this point, if the source value was not zero, then bit 1
// of ls contains the QR status (0 = square, 1 = non-square),
// which we need to convert to the expected value (+1 or -1).
// If x == 0, then we return 0, per the API.
uint32_t r = 1 - ((uint32_t)ls & 2);
r &= ~gf5248_iszero(x);
return *(int32_t *)&r;
}
// see gf5248.h
// Writes a candidate square root of a into *d and returns the result of
// comparing its square with a.
uint32_t
gf5248_sqrt(gf5248 *d, const gf5248 *a)
{
    gf5248 root, plain, negated, check;

    // root = a^((q+1)/4) where (q+1)/4 = 5*2^246:
    // a^4, then a^5, then 246 further squarings.
    gf5248_xsquare(&root, a, 2);
    gf5248_mul(&root, &root, a);
    gf5248_xsquare(&root, &root, 246);

    // Of the two roots, keep the one whose plain (non-Montgomery) integer
    // value has a low bit of 0: the parity is read after leaving Montgomery
    // form, and the candidate is replaced by its negation when it is odd.
    inner_gf5248_montgomery_reduce(&plain, &root);
    uint32_t is_odd = -((uint32_t)plain.v0 & 1);
    gf5248_neg(&negated, &root);
    gf5248_select(&root, &root, &negated, is_odd);

    // Verify the candidate by squaring it. The comparison with *a happens
    // before *d is written, so d may alias a.
    gf5248_square(&check, &root);
    uint32_t ok = gf5248_equals(&check, a);

    *d = root;
    return ok;
}
// Store the 64-bit integer x at dst as 8 bytes, least significant first.
static inline void
enc64le(void *dst, uint64_t x)
{
    uint8_t *out = dst;

    for (int i = 0; i < 8; i++) {
        out[i] = (uint8_t)(x >> (8 * i));
    }
}
// Read 8 bytes at src as a 64-bit integer, least significant byte first.
static inline uint64_t
dec64le(const void *src)
{
    const uint8_t *in = src;
    uint64_t value = 0;

    // Accumulate from the most significant byte (index 7) downwards.
    for (int i = 7; i >= 0; i--) {
        value = (value << 8) | (uint64_t)in[i];
    }
    return value;
}
// see gf5248.h
// Writes 32 bytes at dst: the four limbs of the Montgomery-reduced value
// of *a, each in little-endian order, lowest limb first.
void
gf5248_encode(void *dst, const gf5248 *a)
{
    uint8_t *out = dst;
    gf5248 plain;

    // Leave Montgomery representation before serializing.
    inner_gf5248_montgomery_reduce(&plain, a);

    const uint64_t limbs[4] = { plain.v0, plain.v1, plain.v2, plain.v3 };
    for (int i = 0; i < 4; i++) {
        enc64le(out + 8 * i, limbs[i]);
    }
}
// see gf5248.h
// Reads 32 little-endian bytes from src. If the integer is below the
// modulus it is converted to Montgomery representation and stored in *d,
// and 0xFFFFFFFF is returned; otherwise *d is set to zero and 0 is
// returned.
uint32_t
gf5248_decode(gf5248 *d, const void *src)
{
    const uint8_t *in = src;
    uint64_t w0 = dec64le(in);
    uint64_t w1 = dec64le(in + 8);
    uint64_t w2 = dec64le(in + 16);
    uint64_t w3 = dec64le(in + 24);
    uint64_t scratch, canonical;
    unsigned char borrow;

    // Compare against the modulus: only the final borrow of (value - q)
    // matters, the difference itself is discarded.
    borrow = inner_gf5248_sbb(0, w0, MODULUS.v0, &scratch);
    borrow = inner_gf5248_sbb(borrow, w1, MODULUS.v1, &scratch);
    borrow = inner_gf5248_sbb(borrow, w2, MODULUS.v2, &scratch);
    borrow = inner_gf5248_sbb(borrow, w3, MODULUS.v3, &scratch);

    // canonical = -1 if value < q (borrow set), 0 otherwise.
    (void)inner_gf5248_sbb(borrow, 0, 0, &canonical);

    // Out-of-range inputs are cleared to zero.
    d->v0 = w0 & canonical;
    d->v1 = w1 & canonical;
    d->v2 = w2 & canonical;
    d->v3 = w3 & canonical;

    // Convert to Montgomery representation.
    gf5248_mul(d, d, &R2);
    return (uint32_t)canonical;
}
// see gf5248.h
// Decodes len bytes at src (little-endian, any length) into *d, reducing
// as it goes. The input is consumed in 32-byte blocks from the highest
// address down; an empty input yields gf5248_ZERO.
void
gf5248_decode_reduce(gf5248 *d, const void *src, size_t len)
{
    const uint8_t *in = src;
    size_t tail = len & 31; // bytes in the trailing partial block, if any
    size_t pos;             // offset of the lowest block already consumed

    *d = gf5248_ZERO;
    if (len == 0) {
        return;
    }

    if (tail == 0) {
        // The length is a multiple of 32: the top block is a full one and
        // needs a reduction.
        pos = len - 32;
        uint64_t w0 = dec64le(in + pos);
        uint64_t w1 = dec64le(in + pos + 8);
        uint64_t w2 = dec64le(in + pos + 16);
        uint64_t w3 = dec64le(in + pos + 24);
        inner_gf5248_partial_reduce(d, w0, w1, w2, w3);
    } else {
        // The top block is partial (at most 31 bytes, hence below 2^248):
        // zero-pad it to 32 bytes and take the limbs as they are.
        uint8_t block[32];
        pos = len - tail;
        memset(block, 0, sizeof block);
        memcpy(block, in + pos, tail);
        d->v0 = dec64le(block);
        d->v1 = dec64le(block + 8);
        d->v2 = dec64le(block + 16);
        d->v3 = dec64le(block + 24);
    }

    // Remaining full blocks, in descending address order: multiply the
    // accumulator by R2 and add the next block.
    while (pos > 0) {
        gf5248 blk;

        gf5248_mul(d, d, &R2);
        pos -= 32;
        uint64_t w0 = dec64le(in + pos);
        uint64_t w1 = dec64le(in + pos + 8);
        uint64_t w2 = dec64le(in + pos + 16);
        uint64_t w3 = dec64le(in + pos + 24);
        inner_gf5248_partial_reduce(&blk, w0, w1, w2, w3);
        gf5248_add(d, d, &blk);
    }

    // Final conversion to Montgomery representation.
    gf5248_mul(d, d, &R2);
}
// d <- a/3 (in the field).
//
// The limbs of a are first divided by 3 as a plain multi-word integer,
// most significant limb first, carrying the remainder (0, 1 or 2) down
// to the next limb. The final remainder c1 is then corrected in the
// field: since 3 * PM1O3 = q - 1, subtracting PM1O3 once per unit of
// remainder accounts for the missing c1/3.
void
gf5248_div3(gf5248 *d, const gf5248 *a)
{
    const digit_t MAGIC = 0xAAAAAAAAAAAAAAAB; // 3^-1 mod 2^64
    uint64_t c0, c1, f0, f1;
    gf5248 t;

    // Top limb: quotient = high word of (limb * MAGIC), shifted right by 1;
    // c1 is the remainder of that limb.
    inner_gf5248_umul(f0, f1, a->arr[3], MAGIC);
    t.arr[3] = f1 >> 1;
    c1 = a->arr[3] - 3 * t.arr[3];
    for (int32_t i = 2; i >= 0; i--) {
        c0 = c1; // remainder carried in from the limb above
        inner_gf5248_umul(f0, f1, a->arr[i], MAGIC);
        t.arr[i] = f1 >> 1;
        // New remainder candidate: carried remainder + remainder of this
        // limb, so c1 is in 0..4 here.
        c1 = c0 + a->arr[i] - 3 * t.arr[i];
        // Contribution of the carried remainder: c0 * (2^64 - 1)/3.
        t.arr[i] += c0 * ((MAGIC - 1) >> 1);
        // If c1 reached 3 or 4, move one more unit into the quotient.
        f0 = ((c1 >> 1) & c1); /* c1 == 3 */
        // The mask keeps the low two bits of c1. The original used 0x11
        // (hexadecimal 17), which only gave the same answer because c1
        // never exceeds 4 at this point.
        f1 = ((c1 >> 2) & !(c1 & 0x3)); /* c1 == 4 */
        f0 |= f1;
        t.arr[i] += f0;
        c1 = c1 - 3 * f0;
    }
    *d = t;
    // Field correction for the final remainder c1 in {0, 1, 2}: subtract
    // PM1O3 once if c1 >= 1, and a second time if c1 == 2.
    gf5248_sub(&t, d, &PM1O3);
    gf5248_select(d, d, &t, -((c1 & 1) | (c1 >> 1))); // c1 >= 1
    gf5248_sub(&t, d, &PM1O3);
    gf5248_select(d, d, &t, -(c1 == 2));
}

View File

@@ -1,7 +1,8 @@
#ifndef FP_H
#define FP_H
//////////////////////////////////////////////// NOTE: this is placed here for now
// Include statements
#include <sqisign_namespace.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
@@ -10,67 +11,129 @@
#include <tutil.h>
#include <fp_constants.h>
typedef digit_t fp_t[NWORDS_FIELD]; // Datatype for representing field elements
#include "gf5248.h"
void fp_set(digit_t* x, const digit_t val);
bool fp_is_equal(const digit_t* a, const digit_t* b);
bool fp_is_zero(const digit_t* a);
void fp_copy(digit_t* out, const digit_t* a);
digit_t mp_shiftr(digit_t* x, const unsigned int shift, const unsigned int nwords);
void mp_shiftl(digit_t* x, const unsigned int shift, const unsigned int nwords);
void fp_add(digit_t* out, const digit_t* a, const digit_t* b);
void fp_sub(digit_t* out, const digit_t* a, const digit_t* b);
void fp_neg(digit_t* out, const digit_t* a);
void fp_sqr(digit_t* out, const digit_t* a);
void fp_mul(digit_t* out, const digit_t* a, const digit_t* b);
void MUL(digit_t* out, const digit_t a, const digit_t b);
void fp_inv(digit_t* x);
bool fp_is_square(const digit_t* a);
void fp_sqrt(digit_t* a);
void fp_tomont(digit_t* out, const digit_t* a);
void fp_frommont(digit_t* out, const digit_t* a);
void fp_mont_setone(digit_t* out);
// Type for elements of GF(p)
#define fp_t gf5248
/********************** Constant-time unsigned comparisons ***********************/
// Constants (Assumed to be in Montgomery form)
#define ZERO gf5248_ZERO
#define ONE gf5248_ONE
// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise
static inline unsigned int is_digit_nonzero_ct(digit_t x)
{ // Is x != 0?
return (unsigned int)((x | (0 - x)) >> (RADIX - 1));
// Operations in fp
// fp-level alias: forwards (d, a) unchanged to gf5248_neg().
static inline void
fp_neg(fp_t *d, const fp_t *a)
{
gf5248_neg(d, a);
}
static inline unsigned int is_digit_zero_ct(digit_t x)
{ // Is x = 0?
return (unsigned int)(1 ^ is_digit_nonzero_ct(x));
void fp_add(fp_t *out, const fp_t *a, const fp_t *b); // implemented in fp_asm.S
void fp_sub(fp_t *out, const fp_t *a, const fp_t *b); // implemented in fp_asm.S
void fp_sqr(fp_t *out, const fp_t *a); // implemented in fp_asm.S
void fp_mul(fp_t *out, const fp_t *a, const fp_t *b); // implemented in fp_asm.S
// fp-level alias: forwards (d, a, n) unchanged to gf5248_mul_small(),
// where n is a 32-bit unsigned integer.
static inline void
fp_mul_small(fp_t *d, const fp_t *a, uint32_t n)
{
gf5248_mul_small(d, a, n);
}
static inline unsigned int is_digit_lessthan_ct(digit_t x, digit_t y)
{ // Is x < y?
return (unsigned int)((x ^ ((x ^ y) | ((x - y) ^ y))) >> (RADIX - 1));
// fp-level alias: forwards (d, a) unchanged to gf5248_half().
static inline void
fp_half(fp_t *d, const fp_t *a)
{
gf5248_half(d, a);
}
// #define fp_half gf5248_half
// fp-level alias: forwards (d, a) unchanged to gf5248_div3().
static inline void
fp_div3(fp_t *d, const fp_t *a)
{
gf5248_div3(d, a);
}
// #define fp_div3 gf5248_div3
// Constant time selection and swapping
// fp-level alias: forwards (d, a0, a1, ctl) unchanged to gf5248_select().
// NOTE(review): presumably ctl must be 0 or 0xFFFFFFFF, picking a0 or a1
// respectively - confirm against gf5248_select() in gf5248.h.
static inline void
fp_select(fp_t *d, const fp_t *a0, const fp_t *a1, uint32_t ctl)
{
gf5248_select(d, a0, a1, ctl);
}
// #define fp_select gf5248_select
// fp-level alias: forwards (a, b, ctl) unchanged to gf5248_cswap().
// NOTE(review): presumably ctl must be 0 (no swap) or 0xFFFFFFFF (swap) -
// confirm against gf5248_cswap() in gf5248.h.
static inline void
fp_cswap(fp_t *a, fp_t *b, uint32_t ctl)
{
gf5248_cswap(a, b, ctl);
}
// #define fp_cswap gf5248_cswap
// Comparisons for fp elements
// fp-level alias: returns the result of gf5248_iszero(a) unchanged.
// NOTE(review): the result is a 32-bit word, not a C bool; callers in this
// code base complement it bitwise (~), which suggests an all-ones / zero
// convention - confirm in gf5248.h.
static inline uint32_t
fp_is_zero(const fp_t *a)
{
return gf5248_iszero(a);
}
// #define fp_is_zero gf5248_iszero
// fp-level alias: returns the result of gf5248_equals(a, b) unchanged
// (a 32-bit word, not a C bool).
static inline uint32_t
fp_is_equal(const fp_t *a, const fp_t *b)
{
return gf5248_equals(a, b);
}
// #define fp_is_equal gf5248_equals
// Set a uint32 to an Fp value
// fp-level alias: forwards (d, x) unchanged to gf5248_set_small().
static inline void
fp_set_small(fp_t *d, uint32_t x)
{
gf5248_set_small(d, x);
}
// #define fp_set_small gf5248_set_small
// Encoding and decoding of bytes
// fp-level alias: forwards (dst, a) unchanged to gf5248_encode(), which
// writes four 8-byte little-endian words (32 bytes) to dst.
static inline void
fp_encode(void *dst, const fp_t *a)
{
gf5248_encode(dst, a);
}
// #define fp_encode gf5248_encode
// fp-level alias: returns the result of gf5248_decode(d, src) unchanged.
// gf5248_decode() reads 32 bytes from src and reports through its return
// value whether the encoded integer was below the modulus.
static inline uint32_t
fp_decode(fp_t *d, const void *src)
{
return gf5248_decode(d, src);
}
// #define fp_decode gf5248_decode
// fp-level alias: forwards (d, src, len) unchanged to
// gf5248_decode_reduce(), which accepts an input of arbitrary length len.
static inline void
fp_decode_reduce(fp_t *d, const void *src, size_t len)
{
gf5248_decode_reduce(d, src, len);
}
// #define fp_decode_reduce gf5248_decode_reduce
// These functions are essentially useless because we can just
// use = for the shallow copies we need, but they're here for
// now until we do a larger refactoring
// Shallow copy: *out <- *a.
// Plain structure assignment is used instead of memcpy(): it is
// well-defined when out and a point to the same element (memcpy() on
// overlapping objects is not), and it needs no <string.h> declaration.
static inline void
fp_copy(fp_t *out, const fp_t *a)
{
    *out = *a;
}
/********************** Platform-independent macros for digit-size operations **********************/
// Set *a to the constant ZERO.
static inline void
fp_set_zero(fp_t *a)
{
    *a = ZERO;
}
// Digit addition with carry
#define ADDC(sumOut, carryOut, addend1, addend2, carryIn) \
{ digit_t tempReg = (addend1) + (digit_t)(carryIn); \
(sumOut) = (addend2) + tempReg; \
(carryOut) = (is_digit_lessthan_ct(tempReg, (digit_t)(carryIn)) | is_digit_lessthan_ct((sumOut), tempReg)); }
// Set *a to the constant ONE.
static inline void
fp_set_one(fp_t *a)
{
    *a = ONE;
}
// Digit subtraction with borrow
#define SUBC(differenceOut, borrowOut, minuend, subtrahend, borrowIn) \
{ digit_t tempReg = (minuend) - (subtrahend); \
unsigned int borrowReg = (is_digit_lessthan_ct((minuend), (subtrahend)) | ((borrowIn) & is_digit_zero_ct(tempReg))); \
(differenceOut) = tempReg - (digit_t)(borrowIn); \
(borrowOut) = borrowReg; }
// Functions defined in low level code but with different API
void fp_inv(fp_t *a);
void fp_sqrt(fp_t *a);
void fp_exp3div4(fp_t *a);
uint32_t fp_is_square(const fp_t *a);
// Shift right with flexible datatype
#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \
(shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (DigitSize - (shift)));
// Digit shift left
#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \
(shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (RADIX - (shift)));
#endif
#endif

View File

@@ -1,29 +1,41 @@
#ifndef FP2_H
#define FP2_H
#include "fp.h"
#define NO_FP2X_MUL
#define NO_FP2X_SQR
// Structure for representing elements in GF(p^2)
typedef struct fp2_t {
fp_t re, im;
} fp2_t;
#include <fp2x.h>
void fp2_set(fp2_t* x, const digit_t val);
bool fp2_is_zero(const fp2_t* a);
bool fp2_is_equal(const fp2_t* a, const fp2_t* b);
void fp2_copy(fp2_t* x, const fp2_t* y);
fp2_t fp2_non_residue();
void fp2_add(fp2_t* x, const fp2_t* y, const fp2_t* z);
void fp2_sub(fp2_t* x, const fp2_t* y, const fp2_t* z);
void fp2_neg(fp2_t* x, const fp2_t* y);
void fp2_mul(fp2_t* x, const fp2_t* y, const fp2_t* z);
void fp2_sqr(fp2_t* x, const fp2_t* y);
void fp2_inv(fp2_t* x);
bool fp2_is_square(const fp2_t* x);
void fp2_frob(fp2_t* x, const fp2_t* y);
void fp2_sqrt(fp2_t* x);
void fp2_tomont(fp2_t* x, const fp2_t* y);
void fp2_frommont(fp2_t* x, const fp2_t* y);
int fp2_cmp(fp2_t* x, fp2_t* y);
extern void fp2_sq_c0(fp2_t *out, const fp2_t *in);
extern void fp2_sq_c1(fp_t *out, const fp2_t *in);
#endif
extern void fp2_mul_c0(fp_t *out, const fp2_t *in0, const fp2_t *in1);
extern void fp2_mul_c1(fp_t *out, const fp2_t *in0, const fp2_t *in1);
// x <- y * z in GF(p^2), assembled from the two external component
// routines.
static inline void
fp2_mul(fp2_t *x, const fp2_t *y, const fp2_t *z)
{
    fp_t real;

    // The real component goes to a temporary first and is stored into x
    // only after the imaginary component has been computed.
    // NOTE(review): presumably so that x may alias y or z - confirm.
    fp2_mul_c0(&real, y, z);  // c0 = a0*b0 - a1*b1
    fp2_mul_c1(&x->im, y, z); // c1 = a0*b1 + a1*b0

    for (int i = 0; i < 4; i++) {
        x->re.arr[i] = real.arr[i];
    }
}
// x <- y^2 in GF(p^2), assembled from the two external component routines.
static inline void
fp2_sqr(fp2_t *x, const fp2_t *y)
{
    fp2_t scratch;

    // fp2_sq_c0 takes an fp2_t output; only its real component is copied
    // out, after the imaginary component of x has been computed.
    fp2_sq_c0(&scratch, y); // c0 = (a0+a1)(a0-a1)
    fp2_sq_c1(&x->im, y);   // c1 = 2a0*a1

    for (int i = 0; i < 4; i++) {
        x->re.arr[i] = scratch.re.arr[i];
    }
}
#endif

Some files were not shown because too many files have changed in this diff Show More