initial version of SQIsign

Co-authored-by: Jorge Chavez-Saab <jorgechavezsaab@gmail.com>
Co-authored-by: Maria Corte-Real Santos <36373796+mariascrs@users.noreply.github.com>
Co-authored-by: Luca De Feo <github@defeo.lu>
Co-authored-by: Jonathan Komada Eriksen <jonathan.eriksen97@gmail.com>
Co-authored-by: Basil Hess <bhe@zurich.ibm.com>
Co-authored-by: Antonin Leroux <18654258+tonioecto@users.noreply.github.com>
Co-authored-by: Patrick Longa <plonga@microsoft.com>
Co-authored-by: Lorenz Panny <lorenz@yx7.cc>
Co-authored-by: Francisco Rodríguez-Henríquez <francisco.rodriguez@tii.ae>
Co-authored-by: Sina Schaeffler <108983332+syndrakon@users.noreply.github.com>
Co-authored-by: Benjamin Wesolowski <19474926+Calodeon@users.noreply.github.com>
This commit is contained in:
SQIsign team
2023-06-01 00:00:00 +00:00
committed by Lorenz Panny
commit 28ff420dd0
285 changed files with 70301 additions and 0 deletions

View File

@@ -0,0 +1,10 @@
# Field-arithmetic (GF) library for the selected variant, Broadwell flavour:
# the assembly kernels (fp_asm.S) plus the C code built on top of them.
set(SOURCE_FILES_GF_${SVARIANT_UPPER}_BROADWELL
    fp_asm.S fp.c fp2.c
)

add_library(${LIB_GF_${SVARIANT_UPPER}} ${SOURCE_FILES_GF_${SVARIANT_UPPER}_BROADWELL})

# Each include directory is listed exactly once (${INC_COMMON} does not need to
# be repeated); the relative search order of the directories is unchanged.
target_include_directories(${LIB_GF_${SVARIANT_UPPER}} PRIVATE
    common
    ${INC_COMMON}
    ${INC_PRECOMP_${SVARIANT_UPPER}}
    include
    ${PROJECT_SOURCE_DIR}/include
)
target_compile_options(${LIB_GF_${SVARIANT_UPPER}} PRIVATE ${C_OPT_FLAGS})

# Unit tests for this library live in the test/ subdirectory.
add_subdirectory(test)

View File

@@ -0,0 +1,46 @@
# Stand-alone build of the field arithmetic in this directory and its tests.
CC=gcc
CFLAGS= -O3 -std=gnu11 -Wall -march=native -Wno-missing-braces -Wno-logical-not-parentheses
LDFLAGS=-lm
AR=ar rcs
RANLIB=ranlib

OBJECTS=objs/fp_p1913.o objs/fp.o objs/fp2.o objs/fp_asm.o objs/random.o

# None of these targets is a file whose timestamp should be consulted.
# "lib" in particular is also the name of the output directory, so without
# .PHONY an existing lib/ directory could make the archive look up to date.
.PHONY: all lib tests check clean

all: lib tests

# Every object rule creates objs/ itself so that any single object can be
# built first (e.g. with "make -j" or "make objs/fp_asm.o").
objs/fp_p1913.o: fp_p1913.c
	@mkdir -p $(@D)
	$(CC) -c $(CFLAGS) fp_p1913.c -o objs/fp_p1913.o

objs/fp.o: fp.c
	@mkdir -p $(@D)
	$(CC) -c $(CFLAGS) fp.c -o objs/fp.o

objs/fp2.o: fp2.c
	@mkdir -p $(@D)
	$(CC) -c $(CFLAGS) fp2.c -o objs/fp2.o

objs/fp_asm.o: fp_asm.S
	@mkdir -p $(@D)
	$(CC) -c $(CFLAGS) fp_asm.S -o objs/fp_asm.o

objs/random.o: ../../../common/generic/randombytes_system.c
	@mkdir -p $(@D)
	$(CC) -c $(CFLAGS) ../../../common/generic/randombytes_system.c -o objs/random.o

# Static archive the test programs link against.
lib: $(OBJECTS)
	rm -rf lib
	mkdir lib
	$(AR) lib/libtest.a $^
	$(RANLIB) lib/libtest.a

# Test executables test_fp and test_fp2 (they additionally link against GMP).
tests: lib
	$(CC) $(CFLAGS) -L./lib test/test_fp.c test/test_extras.c -ltest $(LDFLAGS) -o test_fp -lgmp
	$(CC) $(CFLAGS) -L./lib test/test_fp2.c test/test_extras.c -ltest $(LDFLAGS) -o test_fp2 -lgmp

check: tests

clean:
	rm -rf *.req objs lib test_fp*

192
src/gf/broadwell/lvl1/fp.c Normal file
View File

@@ -0,0 +1,192 @@
#include "include/fp.h"
// Field modulus p, stored as 64-bit limbs with the least significant limb first
// (p_plus_1 in fp_asm.S holds the same value plus one in the same limb order).
const uint64_t p[NWORDS_FIELD] = { 0xffffffffffffffff, 0x252C9E49355147FF, 0x33A6A86587407437, 0x34E29E286B95D98C };
// Multiplier used by fp_tomont() to enter Montgomery form (described there as R^2).
const uint64_t R2[NWORDS_FIELD] = { 0x233625AE400674D4, 0x20AFD6C1025A1C2E, 0x30A841AB0920655D, 0x0D72E7D67C30CD3D };
// NOTE(review): not referenced by the code visible here; presumably the Montgomery
// constant -p^(-1) mod 2^64, which is 1 because the lowest limb of p is all ones -- confirm.
const uint64_t pp[NWORDS_FIELD] = { 0x01, 0x00, 0x00, 0x00 };
void fp_set(digit_t* x, const digit_t val)
{ // Load a single-word value into a field element: x = val.
  // val lands in the least significant limb; every higher limb is cleared.
  // No Montgomery conversion is performed here.
    unsigned int limb = NWORDS_FIELD;

    while (limb-- > 1) {
        x[limb] = 0;
    }
    x[0] = val;
}
void fp_mont_setone(digit_t* out1)
{ // Write a fixed four-limb constant to out1 (least significant limb first).
  // NOTE(review): the constant is presumably 1 in Montgomery form (R mod p) -- confirm
  // against fp_tomont(1).
    static const digit_t mont_one[4] = {
        0x4,
        UINT64_C(0x6b4d86db2abae000),
        UINT64_C(0x31655e69e2fe2f23),
        UINT64_C(0x2c75875e51a899cf)
    };

    for (unsigned int k = 0; k < 4; k++) {
        out1[k] = mont_one[k];
    }
}
bool fp_is_equal(const digit_t* a, const digit_t* b)
{ // Constant-time equality test of two field elements.
  // Returns 1 (true) when every limb of a matches b, 0 (false) otherwise.
  // All limbs are always visited; the differences are OR-ed together so there
  // is no early exit that depends on the data.
    digit_t diff = 0;
    unsigned int k = 0;

    while (k < NWORDS_FIELD) {
        diff |= a[k] ^ b[k];
        k++;
    }
    return (bool)is_digit_zero_ct(diff);
}
bool fp_is_zero(const digit_t* a)
{ // Constant-time test for the all-zero limb pattern.
  // Returns 1 (true) if every limb of a is 0, 0 (false) otherwise.
    digit_t acc = 0;
    unsigned int k = 0;

    while (k < NWORDS_FIELD) {
        acc |= a[k];
        k++;
    }
    return (bool)is_digit_zero_ct(acc);
}
void fp_copy(digit_t* out, const digit_t* a)
{ // Copy one field element: out = a.
  // The byte count is NWORDS_FIELD limbs of RADIX bits each.
    const size_t nbytes = NWORDS_FIELD*RADIX/8;

    memcpy(out, a, nbytes);
}
void fp_neg(digit_t* out, const digit_t* a)
{ // Modular negation, out = -a mod p
  // Input: a in [0, p-1]
  // Output: out in [0, p-1]
    const digit_t* modulus = (const digit_t*)p;
    unsigned int borrow = 0;

    // First pass: out = p - a, limb by limb with borrow propagation.
    for (unsigned int k = 0; k < NWORDS_FIELD; k++) {
        SUBC(out[k], borrow, modulus[k], a[k], borrow);
    }
    // Second pass: subtract p modulo p. For a = 0 the first pass yields p,
    // which this maps to 0; any other value is returned unchanged.
    fp_sub(out, out, modulus);
}
void fp_tomont(digit_t* out, const digit_t* a)
{ // Conversion to Montgomery representation
  // out = a*R^2*R^(-1) mod p = a*R mod p, where a in [0, p-1].
  // Implemented as one Montgomery multiplication by the precomputed constant R2.
    const digit_t* r_squared = (const digit_t*)R2;

    fp_mul(out, a, r_squared);
}
void fp_frommont(digit_t* out, const digit_t* a)
{ // Conversion from Montgomery representation to standard representation
  // out = a*R^(-1) mod p, where a in [0, p-1].
  // A Montgomery multiplication by the plain integer 1 strips one factor of R.
    digit_t unit[NWORDS_FIELD] = { 1 };

    fp_mul(out, a, unit);
}
void MUL(digit_t* out, const digit_t a, const digit_t b)
{ // Digit multiplication, digit*digit -> 2-digit result
  // Inputs: a, b in [0, 2^w-1], where w is the computer wordsize
  // Output: out[0] = low digit, out[1] = high digit of a*b
  // Schoolbook product on half-digits: a = a_hi*2^(w/2) + a_lo, likewise for b.
    const unsigned int half = (unsigned int)(sizeof(digit_t)*4);   // bits in half a digit
    const digit_t lo_mask = (digit_t)(-1) >> half;
    const digit_t hi_mask = (digit_t)(-1) << half;

    const digit_t a_lo = a & lo_mask, a_hi = a >> half;
    const digit_t b_lo = b & lo_mask, b_hi = b >> half;

    // The four partial products; each one fits in a full digit.
    const digit_t lo_lo = a_lo * b_lo;
    const digit_t lo_hi = a_lo * b_hi;
    const digit_t hi_lo = a_hi * b_lo;
    const digit_t hi_hi = a_hi * b_hi;

    // Second quarter of the result: three half-digit terms, so the sum can
    // spill into its upper half; that spill is carried into the next column.
    const digit_t mid = (lo_lo >> half) + (hi_lo & lo_mask) + (lo_hi & lo_mask);
    out[0] = (lo_lo & lo_mask) ^ (mid << half);

    // Third and fourth quarters.
    const digit_t top = (hi_lo >> half) + (lo_hi >> half) + (hi_hi & lo_mask) + (mid >> half);
    out[1] = (top & lo_mask) ^ ((hi_hi & hi_mask) + (top & hi_mask));
}
digit_t mp_shiftr(digit_t* x, const unsigned int shift, const unsigned int nwords)
{ // Multiprecision right shift, in place: x = x >> shift over nwords limbs
  // (least significant limb first).
  // Returns the least significant bit x had before the shift (x[0] & 1); for
  // shift = 1 this is exactly the bit that was shifted out.
  // Supported range: 0 <= shift < RADIX.
  //   - nwords = 0: nothing to shift, returns 0 without touching x.
  //   - shift  = 0: x is left unchanged. (SHIFTR would otherwise shift a limb
  //     by RADIX bits, which is undefined behaviour in C.)
    if (nwords == 0) {
        return 0;
    }

    const digit_t bit_out = x[0] & 1;

    if (shift == 0) {
        return bit_out;
    }
    // Each limb takes its own upper bits plus the low bits of its higher neighbour.
    for (unsigned int i = 0; i + 1 < nwords; i++) {
        SHIFTR(x[i+1], x[i], shift, x[i], RADIX);
    }
    x[nwords-1] >>= shift;
    return bit_out;
}
void mp_shiftl(digit_t* x, const unsigned int shift, const unsigned int nwords)
{ // Multiprecision left shift, in place: x = x << shift over nwords limbs
  // (least significant limb first). Bits shifted out of the top limb are dropped.
  // Supported range: 0 <= shift < RADIX.
  //   - nwords = 0 or shift = 0: x is left unchanged. (With shift = 0 SHIFTL
  //     would shift a limb by RADIX bits, which is undefined behaviour in C;
  //     with nwords = 0 there is no x[0] to write.)
    if (nwords == 0 || shift == 0) {
        return;
    }
    // Walk from the top limb down so every limb still sees its unshifted lower neighbour.
    for (unsigned int i = nwords - 1; i > 0; i--) {
        SHIFTL(x[i], x[i-1], shift, x[i], RADIX);
    }
    x[0] <<= shift;
}
static void fp_exp3div4(digit_t* out, const digit_t* a)
{ // Fixed exponentiation out = a^((p-3)/4) mod p
  // Input: a in [0, p-1]
  // Output: out in [0, p-1]
  // Requirement: p = 3(mod 4)
  // Right-to-left square-and-multiply. The multiplication is only executed for
  // set exponent bits, but the exponent is the public constant (p-3)/4.
    fp_t exponent, base;

    fp_copy(exponent, (const digit_t*)p);
    fp_copy(base, a);

    // exponent = p >> 2, done as two single-bit shifts; equals (p-3)/4 when p = 3 (mod 4).
    mp_shiftr(exponent, 1, NWORDS_FIELD);
    mp_shiftr(exponent, 1, NWORDS_FIELD);

    // out = 1 in Montgomery form.
    fp_set(out, 1);
    fp_tomont(out, out);

    int remaining = NWORDS_FIELD*RADIX-2;
    while (remaining-- > 0) {
        // mp_shiftr returns the low bit of the exponent before shifting it away.
        if (mp_shiftr(exponent, 1, NWORDS_FIELD) == 1) {
            fp_mul(out, out, base);
        }
        fp_sqr(base, base);
    }
}
void fp_inv(digit_t* a)
{ // Modular inversion in place, a <- x^-1*R mod p, where R = 2^(w*nwords), w is the computer wordsize and nwords is the number of words to represent p
  // Input: a=xR in [0, p-1]
  // Output: a in [0, p-1]. It outputs 0 if the input does not have an inverse
  // Requirement: Ceiling(Log(p)) < w*nwords
  // Computed as a^(p-2) = (a^((p-3)/4))^4 * a.
    fp_t pw;

    fp_exp3div4(pw, a);             // a^((p-3)/4)
    for (int k = 0; k < 2; k++) {
        fp_sqr(pw, pw);             // after two squarings: a^(p-3)
    }
    fp_mul(a, pw, a);               // a^(p-2)
}
bool fp_is_square(const digit_t* a)
{ // Is field element a square?
  // Output: out = 0 (false), 1 (true)
  // Computes a^((p-1)/2) and reports whether it equals 1; consequently the
  // input 0 is reported as "not a square".
    fp_t euler, unit;

    fp_set(unit, 1);                // plain (non-Montgomery) 1 for the final comparison

    fp_exp3div4(euler, a);          // a^((p-3)/4)
    fp_sqr(euler, euler);           // a^((p-3)/2)
    fp_mul(euler, euler, a);        // a^((p-1)/2)
    fp_frommont(euler, euler);      // leave Montgomery form before comparing with 1

    return fp_is_equal(euler, unit);
}
void fp_sqrt(digit_t* a)
{ // Square root computation in place, a <- a^((p+1)/4) mod p
  // The function does not check that the input is a square.
    fp_t root_factor;

    fp_exp3div4(root_factor, a);    // a^((p-3)/4)
    fp_mul(a, root_factor, a);      // a^((p-3)/4) * a = a^((p+1)/4)
}

190
src/gf/broadwell/lvl1/fp2.c Normal file
View File

@@ -0,0 +1,190 @@
#include <fp2.h>
// NOTE(review): R is declared but not referenced by the code visible in this file -- confirm it is still needed.
extern const digit_t R[NWORDS_FIELD];
// Assembly kernels from fp_asm.S. Each one produces a single component of a
// GF(p^2) product or square.
// fp2_sq_c0: real part of in^2. Takes a full fp2_t as output because the routine
// uses both halves of *out as scratch (it stores a0+a1 and a0-a1+p there) before
// writing the result into the first half.
extern void fp2_sq_c0(fp2_t *out, const fp2_t *in);
// fp2_sq_c1: imaginary part of in^2 (2*a0*a1).
extern void fp2_sq_c1(fp_t *out, const fp2_t *in);
// fp2_mul_c0: real part of in0*in1 (a0*b0 - a1*b1). *out is used as scratch before
// the inputs have been fully read, so it must not overlap in0 or in1.
extern void fp2_mul_c0(fp_t *out, const fp2_t *in0, const fp2_t *in1);
// fp2_mul_c1: imaginary part of in0*in1 (a0*b1 + a1*b0).
extern void fp2_mul_c1(fp_t *out, const fp2_t *in0, const fp2_t *in1);
/* Arithmetic modulo X^2 + 1 */
void fp2_set(fp2_t* x, const digit_t val)
{ // x = val + 0*i, with val a single-word value (no Montgomery conversion).
    fp_set(x->im, 0);
    fp_set(x->re, val);
}
bool fp2_is_zero(const fp2_t* a)
{ // Is a GF(p^2) element zero?
  // Returns 1 (true) if a=0, 0 (false) otherwise
  // Both components are always examined and combined with a bitwise AND.
    const bool re_is_zero = fp_is_zero(a->re);
    const bool im_is_zero = fp_is_zero(a->im);

    return re_is_zero & im_is_zero;
}
bool fp2_is_equal(const fp2_t* a, const fp2_t* b)
{ // Compare two GF(p^2) elements in constant time
  // Returns 1 (true) if a=b, 0 (false) otherwise
  // Both components are always compared and combined with a bitwise AND.
    const bool re_matches = fp_is_equal(a->re, b->re);
    const bool im_matches = fp_is_equal(a->im, b->im);

    return re_matches & im_matches;
}
void fp2_copy(fp2_t* x, const fp2_t* y)
{ // x = y, copied component by component.
    fp_copy(x->im, y->im);
    fp_copy(x->re, y->re);
}
fp2_t fp2_non_residue()
{ // 2 + i is a quadratic non-residue for p1913
  // Returns that element with both components in Montgomery form.
    fp_t mont_one = { 1 };
    fp2_t nr;

    fp_tomont(mont_one, mont_one);          // 1 in Montgomery form
    fp_copy(nr.im, mont_one);               // imaginary part = 1
    fp_add(nr.re, mont_one, mont_one);      // real part = 1 + 1 = 2
    return nr;
}
void fp2_add(fp2_t* x, const fp2_t* y, const fp2_t* z)
{ // x = y + z in GF(p^2): the two components are added independently.
    fp_add(x->im, y->im, z->im);
    fp_add(x->re, y->re, z->re);
}
void fp2_sub(fp2_t* x, const fp2_t* y, const fp2_t* z)
{ // x = y - z in GF(p^2): the two components are subtracted independently.
    fp_sub(x->im, y->im, z->im);
    fp_sub(x->re, y->re, z->re);
}
void fp2_neg(fp2_t* x, const fp2_t* y)
{ // x = -y in GF(p^2): both components are negated.
    fp_neg(x->im, y->im);
    fp_neg(x->re, y->re);
}
void fp2_mul(fp2_t* x, const fp2_t* y, const fp2_t* z)
{ // x = y * z in GF(p^2), using the two assembly kernels.
  // The real part goes through a temporary: it is computed first, while y and z
  // are still intact, and only copied into x after the imaginary part is done,
  // so x may be the same object as y or z.
    fp_t real_part;

    fp2_mul_c0(&real_part, y, z);   // c0 = a0*b0 - a1*b1
    fp2_mul_c1(&x->im, y, z);       // c1 = a0*b1 + a1*b0
    for (unsigned int k = 0; k < 4; k++) {
        x->re[k] = real_part[k];
    }
}
void fp2_sqr(fp2_t* x, const fp2_t* y)
{ // x = y^2 in GF(p^2), using the two assembly kernels.
  // fp2_sq_c0 needs a full fp2_t of output space (it uses both halves as
  // scratch); only its first half holds the result, which is copied into x->re
  // after the imaginary part has been computed from the still-intact y.
    fp2_t scratch;

    fp2_sq_c0(&scratch, y);         // c0 = (a0+a1)(a0-a1)
    fp2_sq_c1(&x->im, y);           // c1 = 2a0*a1
    for (unsigned int k = 0; k < 4; k++) {
        x->re[k] = scratch.re[k];
    }
}
void fp2_inv(fp2_t* x)
{ // In-place inversion in GF(p^2): 1/(a + b*i) = (a - b*i) / (a^2 + b^2).
    fp_t norm, im_sq;

    // norm = a^2 + b^2, then inverted in GF(p).
    fp_sqr(im_sq, x->im);
    fp_sqr(norm, x->re);
    fp_add(norm, norm, im_sq);
    fp_inv(norm);

    // Scale both components by 1/norm and conjugate.
    fp_mul(x->re, x->re, norm);
    fp_mul(x->im, x->im, norm);
    fp_neg(x->im, x->im);
}
bool fp2_is_square(const fp2_t* x)
{ // Square test in GF(p^2): forms re^2 + im^2 and returns whether that value
  // is a square in GF(p).
    fp_t norm, im_sq;

    fp_sqr(im_sq, x->im);
    fp_sqr(norm, x->re);
    fp_add(norm, norm, im_sq);
    return fp_is_square(norm);
}
void fp2_frob(fp2_t* x, const fp2_t* y)
{ // x = conjugate of y: the real part is copied, the imaginary part is negated.
    fp_copy(x->re, y->re);
    fp_neg(x->im, y->im);
}
void fp2_tomont(fp2_t* x, const fp2_t* y)
{ // Convert both components of y to Montgomery form and store them in x.
    fp_tomont(x->im, y->im);
    fp_tomont(x->re, y->re);
}
void fp2_frommont(fp2_t* x, const fp2_t* y)
{ // Convert both components of y out of Montgomery form and store them in x.
    fp_frommont(x->im, y->im);
    fp_frommont(x->re, y->re);
}
// NOTE: old, non-constant-time implementation. Could be optimized
void fp2_sqrt(fp2_t* x)
{ // In-place square root in GF(p^2). The input is not checked for being a square.
  // For x = a + b*i with b != 0 it uses
  //     sdelta = sqrt(a^2 + b^2),  re = sqrt((a +/- sdelta)/2),  im = b/(2*re).
    fp_t sdelta, re, im, inv2, im_sq;

    // Purely real input: the root is either real or purely imaginary.
    if (fp_is_zero(x->im)) {
        if (!fp_is_square(x->re)) {
            // a is not a square in GF(p): sqrt(a) = sqrt(-a) * i
            fp_neg(x->im, x->re);
            fp_sqrt(x->im);
            fp_set(x->re, 0);
        } else {
            fp_sqrt(x->re);
        }
        return;
    }

    // sdelta = sqrt(re^2 + im^2)
    fp_sqr(sdelta, x->re);
    fp_sqr(im_sq, x->im);
    fp_add(sdelta, sdelta, im_sq);
    fp_sqrt(sdelta);

    // inv2 = 1/2
    fp_set(inv2, 2);
    fp_tomont(inv2, inv2);
    fp_inv(inv2);

    // re = (a + sdelta)/2, falling back to (a - sdelta)/2 if that is not a square
    fp_add(re, x->re, sdelta);
    fp_mul(re, re, inv2);
    if (!fp_is_square(re)) {
        fp_sub(re, x->re, sdelta);
        fp_mul(re, re, inv2);
    }
    fp_sqrt(re);

    // im = b / (2*re)
    fp_copy(im, re);
    fp_inv(im);
    fp_mul(im, im, inv2);
    fp_mul(x->im, im, x->im);

    fp_copy(x->re, re);
}
// Lexicographic comparison of two field elements. Returns +1 if x > y, -1 if x < y, 0 if x = y
// Both inputs are first taken out of Montgomery form. The real parts are then
// compared as integers (most significant limb first); the imaginary parts are
// only consulted when the real parts are equal. Not constant time.
int fp2_cmp(fp2_t* x, fp2_t* y){
    fp2_t lhs, rhs;

    fp2_frommont(&lhs, x);
    fp2_frommont(&rhs, y);

    const digit_t* const left_parts[2]  = { lhs.re, lhs.im };
    const digit_t* const right_parts[2] = { rhs.re, rhs.im };

    for (int part = 0; part < 2; part++) {
        const digit_t* l = left_parts[part];
        const digit_t* r = right_parts[part];

        for (int i = NWORDS_FIELD-1; i >= 0; i--) {
            if (l[i] != r[i]) {
                return (l[i] > r[i]) ? 1 : -1;
            }
        }
    }
    return 0;
}

View File

@@ -0,0 +1,555 @@
// Declare that this object does not need an executable stack. Without this
// note, ELF linkers assume an assembly object may require one and either warn
// or mark the stack of the final binary executable.
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
// Return to the code section so that everything below is emitted where it was before.
.text
.intel_syntax noprefix
// Size of a field element in bytes / in 64-bit limbs.
.set pbytes,32
.set plimbs,4
// p + 1 as four 64-bit limbs, least significant first. The lowest limb is zero,
// which is why the reduction steps below start reading at p_plus_1+8.
.global p_plus_1
p_plus_1: .quad 0x0000000000000000, 0x252C9E4935514800, 0x33A6A86587407437, 0x34E29E286B95D98C
.text
.p2align 4,,15
//***********************************************************************
// Field addition in GF(p)
// Operation: c [rdi] = a [rsi] + b [rdx] mod p
// Computes a + b - p and adds p back when that subtraction borrows.
// The carry out of the top limb of a + b is not captured.
// NOTE(review): presumably relies on a, b in [0, p-1] so that a + b fits in 256 bits -- confirm.
//***********************************************************************
.global fp_add
fp_add:
// r12 is used as a temporary below; it is saved here and restored before ret
push r12
// rax = 0, later turned into the borrow mask by "sbb rax, 0"
xor rax, rax
// [r8:r11] <- a
mov r8, [rsi]
mov r9, [rsi+8]
mov r10, [rsi+16]
mov r11, [rsi+24]
// [r8:r11] <- a + b
add r8, [rdx]
adc r9, [rdx+8]
adc r10, [rdx+16]
adc r11, [rdx+24]
// [r8:r11] <- a + b - p, keeping the limbs of p in r12, rcx, rsi, rdx
mov r12, [rip+p]
sub r8, r12
mov rcx, [rip+p+8]
sbb r9, rcx
mov rsi, [rip+p+16]
sbb r10, rsi
mov rdx, [rip+p+24]
sbb r11, rdx
// rax <- all ones if the subtraction borrowed, 0 otherwise
sbb rax, 0
// [r12, rcx, rsi, rdx] <- p if borrowed, 0 otherwise
and r12, rax
and rcx, rax
and rsi, rax
and rdx, rax
// add the masked p back
add r8, r12
adc r9, rcx
adc r10, rsi
adc r11, rdx
// store the result
mov [rdi], r8
mov [rdi+8], r9
mov [rdi+16], r10
mov [rdi+24], r11
pop r12
ret
//***********************************************************************
// Field subtraction in GF(p)
// Operation: c [rdi] = a [rsi] - b [rdx] mod p
// Computes a - b and adds p back when the subtraction borrows.
//***********************************************************************
.global fp_sub
fp_sub:
// r12 is used as a temporary below; it is saved here and restored before ret
push r12
// rax = 0, later turned into the borrow mask by "sbb rax, 0"
xor rax, rax
// [r8:r11] <- a
mov r8, [rsi]
mov r9, [rsi+8]
mov r10, [rsi+16]
mov r11, [rsi+24]
// [r8:r11] <- a - b
sub r8, [rdx]
sbb r9, [rdx+8]
sbb r10, [rdx+16]
sbb r11, [rdx+24]
// rax <- all ones if the subtraction borrowed, 0 otherwise
sbb rax, 0
// [r12, rcx, rsi, rdx] <- p if borrowed, 0 otherwise
mov r12, [rip+p]
mov rcx, [rip+p+8]
mov rsi, [rip+p+16]
mov rdx, [rip+p+24]
and r12, rax
and rcx, rax
and rsi, rax
and rdx, rax
// add the masked p back
add r8, r12
adc r9, rcx
adc r10, rsi
adc r11, rdx
// store the result
mov [rdi], r8
mov [rdi+8], r9
mov [rdi+16], r10
mov [rdi+24], r11
pop r12
ret
///////////////////////////////////////////////////////////////// MACROS
// z = a x bi + z
// Inputs: base memory pointer M1 (a, four 64-bit limbs),
// bi pre-stored in rdx,
// accumulator z in [Z0:Z4]
// Output: [Z0:Z4]
// Temps: regs T0:T1
// C: register that is zeroed with "xor C, C"; the xor also clears CF and OF
// before the two carry chains start. Passing the top limb Z4 as C starts
// that limb from zero; passing another register keeps Z4 and clears that
// register instead.
// The low halves of the products are accumulated with the adcx (CF) chain and
// the high halves with the adox (OF) chain, except for the very first product
// whose low half opens the adox chain.
/////////////////////////////////////////////////////////////////
.macro MULADD64x256 M1, Z0, Z1, Z2, Z3, Z4, T0, T1, C
mulx \T0, \T1, \M1 // A0*B0
// clear C, CF and OF (mulx itself does not touch the flags)
xor \C, \C
adox \Z0, \T1
adox \Z1, \T0
mulx \T0, \T1, 8\M1 // A0*B1
adcx \Z1, \T1
adox \Z2, \T0
mulx \T0, \T1, 16\M1 // A0*B2
adcx \Z2, \T1
adox \Z3, \T0
mulx \T0, \T1, 24\M1 // A0*B3
adcx \Z3, \T1
adox \Z4, \T0
// fold the carry left in CF by the adcx chain into the top limb
adc \Z4, 0
.endm
// z = a x rdx + z for a three-limb operand a
// Inputs: base memory pointer M1 (a, three 64-bit limbs),
// multiplier pre-stored in rdx,
// accumulator z in [Z0:Z3]
// Output: [Z0:Z3]
// Temps: regs T0:T1
// Clobbers rax, which is left at 0 (the xor also clears CF and OF for the
// adcx/adox carry chains).
.macro MULADD64x192 M1, Z0, Z1, Z2, Z3, T0, T1
mulx \T0, \T1, \M1 // A0*B0
xor rax, rax
adox \Z0, \T1
adox \Z1, \T0
mulx \T0, \T1, 8\M1 // A0*B1
adcx \Z1, \T1
adox \Z2, \T0
mulx \T0, \T1, 16\M1 // A0*B2
adcx \Z2, \T1
adox \Z3, \T0
// fold the carry left in CF by the adcx chain into the top limb
adc \Z3, 0
.endm
//***********************************************************************
// Multiplication in GF(p^2), non-complex part
// Operation: c [rdi] = a0 x b0 - a1 x b1
// Inputs: a = [a1, a0] stored in [rsi] (a0 at [rsi], a1 at [rsi+32])
// b = [b1, b0] stored in [rdx] (b0 at [rdx], b1 at [rdx+32])
// Output: c stored in [rdi]
// The subtraction is realised as a0 x b0 + a1 x (p - b1). The value p - b1 is
// parked in [rdi] before the inputs have been fully read, so the output buffer
// must not overlap a or b.
// Multiplication and Montgomery reduction are interleaved one limb of b at a time.
//***********************************************************************
.global fp2_mul_c0
fp2_mul_c0:
// r12-r14 are used as temporaries; saved here and restored before ret
push r12
push r13
push r14
// rdx is the implicit mulx operand, so the pointer to b moves to rcx
mov rcx, rdx
// [rdi0:3] <- p - b1
mov r8, [rip+p]
mov r9, [rip+p+8]
mov r10, [rip+p+16]
mov r11, [rip+p+24]
mov rax, [rcx+32]
mov rdx, [rcx+40]
sub r8, rax
sbb r9, rdx
mov rax, [rcx+48]
mov rdx, [rcx+56]
sbb r10, rax
sbb r11, rdx
mov [rdi], r8
mov [rdi+8], r9
mov [rdi+16], r10
mov [rdi+24], r11
// [r8:r12] <- z = a0 x b00 - a1 x b10
mov rdx, [rcx]
mulx r9, r8, [rsi]
xor rax, rax
mulx r10, r11, [rsi+8]
adox r9, r11
mulx r11, r12, [rsi+16]
adox r10, r12
mulx r12, r13, [rsi+24]
adox r11, r13
adox r12, rax
// rdx <- limb 0 of (p - b1)
mov rdx, [rdi]
MULADD64x256 [rsi+32], r8, r9, r10, r11, r12, r13, r14, rax
// [r9:r12] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, r8 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], r9, r10, r11, r12, r13, r14
// [r9:r12, r8] <- z = a0 x b01 - a1 x b11 + z
mov rdx, [rcx+8]
MULADD64x256 [rsi], r9, r10, r11, r12, r8, r13, r14, r8
mov rdx, [rdi+8]
MULADD64x256 [rsi+32], r9, r10, r11, r12, r8, r13, r14, rax
// [r10:r12, r8] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, r9 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], r10, r11, r12, r8, r13, r14
// [r10:r12, r8:r9] <- z = a0 x b02 - a1 x b12 + z
mov rdx, [rcx+16]
MULADD64x256 [rsi], r10, r11, r12, r8, r9, r13, r14, r9
mov rdx, [rdi+16]
MULADD64x256 [rsi+32], r10, r11, r12, r8, r9, r13, r14, rax
// [r11:r12, r8:r9] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, r10 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], r11, r12, r8, r9, r13, r14
// [r11:r12, r8:r10] <- z = a0 x b03 - a1 x b13 + z
mov rdx, [rcx+24]
MULADD64x256 [rsi], r11, r12, r8, r9, r10, r13, r14, r10
mov rdx, [rdi+24]
MULADD64x256 [rsi+32], r11, r12, r8, r9, r10, r13, r14, rax
// [r12, r8:r10] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, r11 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], r12, r8, r9, r10, r13, r14
// Final correction
// z - p is computed; rax (left at 0 by the last MULADD64x192) becomes an
// all-ones mask when that subtraction borrows, and the masked p is added back.
mov rsi, [rip+p]
mov rcx, [rip+p+8]
mov rdx, [rip+p+16]
mov r11, [rip+p+24]
sub r12, rsi
sbb r8, rcx
sbb r9, rdx
sbb r10, r11
sbb rax, 0
and rsi, rax
and rcx, rax
and rdx, rax
and r11, rax
add r12, rsi
adc r8, rcx
adc r9, rdx
adc r10, r11
// store the result, overwriting the scratch copy of p - b1
mov [rdi], r12
mov [rdi+8], r8
mov [rdi+16], r9
mov [rdi+24], r10
pop r14
pop r13
pop r12
ret
//***********************************************************************
// Multiplication in GF(p^2), complex part
// Operation: c [rdi] = a0 x b1 + a1 x b0
// Inputs: a = [a1, a0] stored in [rsi] (a0 at [rsi], a1 at [rsi+32])
// b = [b1, b0] stored in [rdx] (b0 at [rdx], b1 at [rdx+32])
// Output: c stored in [rdi]
// [rdi] is only written after all loads from a and b have been issued.
// Multiplication and Montgomery reduction are interleaved one limb of b at a time.
//***********************************************************************
.global fp2_mul_c1
fp2_mul_c1:
// r12-r14 are used as temporaries; saved here and restored before ret
push r12
push r13
push r14
// rdx is the implicit mulx operand, so the pointer to b moves to rcx
mov rcx, rdx
// [r8:r12] <- z = a0 x b10 + a1 x b00
mov rdx, [rcx+32]
mulx r9, r8, [rsi]
xor rax, rax
mulx r10, r11, [rsi+8]
adox r9, r11
mulx r11, r12, [rsi+16]
adox r10, r12
mulx r12, r13, [rsi+24]
adox r11, r13
adox r12, rax
mov rdx, [rcx]
MULADD64x256 [rsi+32], r8, r9, r10, r11, r12, r13, r14, rax
// [r9:r12] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, r8 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], r9, r10, r11, r12, r13, r14
// [r9:r12, r8] <- z = a0 x b11 + a1 x b01 + z
mov rdx, [rcx+40]
MULADD64x256 [rsi], r9, r10, r11, r12, r8, r13, r14, r8
mov rdx, [rcx+8]
MULADD64x256 [rsi+32], r9, r10, r11, r12, r8, r13, r14, rax
// [r10:r12, r8] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, r9 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], r10, r11, r12, r8, r13, r14
// [r10:r12, r8:r9] <- z = a0 x b12 + a1 x b02 + z
mov rdx, [rcx+48]
MULADD64x256 [rsi], r10, r11, r12, r8, r9, r13, r14, r9
mov rdx, [rcx+16]
MULADD64x256 [rsi+32], r10, r11, r12, r8, r9, r13, r14, rax
// [r11:r12, r8:r9] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, r10 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], r11, r12, r8, r9, r13, r14
// [r11:r12, r8:r10] <- z = a0 x b13 + a1 x b03 + z
mov rdx, [rcx+56]
MULADD64x256 [rsi], r11, r12, r8, r9, r10, r13, r14, r10
mov rdx, [rcx+24]
MULADD64x256 [rsi+32], r11, r12, r8, r9, r10, r13, r14, rax
// [r12, r8:r10] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, r11 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], r12, r8, r9, r10, r13, r14
// Final correction
// z - p is computed; rax (left at 0 by the last MULADD64x192) becomes an
// all-ones mask when that subtraction borrows, and the masked p is added back.
mov rsi, [rip+p]
mov rcx, [rip+p+8]
mov rdx, [rip+p+16]
mov r11, [rip+p+24]
sub r12, rsi
sbb r8, rcx
sbb r9, rdx
sbb r10, r11
sbb rax, 0
and rsi, rax
and rcx, rax
and rdx, rax
and r11, rax
add r12, rsi
adc r8, rcx
adc r9, rdx
adc r10, r11
// store the result
mov [rdi], r12
mov [rdi+8], r8
mov [rdi+16], r9
mov [rdi+24], r10
pop r14
pop r13
pop r12
ret
///////////////////////////////////////////////////////////////// MACRO
// z = a x b (mod p)
// Inputs: base memory pointers M0 (a), M1 (b)
// bi pre-stored in rdx,
// accumulator z in [Z0:Z4], pre-stores a0 x b
// Output: [Z0:Z4]
// Temps: regs T0:T1
// The caller must already have multiplied limb 0 of the M0 operand into the
// accumulator; this macro only loads limbs 1..3 from M0 (offsets 8, 16, 24).
// Reduction steps and limb products alternate, and the accumulator registers
// rotate by one position per round; the value ends up in [Z4, Z0:Z2].
// The callers apply the final conditional subtraction of p afterwards.
// Clobbers rdx and, through MULADD64x192, rax (left at 0).
/////////////////////////////////////////////////////////////////
.macro FPMUL256x256 M0, M1, Z0, Z1, Z2, Z3, Z4, T0, T1
// [Z1:Z4] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, \Z0 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], \Z1, \Z2, \Z3, \Z4, \T0, \T1
// [Z1:Z4, Z0] <- z = a01 x a1 + z
mov rdx, 8\M0
MULADD64x256 \M1, \Z1, \Z2, \Z3, \Z4, \Z0, \T0, \T1, \Z0
// [Z2:Z4, Z0] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, \Z1 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], \Z2, \Z3, \Z4, \Z0, \T0, \T1
// [Z2:Z4, Z0:Z1] <- z = a02 x a1 + z
mov rdx, 16\M0
MULADD64x256 \M1, \Z2, \Z3, \Z4, \Z0, \Z1, \T0, \T1, \Z1
// [Z3:Z4, Z0:Z1] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, \Z2 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], \Z3, \Z4, \Z0, \Z1, \T0, \T1
// [Z3:Z4, Z0:Z2] <- z = a03 x a1 + z
mov rdx, 24\M0
MULADD64x256 \M1, \Z3, \Z4, \Z0, \Z1, \Z2, \T0, \T1, \Z2
// [Z4, Z0:Z2] <- z = (z0 x p_plus_1 + z)/2^64
mov rdx, \Z3 // rdx <- z0
MULADD64x192 [rip+p_plus_1+8], \Z4, \Z0, \Z1, \Z2, \T0, \T1
.endm
//***********************************************************************
// Squaring in GF(p^2), non-complex part
// Operation: c [rdi] = (a0+a1) x (a0-a1)
// Inputs: a = [a1, a0] stored in [rsi] (a0 at [rsi], a1 at [rsi+32])
// Output: c stored in [rdi]
// The routine writes 64 bytes at [rdi]: a0+a1 is parked in [rdi..rdi+31] and
// a0-a1+p in [rdi+32..rdi+63]; the 32-byte result then overwrites the first half.
// Neither the sum nor the difference is reduced modulo p before the multiplication.
//***********************************************************************
.global fp2_sq_c0
fp2_sq_c0:
// r12, r13 are used as temporaries; saved here and restored before ret
push r12
push r13
// a0 + a1
mov rdx, [rsi]
mov r9, [rsi+8]
mov r10, [rsi+16]
mov r11, [rsi+24]
add rdx, [rsi+32]
adc r9, [rsi+40]
adc r10, [rsi+48]
adc r11, [rsi+56]
// park the sum in [rdi]; its limb 0 also stays in rdx for the first mulx round
mov [rdi], rdx
mov [rdi+8], r9
mov [rdi+16], r10
mov [rdi+24], r11
// a0 - a1 + p
mov r8, [rsi]
mov r10, [rsi+8]
mov r12, [rsi+16]
mov r13, [rsi+24]
sub r8, [rsi+32]
sbb r10, [rsi+40]
sbb r12, [rsi+48]
sbb r13, [rsi+56]
add r8, [rip+p]
adc r10, [rip+p+8]
adc r12, [rip+p+16]
adc r13, [rip+p+24]
// park the difference in [rdi+32]; its limbs also stay in r8, r10, r12, r13
mov [rdi+32], r8
mov [rdi+40], r10
mov [rdi+48], r12
mov [rdi+56], r13
// [r8:r12] <- z = a00 x a1
// i.e. limb 0 of the sum (in rdx) times the four limbs of the difference,
// which are consumed directly from the registers they were left in
mulx r9, r8, r8
xor rax, rax
mulx r10, r11, r10
adox r9, r11
mulx r11, r12, r12
adox r10, r12
mulx r12, r13, r13
adox r11, r13
adox r12, rax
FPMUL256x256 [rdi], [rdi+32], r8, r9, r10, r11, r12, r13, rcx
// Final correction
// z - p is computed; rax (left at 0 by the last MULADD64x192) becomes an
// all-ones mask when that subtraction borrows, and the masked p is added back.
mov rsi, [rip+p]
mov rcx, [rip+p+8]
mov rdx, [rip+p+16]
mov r11, [rip+p+24]
sub r12, rsi
sbb r8, rcx
sbb r9, rdx
sbb r10, r11
sbb rax, 0
and rsi, rax
and rcx, rax
and rdx, rax
and r11, rax
add r12, rsi
adc r8, rcx
adc r9, rdx
adc r10, r11
// store the result over the first half of the scratch area
mov [rdi], r12
mov [rdi+8], r8
mov [rdi+16], r9
mov [rdi+24], r10
pop r13
pop r12
ret
//***********************************************************************
// Squaring in GF(p^2), complex part
// Operation: c [rdi] = 2a0 x a1
// Inputs: a = [a1, a0] stored in [rsi] (a0 at [rsi], a1 at [rsi+32])
// Output: c stored in [rdi]
// 2*a0 is formed without reduction modulo p. Its limbs 1..3 are kept in a
// 32-byte scratch area on the stack; limb 0 stays in rdx and the slot at
// [rsp] itself is never written or read.
//***********************************************************************
.global fp2_sq_c1
fp2_sq_c1:
// r12, r13 are used as temporaries; saved here and restored before ret
push r12
push r13
// [rdx, r9:r11] <- 2 x a0
mov rdx, [rsi]
mov r9, [rsi+8]
mov r10, [rsi+16]
mov r11, [rsi+24]
add rdx, rdx
adc r9, r9
adc r10, r10
adc r11, r11
// spill limbs 1..3 of 2 x a0 for FPMUL256x256, which reads them at 8, 16, 24 from [rsp]
sub rsp, 32
mov [rsp+8], r9
mov [rsp+16], r10
mov [rsp+24], r11
// [r8:r12] <- z = a00 x a1
mulx r9, r8, [rsi+32]
xor rax, rax
mulx r10, r11, [rsi+40]
adox r9, r11
mulx r11, r12, [rsi+48]
adox r10, r12
mulx r12, r13, [rsi+56]
adox r11, r13
adox r12, rax
FPMUL256x256 [rsp], [rsi+32], r8, r9, r10, r11, r12, r13, rcx
// release the scratch area
add rsp, 32
// Final correction
// z - p is computed; rax (left at 0 by the last MULADD64x192) becomes an
// all-ones mask when that subtraction borrows, and the masked p is added back.
mov rsi, [rip+p]
mov rcx, [rip+p+8]
mov rdx, [rip+p+16]
mov r11, [rip+p+24]
sub r12, rsi
sbb r8, rcx
sbb r9, rdx
sbb r10, r11
sbb rax, 0
and rsi, rax
and rcx, rax
and rdx, rax
and r11, rax
add r12, rsi
adc r8, rcx
adc r9, rdx
adc r10, r11
// store the result
mov [rdi], r12
mov [rdi+8], r8
mov [rdi+16], r9
mov [rdi+24], r10
pop r13
pop r12
ret
//***********************************************************************
// Field multiplication in GF(p)
// Operation: c = a x b mod p
// Inputs: a stored in [rsi], b stored in [rdx]
// Output: c stored in [rdi]
// Multiplication and Montgomery reduction are interleaved (see FPMUL256x256),
// so the value produced carries the extra factor 2^(-256) of a Montgomery
// product; the C code uses this routine for fp_tomont/fp_frommont accordingly.
// [rdi] is written only after every load from a and b, so the output may be
// the same buffer as either input.
//***********************************************************************
.global fp_mul
fp_mul:
// r12-r14 are used as temporaries; saved here and restored before ret
push r12
push r13
push r14
// rdx is the implicit mulx operand, so the pointer to b moves to rcx
mov rcx, rdx
// [r8:r12] <- z = a x b0
mov rdx, [rcx]
mulx r9, r8, [rsi]
xor rax, rax
mulx r10, r11, [rsi+8]
adox r9, r11
mulx r11, r12, [rsi+16]
adox r10, r12
mulx r12, r13, [rsi+24]
adox r11, r13
adox r12, rax
// remaining limbs b1..b3 of b times a, with the reduction steps in between
FPMUL256x256 [rcx], [rsi], r8, r9, r10, r11, r12, r13, r14
// Final correction
// z - p is computed; rax (left at 0 by the last MULADD64x192) becomes an
// all-ones mask when that subtraction borrows, and the masked p is added back.
mov rsi, [rip+p]
mov rcx, [rip+p+8]
mov rdx, [rip+p+16]
mov r11, [rip+p+24]
sub r12, rsi
sbb r8, rcx
sbb r9, rdx
sbb r10, r11
sbb rax, 0
and rsi, rax
and rcx, rax
and rdx, rax
and r11, rax
add r12, rsi
adc r8, rcx
adc r9, rdx
adc r10, r11
// store the result
mov [rdi], r12
mov [rdi+8], r8
mov [rdi+16], r9
mov [rdi+24], r10
pop r14
pop r13
pop r12
ret
//***********************************************************************
// Field squaring in GF(p)
// Operation: c [rdi] = a [rsi] x a [rsi] mod p
// There is no dedicated squaring code: the second operand register is set to
// the first operand and control is transferred to fp_mul, which returns
// directly to the caller.
//***********************************************************************
.global fp_sqr
fp_sqr:
mov rdx, rsi
jmp fp_mul

View File

@@ -0,0 +1,76 @@
#ifndef FP_H
#define FP_H
//////////////////////////////////////////////// NOTE: this is placed here for now
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <tutil.h>
#include <fp_constants.h>
typedef digit_t fp_t[NWORDS_FIELD]; // Datatype for representing field elements
// x = val: val is stored in limb 0 and all other limbs are cleared (no Montgomery conversion).
void fp_set(digit_t* x, const digit_t val);
// Constant-time limb-wise comparison; returns true iff a and b hold identical limbs.
bool fp_is_equal(const digit_t* a, const digit_t* b);
// Constant-time test; returns true iff every limb of a is zero.
bool fp_is_zero(const digit_t* a);
// out = a.
void fp_copy(digit_t* out, const digit_t* a);
// In-place right shift of an nwords-limb integer; returns the low bit x had before the shift.
digit_t mp_shiftr(digit_t* x, const unsigned int shift, const unsigned int nwords);
// In-place left shift of an nwords-limb integer.
void mp_shiftl(digit_t* x, const unsigned int shift, const unsigned int nwords);
// out = a + b mod p (assembly routine).
void fp_add(digit_t* out, const digit_t* a, const digit_t* b);
// out = a - b mod p (assembly routine).
void fp_sub(digit_t* out, const digit_t* a, const digit_t* b);
// out = -a mod p, for a in [0, p-1].
void fp_neg(digit_t* out, const digit_t* a);
// out = a^2; implemented by the assembly as fp_mul(out, a, a).
void fp_sqr(digit_t* out, const digit_t* a);
// Montgomery multiplication, out = a*b*R^(-1) mod p (assembly routine).
void fp_mul(digit_t* out, const digit_t* a, const digit_t* b);
// Full product of two digits: out[0] receives the low digit, out[1] the high digit.
void MUL(digit_t* out, const digit_t a, const digit_t b);
// In-place inversion of a Montgomery-form element, computed as x^(p-2).
void fp_inv(digit_t* x);
// Returns true iff a^((p-1)/2) equals 1 (so 0 is reported as a non-square).
bool fp_is_square(const digit_t* a);
// In-place a <- a^((p+1)/4); the input is not checked for being a square.
void fp_sqrt(digit_t* a);
// out = a*R mod p (enter Montgomery form).
void fp_tomont(digit_t* out, const digit_t* a);
// out = a*R^(-1) mod p (leave Montgomery form).
void fp_frommont(digit_t* out, const digit_t* a);
// Writes a fixed precomputed constant to out.
// NOTE(review): presumably the Montgomery form of 1 -- confirm.
void fp_mont_setone(digit_t* out);
/********************** Constant-time unsigned comparisons ***********************/
// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise
static inline unsigned int is_digit_nonzero_ct(digit_t x)
{ // Is x != 0?
  // For any non-zero x, x or its two's-complement negation has the top bit set;
  // for x = 0 both are zero. The top bit of their OR is therefore the answer.
    const digit_t negated = 0 - x;

    return (unsigned int)((x | negated) >> (RADIX - 1));
}
static inline unsigned int is_digit_zero_ct(digit_t x)
{ // Is x = 0?
  // Logical complement of is_digit_nonzero_ct(), obtained by flipping its 0/1 result.
    const unsigned int nonzero = is_digit_nonzero_ct(x);

    return (unsigned int)(1 ^ nonzero);
}
static inline unsigned int is_digit_lessthan_ct(digit_t x, digit_t y)
{ // Is x < y?
  // Branch-free: the answer is the top bit of x ^ ((x ^ y) | ((x - y) ^ y)).
    const digit_t operands_differ = x ^ y;
    const digit_t diff_vs_y = (x - y) ^ y;
    const digit_t selector = x ^ (operands_differ | diff_vs_y);

    return (unsigned int)(selector >> (RADIX - 1));
}
/********************** Platform-independent macros for digit-size operations **********************/
// Digit addition with carry
// sumOut = addend1 + addend2 + carryIn (modulo the digit size); carryOut is 1 if
// either of the two partial additions wrapped around, 0 otherwise.
// The carry is detected with the constant-time comparison helper above.
// The macro expands to a brace-enclosed block that declares a local tempReg;
// addend1 and carryIn are each evaluated twice.
#define ADDC(sumOut, carryOut, addend1, addend2, carryIn) \
{ digit_t tempReg = (addend1) + (digit_t)(carryIn); \
(sumOut) = (addend2) + tempReg; \
(carryOut) = (is_digit_lessthan_ct(tempReg, (digit_t)(carryIn)) | is_digit_lessthan_ct((sumOut), tempReg)); }
// Digit subtraction with borrow
// differenceOut = minuend - subtrahend - borrowIn (modulo the digit size).
// borrowOut is 1 if minuend < subtrahend, or if the two are equal and borrowIn
// is set; 0 otherwise. It is computed into a local before differenceOut and
// borrowOut are assigned, so borrowOut may be the same variable as borrowIn.
// The macro expands to a brace-enclosed block; minuend, subtrahend and
// borrowIn are each evaluated twice.
#define SUBC(differenceOut, borrowOut, minuend, subtrahend, borrowIn) \
{ digit_t tempReg = (minuend) - (subtrahend); \
unsigned int borrowReg = (is_digit_lessthan_ct((minuend), (subtrahend)) | ((borrowIn) & is_digit_zero_ct(tempReg))); \
(differenceOut) = tempReg - (digit_t)(borrowIn); \
(borrowOut) = borrowReg; }
// Shift right with flexible datatype
// shiftOut = (lowIn >> shift) combined with the low `shift` bits of highIn,
// which are moved into the top of the result.
// DigitSize is the width in bits of the operands; it is parenthesised in the
// expansion so that an expression argument (e.g. 32 << 1) is subtracted from as
// a whole.
// Requires 0 < shift < DigitSize: shift = 0 would shift highIn by the full
// width, which is undefined behaviour in C.
// The expansion keeps its trailing semicolon for existing callers.
#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \
    (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << ((DigitSize) - (shift)));
// Digit shift left
// shiftOut = (highIn << shift) combined with the top `shift` bits of lowIn,
// which are moved into the bottom of the result.
// DigitSize is the width in bits of the operands. The expansion uses this
// argument (like SHIFTR does) instead of assuming the global RADIX, so the
// macro is also correct for operands of another width.
// Requires 0 < shift < DigitSize: shift = 0 would shift lowIn by the full
// width, which is undefined behaviour in C.
// The expansion keeps its trailing semicolon for existing callers.
#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \
    (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> ((DigitSize) - (shift)));
#endif

View File

@@ -0,0 +1,29 @@
#ifndef FP2_H
#define FP2_H
#include "fp.h"
// Structure for representing elements in GF(p^2)
// An element is re + im*i with i^2 = -1 (arithmetic modulo X^2 + 1). The
// assembly kernels rely on this layout: re occupies the first 32 bytes and im
// the following 32 bytes.
typedef struct fp2_t {
fp_t re, im;
} fp2_t;
// x = val + 0*i for a single-word val (no Montgomery conversion).
void fp2_set(fp2_t* x, const digit_t val);
// Returns true iff both components of a are zero.
bool fp2_is_zero(const fp2_t* a);
// Returns true iff a and b agree in both components.
bool fp2_is_equal(const fp2_t* a, const fp2_t* b);
// x = y.
void fp2_copy(fp2_t* x, const fp2_t* y);
// Returns the element 2 + i in Montgomery form.
fp2_t fp2_non_residue();
// x = y + z.
void fp2_add(fp2_t* x, const fp2_t* y, const fp2_t* z);
// x = y - z.
void fp2_sub(fp2_t* x, const fp2_t* y, const fp2_t* z);
// x = -y.
void fp2_neg(fp2_t* x, const fp2_t* y);
// x = y * z.
void fp2_mul(fp2_t* x, const fp2_t* y, const fp2_t* z);
// x = y^2.
void fp2_sqr(fp2_t* x, const fp2_t* y);
// In-place inversion, x <- 1/x.
void fp2_inv(fp2_t* x);
// Returns whether re^2 + im^2 of x is a square in GF(p).
bool fp2_is_square(const fp2_t* x);
// x = conjugate of y (imaginary part negated).
void fp2_frob(fp2_t* x, const fp2_t* y);
// In-place square root; not constant time, input not checked for being a square.
void fp2_sqrt(fp2_t* x);
// Component-wise conversion to Montgomery form.
void fp2_tomont(fp2_t* x, const fp2_t* y);
// Component-wise conversion out of Montgomery form.
void fp2_frommont(fp2_t* x, const fp2_t* y);
// Three-way comparison (+1, 0, -1) of the values taken out of Montgomery form:
// real parts first, imaginary parts as tie-breaker. Not constant time.
int fp2_cmp(fp2_t* x, fp2_t* y);
#endif

View File

@@ -0,0 +1,9 @@
# One test executable per arithmetic layer: GF(p) ("fp") and GF(p^2) ("fp2").
# Each is built from test_<layer>.c plus the shared helpers in test_extras.c,
# linked against the GF library of this variant, and registered as a test that
# is run with the arguments "test ${SQISIGN_TEST_REPS}".
foreach(GF_TEST_LAYER fp fp2)
    set(GF_TEST_TARGET sqisign_test_gf_${SVARIANT_LOWER}_${GF_TEST_LAYER})

    add_executable(${GF_TEST_TARGET} test_${GF_TEST_LAYER}.c test_extras.c)
    target_link_libraries(${GF_TEST_TARGET} ${LIB_GF_${SVARIANT_UPPER}})
    target_include_directories(${GF_TEST_TARGET} PRIVATE ../include ${INC_COMMON} ${INC_PRECOMP_${SVARIANT_UPPER}} ${INC_PUBLIC})

    add_test(${GF_TEST_TARGET} ${GF_TEST_TARGET} test ${SQISIGN_TEST_REPS})
endforeach()

View File

@@ -0,0 +1,74 @@
#include "test_extras.h"
#include <bench.h>
// Global constants
// p is the field modulus; fprandom_test() below reduces its output against it.
extern const digit_t p[NWORDS_FIELD];
// NOTE(review): R2 is declared here but not used by the code visible in this file.
extern const digit_t R2[NWORDS_FIELD];
// Disabled local cycle counter (rdtsc, combining edx:eax into one 64-bit value).
// NOTE(review): presumably superseded by the facilities of <bench.h> -- confirm
// before removing.
#if 0
int64_t cpucycles(void)
{ // Access system counter for benchmarking
unsigned int hi, lo;
asm volatile ("rdtsc\n\t" : "=a" (lo), "=d"(hi));
return ((int64_t)lo) | (((int64_t)hi) << 32);
}
#endif
int compare_words(digit_t* a, digit_t* b, unsigned int nwords)
{ // Three-way comparison of two "nwords"-limb integers (least significant limb first):
  // returns 1 if a > b, 0 if a = b, -1 if a < b.
  // SECURITY NOTE: this function does not have constant-time execution. TO BE USED FOR TESTING ONLY.
    int idx = nwords-1;

    // Scan from the most significant limb down; the first differing limb decides.
    while (idx >= 0) {
        if (a[idx] != b[idx]) {
            return (a[idx] > b[idx]) ? 1 : -1;
        }
        idx--;
    }
    return 0;
}
static void sub_test(digit_t* out, digit_t* a, digit_t* b, unsigned int nwords)
{ // Multiprecision subtraction out = a-b, intended for a>=b; the final borrow is discarded.
  // SECURITY NOTE: this function does not have constant-time execution. It is for TESTING ONLY.
    digit_t borrow = 0;

    for (unsigned int k = 0; k < nwords; k++) {
        // Read both inputs before writing, so out may be the same buffer as a or b.
        const digit_t minuend = a[k];
        const digit_t subtrahend = b[k];
        const digit_t diff = minuend - subtrahend;

        out[k] = diff - borrow;
        // A borrow leaves this limb if a[k] < b[k], or if removing the incoming
        // borrow wrapped around (diff was 0 with a borrow pending).
        borrow = (minuend < subtrahend) || (diff < borrow);
    }
}
void fprandom_test(digit_t* a)
{ // Generating a pseudo-random field element in [0, p-1]
  // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
    const unsigned int nwords = NWORDS_FIELD;
    const unsigned int diff = 256-254;      // number of top bits cleared in the most significant limb
    unsigned char* bytes = (unsigned char*)a;

    // Fill every byte of the element from rand().
    for (unsigned int k = 0; k < sizeof(digit_t)*nwords; k++) {
        bytes[k] = (unsigned char)rand();   // Obtain 256-bit number
    }
    // Clear the top "diff" bits.
    a[nwords-1] &= (((digit_t)(-1) << diff) >> diff);

    // Force it to [0, modulus-1]: subtract p for as long as p <= a.
    for (;;) {
        if (compare_words((digit_t*)p, a, nwords) >= 1) {
            break;
        }
        sub_test(a, a, (digit_t*)p, nwords);
    }
}
void fp2random_test(fp2_t* a)
{ // Generating a pseudo-random element in GF(p^2)
  // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
  // The real part is drawn first, then the imaginary part.
    digit_t* const components[2] = { a->re, a->im };

    for (unsigned int k = 0; k < 2; k++) {
        fprandom_test(components[k]);
    }
}

View File

@@ -0,0 +1,25 @@
#ifndef TEST_EXTRAS_H
#define TEST_EXTRAS_H
#include <time.h>
#include <stdlib.h>
#include "../include/fp.h"
#include "../include/fp2.h"
// Status codes for test results
#define PASSED 0
#define FAILED 1
// Access system counter for benchmarking
//int64_t cpucycles(void);
// Comparing "nword" elements, a=b? : (1) a>b, (0) a=b, (-1) a<b
// Not constant time; for testing only.
int compare_words(digit_t* a, digit_t* b, unsigned int nwords);
// Generating a pseudo-random field element in [0, p-1]
// Uses rand(); the distribution is not fully uniform. For testing only.
void fprandom_test(digit_t* a);
// Generating a pseudo-random element in GF(p^2)
// Both components are drawn with fprandom_test().
void fp2random_test(fp2_t* a);
#endif

View File

@@ -0,0 +1,295 @@
#include "test_extras.h"
#include <stdio.h>
#include <string.h>
#include <bench.h>
// Global constants
// Field modulus, defined in fp.c.
extern const digit_t p[NWORDS_FIELD];
// Benchmark and test parameters
// NOTE(review): BENCH_LOOPS is not used in the part of this file visible here;
// presumably consumed by a benchmark routine further down -- confirm.
static int BENCH_LOOPS = 100000; // Number of iterations per bench
// Each test group in fp_test() repeats its randomized checks this many times.
static int TEST_LOOPS = 100000; // Number of iterations per test
// Randomized self-test of the GF(p) arithmetic.
//
// Each group below runs TEST_LOOPS iterations on fresh random operands and
// prints one PASSED/FAILED line. Returns true if every group passes; on the
// first failing group it prints FAILED and returns false without running the
// remaining groups.
bool fp_test()
{
    // a..f hold standard-form values; ma..mf hold the outputs of fp_tomont
    // and of operations on them (Montgomery form).
    fp_t a, b, c, d, e, f, ma, mb, mc, md, me, mf;
    bool passed;

    printf("\n--------------------------------------------------------------------------------------------------------\n\n");
    printf("Testing field arithmetic over GF(p): \n\n");

    // Field addition
    passed = true;
    for (int n = 0; n < TEST_LOOPS; n++) {
        fprandom_test(a); fprandom_test(b); fprandom_test(c); fprandom_test(d);

        // Associativity: (a+b)+c == a+(b+c)
        fp_add(d, a, b); fp_add(e, d, c);
        fp_add(d, b, c); fp_add(f, d, a);
        if (compare_words(e, f, NWORDS_FIELD) != 0) { passed = false; break; }

        // Commutativity: a+b == b+a
        fp_add(d, a, b);
        fp_add(e, b, a);
        if (compare_words(d, e, NWORDS_FIELD) != 0) { passed = false; break; }

        // Neutral element: a+0 == a
        fp_set(b, 0);
        fp_add(d, a, b);
        if (compare_words(a, d, NWORDS_FIELD) != 0) { passed = false; break; }

        // Additive inverse: a+(-a) == 0
        fp_set(b, 0);
        fp_neg(d, a);
        fp_add(e, a, d);
        if (compare_words(e, b, NWORDS_FIELD) != 0) { passed = false; break; }
    }
    if (passed) printf(" GF(p) addition tests ............................................ PASSED");
    else { printf(" GF(p) addition tests... FAILED"); printf("\n"); return false; }
    printf("\n");

    // Field subtraction
    passed = true;
    for (int n = 0; n < TEST_LOOPS; n++) {
        fprandom_test(a); fprandom_test(b); fprandom_test(c); fprandom_test(d);

        // (a-b)-c == a-(b+c)
        fp_sub(d, a, b); fp_sub(e, d, c);
        fp_add(d, b, c); fp_sub(f, a, d);
        if (compare_words(e, f, NWORDS_FIELD) != 0) { passed = false; break; }

        // Anti-commutativity: a-b == -(b-a)
        fp_sub(d, a, b);
        fp_sub(e, b, a);
        fp_neg(e, e);
        if (compare_words(d, e, NWORDS_FIELD) != 0) { passed = false; break; }

        // Neutral element: a-0 == a
        fp_set(b, 0);
        fp_sub(d, a, b);
        if (compare_words(a, d, NWORDS_FIELD) != 0) { passed = false; break; }

        // Self-subtraction: a-a == 0
        fp_set(b, 0);
        fp_sub(e, a, a);
        if (compare_words(e, b, NWORDS_FIELD) != 0) { passed = false; break; }
    }
    if (passed) printf(" GF(p) subtraction tests ......................................... PASSED");
    else { printf(" GF(p) subtraction tests... FAILED"); printf("\n"); return false; }
    printf("\n");

    // Field multiplication
    passed = true;
    for (int n = 0; n < TEST_LOOPS; n++) {
        fprandom_test(a); fprandom_test(b); fprandom_test(c);

        // Conversion round trip: frommont(tomont(a)) == a.
        // d is used as the scratch output so that the random c drawn above
        // is still independent of a in the checks that follow.
        fp_tomont(ma, a);
        fp_frommont(d, ma);
        if (compare_words(a, d, NWORDS_FIELD) != 0) { passed = false; break; }

        // Associativity: (a*b)*c == a*(b*c)
        fp_tomont(ma, a); fp_tomont(mb, b); fp_tomont(mc, c);
        fp_mul(md, ma, mb); fp_mul(me, md, mc);
        fp_mul(md, mb, mc); fp_mul(mf, md, ma);
        fp_frommont(e, me);
        fp_frommont(f, mf);
        if (compare_words(e, f, NWORDS_FIELD) != 0) { passed = false; break; }

        // Distributivity: a*(b+c) == a*b + a*c
        fp_tomont(ma, a); fp_tomont(mb, b); fp_tomont(mc, c);
        fp_add(md, mb, mc); fp_mul(me, ma, md);
        fp_mul(md, ma, mb); fp_mul(mf, ma, mc); fp_add(mf, md, mf);
        fp_frommont(e, me);
        fp_frommont(f, mf);
        if (compare_words(e, f, NWORDS_FIELD) != 0) { passed = false; break; }

        // Commutativity: a*b == b*a
        fp_tomont(ma, a); fp_tomont(mb, b);
        fp_mul(md, ma, mb);
        fp_mul(me, mb, ma);
        fp_frommont(d, md);
        fp_frommont(e, me);
        if (compare_words(d, e, NWORDS_FIELD) != 0) { passed = false; break; }

        // Neutral element: a*1 == a
        fp_tomont(ma, a);
        fp_set(b, 1); fp_tomont(mb, b);
        fp_mul(md, ma, mb);
        fp_frommont(a, ma);
        fp_frommont(d, md);
        if (compare_words(a, d, NWORDS_FIELD) != 0) { passed = false; break; }

        // Absorbing element: a*0 == 0
        fp_set(b, 0);
        fp_tomont(mb, b);
        fp_mul(md, ma, mb);
        fp_frommont(d, md);
        if (compare_words(b, d, NWORDS_FIELD) != 0) { passed = false; break; }
    }
    if (passed) printf(" GF(p) multiplication tests ...................................... PASSED");
    else { printf(" GF(p) multiplication tests... FAILED"); printf("\n"); return false; }
    printf("\n");

    // Field squaring
    passed = true;
    for (int n = 0; n < TEST_LOOPS; n++) {
        // sqr(a) must agree with mul(a, a)
        fprandom_test(a);
        fp_tomont(ma, a);
        fp_sqr(mb, ma);
        fp_mul(mc, ma, ma);
        fp_frommont(b, mb);
        fp_frommont(c, mc);
        if (compare_words(b, c, NWORDS_FIELD) != 0) { passed = false; break; }

        // 0^2 == 0 (compared directly on the fp_tomont/fp_sqr outputs)
        fp_set(a, 0); fp_tomont(ma, a);
        fp_sqr(md, ma);
        if (compare_words(ma, md, NWORDS_FIELD) != 0) { passed = false; break; }
    }
    if (passed) printf(" GF(p) squaring tests............................................. PASSED");
    else { printf(" GF(p) squaring tests... FAILED"); printf("\n"); return false; }
    printf("\n");

    // Field inversion
    passed = true;
    for (int n = 0; n < TEST_LOOPS; n++) {
        // a * a^-1 == 1; fp_inv works in place, so keep a copy of ma in mb
        fprandom_test(a);
        fp_tomont(ma, a);
        fp_set(d, 1);
        memcpy(mb, ma, RADIX/8 * NWORDS_FIELD);
        fp_inv(ma);
        fp_mul(mc, ma, mb);
        fp_frommont(c, mc);
        if (compare_words(c, d, NWORDS_FIELD) != 0) { passed = false; break; }

        // Inverting zero is expected to leave zero
        fp_set(a, 0);
        fp_set(d, 0);
        fp_inv(a);
        if (compare_words(a, d, NWORDS_FIELD) != 0) { passed = false; break; }
    }
    if (passed) printf(" GF(p) inversion tests............................................ PASSED");
    else { printf(" GF(p) inversion tests... FAILED"); printf("\n"); return false; }
    printf("\n");

    // Square root and square detection
    passed = true;
    for (int n = 0; n < TEST_LOOPS; n++) {
        fprandom_test(a);
        fp_tomont(ma, a);
        fp_sqr(mc, ma);
        fp_frommont(c, mc);                 // c = a^2

        // a^2 must be reported as a square
        if (fp_is_square(mc) != 1) { passed = false; break; }

        // In-place square root: the result must be a or -a
        fp_sqrt(mc);
        fp_neg(md, mc);
        fp_frommont(c, mc);
        fp_frommont(d, md);
        if ((compare_words(a, c, NWORDS_FIELD) != 0) && (compare_words(a, d, NWORDS_FIELD) != 0)) { passed = false; break; }
    }
    if (passed) printf(" Square root, square tests........................................ PASSED");
    else { printf(" Square root, square tests... FAILED"); printf("\n"); return false; }
    printf("\n");

    return true;
}
// Times BENCH_LOOPS back-to-back executions of `op`, measuring each call with
// cpucycles(), and prints the average using `fmt` (which must contain exactly
// one %llu conversion, since the cycle total is an unsigned long long).
#define FP_BENCH(fmt, op) \
    do { \
        unsigned long long total = 0; \
        for (int i = 0; i < BENCH_LOOPS; i++) { \
            unsigned long long start = cpucycles(); \
            op; \
            unsigned long long stop = cpucycles(); \
            total += stop - start; \
        } \
        printf(fmt, total / (unsigned long long)BENCH_LOOPS); \
        printf("\n"); \
    } while (0)

// Benchmarks the GF(p) operations on random operands and prints the average
// cycle count per call for each of them.
//
// Returns true once all benchmarks have run; returns false without measuring
// anything when BENCH_LOOPS is not positive (the averages would otherwise
// require a division by zero).
bool fp_run()
{
    fp_t a, b, c;

    printf("\n--------------------------------------------------------------------------------------------------------\n\n");
    printf("Benchmarking field arithmetic: \n\n");

    if (BENCH_LOOPS <= 0) {
        printf("Benchmark iteration count must be positive\n");
        return false;
    }

    fprandom_test(a); fprandom_test(b); fprandom_test(c);

    // fp_inv and fp_sqrt operate in place, so `a` changes from one iteration
    // to the next in those benchmarks.
    FP_BENCH(" GF(p) addition runs in .......................................... %7llu cycles", fp_add(c, a, b));
    FP_BENCH(" GF(p) subtraction runs in ....................................... %7llu cycles", fp_sub(c, a, b));
    FP_BENCH(" GF(p) multiplication runs in .................................... %7llu cycles", fp_mul(c, a, b));
    FP_BENCH(" GF(p) inversion runs in ......................................... %7llu cycles", fp_inv(a));
    FP_BENCH(" GF(p) square root runs in ....................................... %7llu cycles", fp_sqrt(a));
    FP_BENCH(" Square checking runs in ......................................... %7llu cycles", fp_is_square(a));

    return true;
}

#undef FP_BENCH
// Command-line entry point.
//
//   <prog> test  <reps>   run the GF(p) self-tests with <reps> iterations
//   <prog> bench <reps>   run the GF(p) benchmarks with <reps> iterations
//
// Exit status is 0 when the selected run succeeds, and 1 when it fails or
// when the arguments are missing or invalid.
int main(int argc, char* argv[])
{
    if (argc < 3) {
        printf("Please enter an argument: 'test' or 'bench' and <reps>\n");
        return 1;
    }

    // atoi yields 0 for non-numeric text, so this check also rejects garbage.
    // A non-positive count would make the tests vacuous and leave the
    // benchmark's per-iteration average undefined.
    int reps = atoi(argv[2]);
    if (reps <= 0) {
        printf("<reps> must be a positive integer\n");
        return 1;
    }

    if (!strcmp(argv[1], "test")) {
        TEST_LOOPS = reps;
        return !fp_test();
    }
    if (!strcmp(argv[1], "bench")) {
        BENCH_LOOPS = reps;
        return !fp_run();
    }

    // Unknown mode: report it instead of exiting silently.
    printf("Please enter an argument: 'test' or 'bench' and <reps>\n");
    return 1;
}

View File

@@ -0,0 +1,307 @@
#include "test_extras.h"
#include <stdio.h>
#include <string.h>
#include <bench.h>
// Global constants
// NOTE(review): presumably the field characteristic, defined in another translation unit;
// it is not referenced by the test/bench code visible in this file -- confirm before relying on it.
extern const digit_t p[NWORDS_FIELD];
// Benchmark and test parameters: these are defaults, main() overwrites the one
// matching the selected mode with the <reps> command-line argument.
static int BENCH_LOOPS = 100000; // Number of iterations per bench
static int TEST_LOOPS = 100000; // Number of iterations per test
// Randomized self-test of the GF(p^2) arithmetic.
//
// Each group below runs TEST_LOOPS iterations on fresh random operands and
// prints one PASSED/FAILED line. Returns true if every group passes; on the
// first failing group it prints FAILED and returns false without running the
// remaining groups.
bool fp2_test()
{
    // a..f hold standard-form values; ma..mf hold the outputs of fp2_tomont
    // and of operations on them (Montgomery form). Elements are compared as
    // 2*NWORDS_FIELD raw words.
    fp2_t a, b, c, d, e, f, ma, mb, mc, md, me, mf;
    bool passed;

    printf("\n--------------------------------------------------------------------------------------------------------\n\n");
    printf("Testing arithmetic over GF(p^2): \n\n");

    // Addition in GF(p^2)
    passed = true;
    for (int n = 0; n < TEST_LOOPS; n++) {
        fp2random_test(&a); fp2random_test(&b); fp2random_test(&c); fp2random_test(&d);

        // Associativity: (a+b)+c == a+(b+c)
        fp2_add(&d, &a, &b); fp2_add(&e, &d, &c);
        fp2_add(&d, &b, &c); fp2_add(&f, &d, &a);
        if (compare_words((digit_t*)&e, (digit_t*)&f, 2*NWORDS_FIELD) != 0) { passed = false; break; }

        // Commutativity: a+b == b+a
        fp2_add(&d, &a, &b);
        fp2_add(&e, &b, &a);
        if (compare_words((digit_t*)&d, (digit_t*)&e, 2*NWORDS_FIELD) != 0) { passed = false; break; }

        // Neutral element: a+0 == a
        fp2_set(&b, 0);
        fp2_add(&d, &a, &b);
        if (compare_words((digit_t*)&a, (digit_t*)&d, 2*NWORDS_FIELD) != 0) { passed = false; break; }

        // Additive inverse: a+(-a) == 0
        fp2_set(&b, 0);
        fp2_neg(&d, &a);
        fp2_add(&e, &a, &d);
        if (compare_words((digit_t*)&e, (digit_t*)&b, 2*NWORDS_FIELD) != 0) { passed = false; break; }
    }
    if (passed) printf(" GF(p^2) addition tests ............................................ PASSED");
    else { printf(" GF(p^2) addition tests... FAILED"); printf("\n"); return false; }
    printf("\n");

    // Subtraction in GF(p^2)
    passed = true;
    for (int n = 0; n < TEST_LOOPS; n++) {
        fp2random_test(&a); fp2random_test(&b); fp2random_test(&c); fp2random_test(&d);

        // (a-b)-c == a-(b+c)
        fp2_sub(&d, &a, &b); fp2_sub(&e, &d, &c);
        fp2_add(&d, &b, &c); fp2_sub(&f, &a, &d);
        if (compare_words((digit_t*)&e, (digit_t*)&f, 2*NWORDS_FIELD) != 0) { passed = false; break; }

        // Anti-commutativity: a-b == -(b-a)
        fp2_sub(&d, &a, &b);
        fp2_sub(&e, &b, &a);
        fp2_neg(&e, &e);
        if (compare_words((digit_t*)&d, (digit_t*)&e, 2*NWORDS_FIELD) != 0) { passed = false; break; }

        // Neutral element: a-0 == a
        fp2_set(&b, 0);
        fp2_sub(&d, &a, &b);
        if (compare_words((digit_t*)&a, (digit_t*)&d, 2*NWORDS_FIELD) != 0) { passed = false; break; }

        // Self-subtraction: a-a == 0
        fp2_set(&b, 0);
        fp2_sub(&e, &a, &a);
        if (compare_words((digit_t*)&e, (digit_t*)&b, 2*NWORDS_FIELD) != 0) { passed = false; break; }
    }
    if (passed) printf(" GF(p^2) subtraction tests ......................................... PASSED");
    else { printf(" GF(p^2) subtraction tests... FAILED"); printf("\n"); return false; }
    printf("\n");

    // Multiplication in GF(p^2)
    passed = true;
    for (int n = 0; n < TEST_LOOPS; n++) {
        fp2random_test(&a); fp2random_test(&b); fp2random_test(&c);

        // Conversion round trip: frommont(tomont(a)) == a.
        // d is used as the scratch output so that the random c drawn above
        // is still independent of a in the checks that follow.
        fp2_tomont(&ma, &a);
        fp2_frommont(&d, &ma);
        if (compare_words((digit_t*)&a, (digit_t*)&d, 2*NWORDS_FIELD) != 0) { passed = false; break; }

        // Associativity: (a*b)*c == a*(b*c)
        fp2_tomont(&ma, &a); fp2_tomont(&mb, &b); fp2_tomont(&mc, &c);
        fp2_mul(&md, &ma, &mb); fp2_mul(&me, &md, &mc);
        fp2_mul(&md, &mb, &mc); fp2_mul(&mf, &md, &ma);
        fp2_frommont(&e, &me);
        fp2_frommont(&f, &mf);
        if (compare_words((digit_t*)&e, (digit_t*)&f, 2*NWORDS_FIELD) != 0) { passed = false; break; }

        // Distributivity: a*(b+c) == a*b + a*c
        fp2_tomont(&ma, &a); fp2_tomont(&mb, &b); fp2_tomont(&mc, &c);
        fp2_add(&md, &mb, &mc); fp2_mul(&me, &ma, &md);
        fp2_mul(&md, &ma, &mb); fp2_mul(&mf, &ma, &mc); fp2_add(&mf, &md, &mf);
        fp2_frommont(&e, &me);
        fp2_frommont(&f, &mf);
        if (compare_words((digit_t*)&e, (digit_t*)&f, 2*NWORDS_FIELD) != 0) { passed = false; break; }

        // Commutativity: a*b == b*a
        fp2_tomont(&ma, &a); fp2_tomont(&mb, &b);
        fp2_mul(&md, &ma, &mb);
        fp2_mul(&me, &mb, &ma);
        fp2_frommont(&d, &md);
        fp2_frommont(&e, &me);
        if (compare_words((digit_t*)&d, (digit_t*)&e, 2*NWORDS_FIELD) != 0) { passed = false; break; }

        // Neutral element: a*1 == a
        fp2_tomont(&ma, &a);
        fp2_set(&b, 1); fp2_tomont(&mb, &b);
        fp2_mul(&md, &ma, &mb);
        fp2_frommont(&a, &ma);
        fp2_frommont(&d, &md);
        if (compare_words((digit_t*)&a, (digit_t*)&d, 2*NWORDS_FIELD) != 0) { passed = false; break; }

        // Absorbing element: a*0 == 0
        fp2_set(&b, 0);
        fp2_tomont(&mb, &b);
        fp2_mul(&md, &ma, &mb);
        fp2_frommont(&d, &md);
        if (compare_words((digit_t*)&b, (digit_t*)&d, 2*NWORDS_FIELD) != 0) { passed = false; break; }
    }
    if (passed) printf(" GF(p^2) multiplication tests ...................................... PASSED");
    else { printf(" GF(p^2) multiplication tests... FAILED"); printf("\n"); return false; }
    printf("\n");

    // Squaring in GF(p^2)
    passed = true;
    for (int n = 0; n < TEST_LOOPS; n++) {
        // sqr(a) must agree with mul(a, a)
        fp2random_test(&a);
        fp2_tomont(&ma, &a);
        fp2_sqr(&mb, &ma);
        fp2_mul(&mc, &ma, &ma);
        fp2_frommont(&b, &mb);
        fp2_frommont(&c, &mc);
        if (compare_words((digit_t*)&b, (digit_t*)&c, 2*NWORDS_FIELD) != 0) { passed = false; break; }

        // 0^2 == 0 (compared directly on the fp2_tomont/fp2_sqr outputs)
        fp2_set(&a, 0); fp2_tomont(&ma, &a);
        fp2_sqr(&md, &ma);
        if (compare_words((digit_t*)&ma, (digit_t*)&md, 2*NWORDS_FIELD) != 0) { passed = false; break; }
    }
    if (passed) printf(" GF(p^2) squaring tests............................................. PASSED");
    else { printf(" GF(p^2) squaring tests... FAILED"); printf("\n"); return false; }
    printf("\n");

    // Inversion in GF(p^2)
    passed = true;
    for (int n = 0; n < TEST_LOOPS; n++) {
        // a * a^-1 == 1; fp2_inv works in place, so keep a copy of ma in mb
        fp2random_test(&a);
        fp2_tomont(&ma, &a);
        fp2_set(&d, 1);
        memcpy(&mb, &ma, RADIX/8 * 2*NWORDS_FIELD);
        fp2_inv(&ma);
        fp2_mul(&mc, &ma, &mb);
        fp2_frommont(&c, &mc);
        if (compare_words((digit_t*)&c, (digit_t*)&d, 2*NWORDS_FIELD) != 0) { passed = false; break; }

        // Inverting zero is expected to leave zero
        fp2_set(&a, 0);
        fp2_set(&d, 0);
        fp2_inv(&a);
        if (compare_words((digit_t*)&a, (digit_t*)&d, 2*NWORDS_FIELD) != 0) { passed = false; break; }
    }
    if (passed) printf(" GF(p^2) inversion tests............................................ PASSED");
    else { printf(" GF(p^2) inversion tests... FAILED"); printf("\n"); return false; }
    printf("\n");

    // Square root and square detection in GF(p^2)
    passed = true;
    for (int n = 0; n < TEST_LOOPS; n++) {
        fp2random_test(&a);
        fp2_tomont(&ma, &a);
        fp2_sqr(&mc, &ma);
        fp2_frommont(&c, &mc);              // c = a^2

        // a^2 must be reported as a square
        if (fp2_is_square(&mc) != 1) { passed = false; break; }

        // In-place square root: the result must be a or -a
        fp2_sqrt(&mc);
        fp2_neg(&md, &mc);
        fp2_frommont(&c, &mc);
        fp2_frommont(&d, &md);
        if ((compare_words((digit_t*)&a, (digit_t*)&c, 2*NWORDS_FIELD) != 0) && (compare_words((digit_t*)&a, (digit_t*)&d, 2*NWORDS_FIELD) != 0)) { passed = false; break; }
    }
    if (passed) printf(" Square root, square tests.......................................... PASSED");
    else { printf(" Square root, square tests... FAILED"); printf("\n"); return false; }
    printf("\n");

    return true;
}
// Times BENCH_LOOPS back-to-back executions of `op`, measuring each call with
// cpucycles(), and prints the average using `fmt` (which must contain exactly
// one %llu conversion, since the cycle total is an unsigned long long).
#define FP2_BENCH(fmt, op) \
    do { \
        unsigned long long total = 0; \
        for (int i = 0; i < BENCH_LOOPS; i++) { \
            unsigned long long start = cpucycles(); \
            op; \
            unsigned long long stop = cpucycles(); \
            total += stop - start; \
        } \
        printf(fmt, total / (unsigned long long)BENCH_LOOPS); \
        printf("\n"); \
    } while (0)

// Benchmarks the GF(p^2) operations on random operands and prints the average
// cycle count per call for each of them.
//
// Returns true once all benchmarks have run; returns false without measuring
// anything when BENCH_LOOPS is not positive (the averages would otherwise
// require a division by zero).
bool fp2_run()
{
    fp2_t a, b, c;

    printf("\n--------------------------------------------------------------------------------------------------------\n\n");
    printf("Benchmarking arithmetic over GF(p^2): \n\n");

    if (BENCH_LOOPS <= 0) {
        printf("Benchmark iteration count must be positive\n");
        return false;
    }

    fp2random_test(&a); fp2random_test(&b); fp2random_test(&c);

    // fp2_inv and fp2_sqrt operate in place, so `a` changes from one
    // iteration to the next in those benchmarks.
    FP2_BENCH(" GF(p^2) addition runs in .......................................... %7llu cycles", fp2_add(&c, &a, &b));
    FP2_BENCH(" GF(p^2) subtraction runs in ....................................... %7llu cycles", fp2_sub(&c, &a, &b));
    FP2_BENCH(" GF(p^2) squaring runs in .......................................... %7llu cycles", fp2_sqr(&c, &a));
    FP2_BENCH(" GF(p^2) multiplication runs in .................................... %7llu cycles", fp2_mul(&c, &a, &b));
    FP2_BENCH(" GF(p^2) inversion runs in ......................................... %7llu cycles", fp2_inv(&a));
    FP2_BENCH(" GF(p^2) square root runs in ....................................... %7llu cycles", fp2_sqrt(&a));
    FP2_BENCH(" Square checking runs in ........................................... %7llu cycles", fp2_is_square(&a));

    return true;
}

#undef FP2_BENCH
// Command-line entry point.
//
//   <prog> test  <reps>   run the GF(p^2) self-tests with <reps> iterations
//   <prog> bench <reps>   run the GF(p^2) benchmarks with <reps> iterations
//
// Exit status is 0 when the selected run succeeds, and 1 when it fails or
// when the arguments are missing or invalid.
int main(int argc, char* argv[])
{
    if (argc < 3) {
        printf("Please enter an argument: 'test' or 'bench' and <reps>\n");
        return 1;
    }

    // atoi yields 0 for non-numeric text, so this check also rejects garbage.
    // A non-positive count would make the tests vacuous and leave the
    // benchmark's per-iteration average undefined.
    int reps = atoi(argv[2]);
    if (reps <= 0) {
        printf("<reps> must be a positive integer\n");
        return 1;
    }

    if (!strcmp(argv[1], "test")) {
        TEST_LOOPS = reps;
        return !fp2_test();
    }
    if (!strcmp(argv[1], "bench")) {
        BENCH_LOOPS = reps;
        return !fp2_run();
    }

    // Unknown mode: report it instead of exiting silently.
    printf("Please enter an argument: 'test' or 'bench' and <reps>\n");
    return 1;
}