initial version of SQIsign
Co-authored-by: Jorge Chavez-Saab <jorgechavezsaab@gmail.com> Co-authored-by: Maria Corte-Real Santos <36373796+mariascrs@users.noreply.github.com> Co-authored-by: Luca De Feo <github@defeo.lu> Co-authored-by: Jonathan Komada Eriksen <jonathan.eriksen97@gmail.com> Co-authored-by: Basil Hess <bhe@zurich.ibm.com> Co-authored-by: Antonin Leroux <18654258+tonioecto@users.noreply.github.com> Co-authored-by: Patrick Longa <plonga@microsoft.com> Co-authored-by: Lorenz Panny <lorenz@yx7.cc> Co-authored-by: Francisco Rodríguez-Henríquez <francisco.rodriguez@tii.ae> Co-authored-by: Sina Schaeffler <108983332+syndrakon@users.noreply.github.com> Co-authored-by: Benjamin Wesolowski <19474926+Calodeon@users.noreply.github.com>
This commit is contained in:
10
src/gf/broadwell/lvl1/CMakeLists.txt
Normal file
10
src/gf/broadwell/lvl1/CMakeLists.txt
Normal file
@@ -0,0 +1,10 @@
|
||||
|
||||
# Broadwell-optimised GF(p) / GF(p^2) arithmetic for the selected parameter set.
set(SOURCE_FILES_GF_${SVARIANT_UPPER}_BROADWELL
    fp_asm.S fp.c fp2.c
)

add_library(${LIB_GF_${SVARIANT_UPPER}} ${SOURCE_FILES_GF_${SVARIANT_UPPER}_BROADWELL})

# Each include directory is listed once; the search order is the order given here.
target_include_directories(${LIB_GF_${SVARIANT_UPPER}} PRIVATE common ${INC_COMMON} ${INC_PRECOMP_${SVARIANT_UPPER}} include ${PROJECT_SOURCE_DIR}/include)
target_compile_options(${LIB_GF_${SVARIANT_UPPER}} PRIVATE ${C_OPT_FLAGS})

add_subdirectory(test)
|
||||
46
src/gf/broadwell/lvl1/Makefile
Normal file
46
src/gf/broadwell/lvl1/Makefile
Normal file
@@ -0,0 +1,46 @@
|
||||
|
||||
# Stand-alone build of the GF(p)/GF(p^2) arithmetic in this directory and its tests.
CC=gcc
CFLAGS= -O3 -std=gnu11 -Wall -march=native -Wno-missing-braces -Wno-logical-not-parentheses
LDFLAGS=-lm
AR=ar rcs
RANLIB=ranlib

OBJECTS=objs/fp_p1913.o objs/fp.o objs/fp2.o objs/fp_asm.o objs/random.o

all: lib tests

# Every object rule creates objs/ itself, so rules can run in any order (make -j)
# or be requested individually.
objs/fp_p1913.o: fp_p1913.c
	@mkdir -p $(@D)
	$(CC) -c $(CFLAGS) fp_p1913.c -o objs/fp_p1913.o

objs/fp.o: fp.c
	@mkdir -p $(@D)
	$(CC) -c $(CFLAGS) fp.c -o objs/fp.o

objs/fp2.o: fp2.c
	@mkdir -p $(@D)
	$(CC) -c $(CFLAGS) fp2.c -o objs/fp2.o

objs/fp_asm.o: fp_asm.S
	@mkdir -p $(@D)
	$(CC) -c $(CFLAGS) fp_asm.S -o objs/fp_asm.o

objs/random.o: ../../../common/generic/randombytes_system.c
	@mkdir -p $(@D)
	$(CC) -c $(CFLAGS) ../../../common/generic/randombytes_system.c -o objs/random.o

# "lib" is also the name of the output directory, so it is deliberately not phony.
lib: $(OBJECTS)
	rm -rf lib
	mkdir lib
	$(AR) lib/libtest.a $^
	$(RANLIB) lib/libtest.a

tests: lib
	$(CC) $(CFLAGS) -L./lib test/test_fp.c test/test_extras.c -ltest $(LDFLAGS) -o test_fp -lgmp
	$(CC) $(CFLAGS) -L./lib test/test_fp2.c test/test_extras.c -ltest $(LDFLAGS) -o test_fp2 -lgmp

check: tests

# None of these targets produces a file with its own name.
.PHONY: all tests check clean

clean:
	rm -rf *.req objs lib test_fp*
|
||||
|
||||
192
src/gf/broadwell/lvl1/fp.c
Normal file
192
src/gf/broadwell/lvl1/fp.c
Normal file
@@ -0,0 +1,192 @@
|
||||
#include "include/fp.h"
|
||||
|
||||
const uint64_t p[NWORDS_FIELD] = { 0xffffffffffffffff, 0x252C9E49355147FF, 0x33A6A86587407437, 0x34E29E286B95D98C };
|
||||
const uint64_t R2[NWORDS_FIELD] = { 0x233625AE400674D4, 0x20AFD6C1025A1C2E, 0x30A841AB0920655D, 0x0D72E7D67C30CD3D };
|
||||
const uint64_t pp[NWORDS_FIELD] = { 0x01, 0x00, 0x00, 0x00 };
|
||||
|
||||
|
||||
void fp_set(digit_t* x, const digit_t val)
{ // Load a single-digit value into a field element: x = val.
  // The least-significant limb receives val; every higher limb is cleared.
    unsigned int limb = NWORDS_FIELD;

    while (--limb > 0) {
        x[limb] = 0;
    }
    x[0] = val;
}
|
||||
|
||||
void fp_mont_setone(digit_t* out1)
{ // Write the Montgomery representation of 1 into out1.
  // The constant is 2^256 - 4*p (i.e. 2^256 mod p) for the 4-limb modulus p defined in this file.
    static const uint64_t mont_one[4] = {
        0x4,
        UINT64_C(0x6b4d86db2abae000),
        UINT64_C(0x31655e69e2fe2f23),
        UINT64_C(0x2c75875e51a899cf)
    };

    for (unsigned int k = 0; k < 4; k++) {
        out1[k] = mont_one[k];
    }
}
|
||||
|
||||
bool fp_is_equal(const digit_t* a, const digit_t* b)
|
||||
{ // Compare two field elements in constant time
|
||||
// Returns 1 (true) if a=b, 0 (false) otherwise
|
||||
digit_t r = 0;
|
||||
|
||||
for (unsigned int i = 0; i < NWORDS_FIELD; i++)
|
||||
r |= a[i] ^ b[i];
|
||||
|
||||
return (bool)is_digit_zero_ct(r);
|
||||
}
|
||||
|
||||
bool fp_is_zero(const digit_t* a)
|
||||
{ // Is a field element zero?
|
||||
// Returns 1 (true) if a=0, 0 (false) otherwise
|
||||
digit_t r = 0;
|
||||
|
||||
for (unsigned int i = 0; i < NWORDS_FIELD; i++)
|
||||
r |= a[i] ^ 0;
|
||||
|
||||
return (bool)is_digit_zero_ct(r);
|
||||
}
|
||||
|
||||
void fp_copy(digit_t* out, const digit_t* a)
{ // Copy a field element: out = a (NWORDS_FIELD limbs of RADIX bits each)
    const size_t nbytes = NWORDS_FIELD*RADIX/8;

    memcpy(out, a, nbytes);
}
|
||||
|
||||
void fp_neg(digit_t* out, const digit_t* a)
{ // Modular negation, out = -a mod p
  // Input:  a in [0, p-1]
  // Output: out in [0, p-1]
    const digit_t* modulus = (const digit_t*)p;
    unsigned int borrow = 0;
    unsigned int limb;

    // out = p - a, limb by limb; this yields p itself when a = 0.
    for (limb = 0; limb < NWORDS_FIELD; limb++) {
        SUBC(out[limb], borrow, modulus[limb], a[limb], borrow);
    }
    // Subtracting p modulo p maps that p back to 0 and leaves values in [1, p-1] unchanged.
    fp_sub(out, out, modulus);
}
|
||||
|
||||
void fp_tomont(digit_t* out, const digit_t* a)
{ // Conversion to Montgomery representation.
  // Multiplies a by the constant R2 with fp_mul:
  // out = a*R^2*R^(-1) mod p = a*R mod p, where a in [0, p-1].
    const digit_t* r_squared = (const digit_t*)R2;

    fp_mul(out, a, r_squared);
}
|
||||
|
||||
void fp_frommont(digit_t* out, const digit_t* a)
{ // Conversion from Montgomery representation to standard representation.
  // Multiplies a by the plain integer 1 with fp_mul:
  // out = a*R^(-1) mod p, where a in [0, p-1].
    digit_t unit[NWORDS_FIELD] = {1};   // lowest limb 1, remaining limbs 0

    fp_mul(out, a, unit);
}
|
||||
|
||||
void MUL(digit_t* out, const digit_t a, const digit_t b)
{ // Portable digit multiplication, digit*digit -> 2-digit result.
  // Inputs: a, b, each one full digit
  // Output: out[0] = low digit of a*b, out[1] = high digit of a*b
  // Method: schoolbook multiplication on half-digits.
    const unsigned int half = (unsigned int)(sizeof(digit_t)*4);   // bits in half a digit
    const digit_t lo_mask = (digit_t)(-1) >> half;
    const digit_t hi_mask = (digit_t)(-1) << half;

    // Split both operands into low/high halves.
    const digit_t a_lo = a & lo_mask, a_hi = a >> half;
    const digit_t b_lo = b & lo_mask, b_hi = b >> half;

    // The four partial products.
    const digit_t ll = a_lo * b_lo;
    const digit_t lh = a_lo * b_hi;
    const digit_t hl = a_hi * b_lo;
    const digit_t hh = a_hi * b_hi;

    // Second quarter of the result: upper half of ll plus the lower halves of both cross terms.
    const digit_t mid = (ll >> half) + (hl & lo_mask) + (lh & lo_mask);
    out[0] = (ll & lo_mask) ^ (mid << half);

    // Third quarter: upper halves of the cross terms, lower half of hh, and the carry out of mid.
    const digit_t top = (hl >> half) + (lh >> half) + (hh & lo_mask) + (mid >> half);
    // Fourth quarter: upper half of hh plus the carry out of top (kept in its high position).
    out[1] = (top & lo_mask) ^ ((hh & hi_mask) + (top & hi_mask));
}
|
||||
|
||||
digit_t mp_shiftr(digit_t* x, const unsigned int shift, const unsigned int nwords)
{ // Multiprecision right shift, in place: x = x >> shift over nwords digits.
  // Requirement: shift < RADIX.
  // Returns the least-significant bit of x before shifting (the bit shifted out when shift = 1);
  // returns 0 for an empty operand (nwords = 0).
    if (nwords == 0) {
        return 0;                       // nothing to read or shift; avoids the nwords-1 wrap-around
    }

    const digit_t bit_out = x[0] & 1;

    if (shift == 0) {
        return bit_out;                 // SHIFTR would shift by RADIX bits, which is undefined in C
    }

    for (unsigned int i = 0; i + 1 < nwords; i++) {
        SHIFTR(x[i+1], x[i], shift, x[i], RADIX);
    }
    x[nwords-1] >>= shift;
    return bit_out;
}
|
||||
|
||||
void mp_shiftl(digit_t* x, const unsigned int shift, const unsigned int nwords)
{ // Multiprecision left shift, in place: x = x << shift over nwords digits.
  // Requirement: shift < RADIX. Bits shifted out of the top digit are discarded.
    if (nwords == 0 || shift == 0) {
        return;   // empty operand, or a no-op shift for which SHIFTL would shift by RADIX bits (undefined in C)
    }

    // Walk from the top digit down so each digit still sees its unshifted lower neighbour.
    for (unsigned int i = nwords - 1; i > 0; i--) {
        SHIFTL(x[i], x[i-1], shift, x[i], RADIX);
    }
    x[0] <<= shift;
}
|
||||
|
||||
static void fp_exp3div4(digit_t* out, const digit_t* a)
{ // Fixed exponentiation out = a^((p-3)/4) mod p
  // Input:  a in [0, p-1] (Montgomery form, since the accumulator starts from tomont(1))
  // Output: out in [0, p-1]
  // Requirement: p = 3 (mod 4)
  // Right-to-left square-and-multiply; the branch depends only on the bits of the public modulus.
    fp_t exponent, base;
    const int nbits = NWORDS_FIELD*RADIX-2;

    fp_copy(exponent, (const digit_t*)p);
    fp_copy(base, a);
    mp_shiftr(exponent, 2, NWORDS_FIELD);        // exponent = floor(p/4) = (p-3)/4

    fp_set(out, 1);
    fp_tomont(out, out);                         // out = 1 in Montgomery form

    for (int k = nbits; k > 0; k--) {
        // mp_shiftr returns the low bit of the exponent as it was before the shift.
        const digit_t bit = mp_shiftr(exponent, 1, NWORDS_FIELD);

        if (bit == 1) {
            fp_mul(out, out, base);
        }
        fp_sqr(base, base);
    }
}
|
||||
|
||||
void fp_inv(digit_t* a)
{ // Modular inversion in place via a^(p-2): a <- x^-1*R mod p for a = x*R.
  // Input:  a in [0, p-1]
  // Output: a in [0, p-1]; the result is 0 when the input is 0 (no inverse exists)
    fp_t pw;

    fp_exp3div4(pw, a);                 // a^((p-3)/4)
    for (int s = 0; s < 2; s++) {
        fp_sqr(pw, pw);                 // after two squarings: a^(p-3)
    }
    fp_mul(a, pw, a);                   // a^(p-2)
}
|
||||
|
||||
bool fp_is_square(const digit_t* a)
|
||||
{ // Is field element a square?
|
||||
// Output: out = 0 (false), 1 (true)
|
||||
fp_t t, one;
|
||||
|
||||
fp_exp3div4(t, a);
|
||||
fp_sqr(t, t);
|
||||
fp_mul(t, t, a); // a^((p-1)/2)
|
||||
fp_frommont(t, t);
|
||||
fp_set(one, 1);
|
||||
|
||||
return fp_is_equal(t, one);
|
||||
}
|
||||
|
||||
void fp_sqrt(digit_t* a)
{ // Square root computation in place, a <- a^((p+1)/4) mod p.
  // No check is made here that the input is actually a square.
    fp_t root_over_a;

    fp_exp3div4(root_over_a, a);        // a^((p-3)/4)
    fp_mul(a, root_over_a, a);          // a^((p-3)/4) * a = a^((p+1)/4)
}
|
||||
190
src/gf/broadwell/lvl1/fp2.c
Normal file
190
src/gf/broadwell/lvl1/fp2.c
Normal file
@@ -0,0 +1,190 @@
|
||||
#include <fp2.h>
|
||||
|
||||
extern const digit_t R[NWORDS_FIELD];
|
||||
|
||||
extern void fp2_sq_c0(fp2_t *out, const fp2_t *in);
|
||||
extern void fp2_sq_c1(fp_t *out, const fp2_t *in);
|
||||
|
||||
extern void fp2_mul_c0(fp_t *out, const fp2_t *in0, const fp2_t *in1);
|
||||
extern void fp2_mul_c1(fp_t *out, const fp2_t *in0, const fp2_t *in1);
|
||||
|
||||
/* Arithmetic modulo X^2 + 1 */
|
||||
|
||||
void fp2_set(fp2_t* x, const digit_t val)
{ // Set a GF(p^2) element from a single digit: x = val + 0*i
    fp_set(x->im, 0);
    fp_set(x->re, val);
}
|
||||
|
||||
bool fp2_is_zero(const fp2_t* a)
|
||||
{ // Is a GF(p^2) element zero?
|
||||
// Returns 1 (true) if a=0, 0 (false) otherwise
|
||||
|
||||
return fp_is_zero(a->re) & fp_is_zero(a->im);
|
||||
}
|
||||
|
||||
bool fp2_is_equal(const fp2_t* a, const fp2_t* b)
|
||||
{ // Compare two GF(p^2) elements in constant time
|
||||
// Returns 1 (true) if a=b, 0 (false) otherwise
|
||||
|
||||
return fp_is_equal(a->re, b->re) & fp_is_equal(a->im, b->im);
|
||||
}
|
||||
|
||||
void fp2_copy(fp2_t* x, const fp2_t* y)
{ // Copy a GF(p^2) element component by component: x = y
    fp_copy(x->im, y->im);
    fp_copy(x->re, y->re);
}
|
||||
|
||||
fp2_t fp2_non_residue()
|
||||
{ // 2 + i is a quadratic non-residue for p1913
|
||||
fp_t one = {0};
|
||||
fp2_t res;
|
||||
|
||||
one[0] = 1;
|
||||
fp_tomont(one, one);
|
||||
fp_add(res.re, one, one);
|
||||
fp_copy(res.im, one);
|
||||
return res;
|
||||
}
|
||||
|
||||
void fp2_add(fp2_t* x, const fp2_t* y, const fp2_t* z)
{ // Component-wise addition in GF(p^2): x = y + z
    fp_add(x->im, y->im, z->im);
    fp_add(x->re, y->re, z->re);
}
|
||||
|
||||
void fp2_sub(fp2_t* x, const fp2_t* y, const fp2_t* z)
{ // Component-wise subtraction in GF(p^2): x = y - z
    fp_sub(x->im, y->im, z->im);
    fp_sub(x->re, y->re, z->re);
}
|
||||
|
||||
void fp2_neg(fp2_t* x, const fp2_t* y)
{ // Component-wise negation in GF(p^2): x = -y
    fp_neg(x->im, y->im);
    fp_neg(x->re, y->re);
}
|
||||
|
||||
void fp2_mul(fp2_t* x, const fp2_t* y, const fp2_t* z)
{ // Multiplication in GF(p^2): x = y * z, using the assembly kernels for each component.
  // x may alias y and/or z: the real part is staged in a temporary and only written
  // to x after the imaginary-part kernel has finished reading the inputs.
    fp_t t;

    fp2_mul_c0(&t, y, z);       // c0 = a0*b0 - a1*b1
    fp2_mul_c1(&x->im, y, z);   // c1 = a0*b1 + a1*b0
    fp_copy(x->re, t);          // copies all NWORDS_FIELD limbs
}
|
||||
|
||||
void fp2_sqr(fp2_t* x, const fp2_t* y) {
    // Squaring in GF(p^2): x = y^2, using the assembly kernels for each component.
    // fp2_sq_c0 uses both halves of its output as scratch space, so it needs a full fp2_t;
    // only t.re holds the result. x may alias y: the real part is written to x last.
    fp2_t t;

    fp2_sq_c0(&t, y);           // c0 = (a0+a1)(a0-a1)
    fp2_sq_c1(&x->im, y);       // c1 = 2a0*a1
    fp_copy(x->re, t.re);       // copies all NWORDS_FIELD limbs
}
|
||||
|
||||
void fp2_inv(fp2_t* x)
{ // Inversion in GF(p^2), in place: x <- conj(x) / (re^2 + im^2)
    fp_t norm, im_sq;

    // norm = re^2 + im^2, then inverted in GF(p)
    fp_sqr(norm, x->re);
    fp_sqr(im_sq, x->im);
    fp_add(norm, norm, im_sq);
    fp_inv(norm);

    // Scale both components by 1/norm and flip the sign of the imaginary part.
    fp_mul(x->re, x->re, norm);
    fp_mul(x->im, x->im, norm);
    fp_neg(x->im, x->im);
}
|
||||
|
||||
bool fp2_is_square(const fp2_t* x)
|
||||
{
|
||||
fp_t t0, t1;
|
||||
|
||||
fp_sqr(t0, x->re);
|
||||
fp_sqr(t1, x->im);
|
||||
fp_add(t0, t0, t1);
|
||||
|
||||
return fp_is_square(t0);
|
||||
}
|
||||
|
||||
void fp2_frob(fp2_t* x, const fp2_t* y)
{ // x = re(y) - im(y)*i: the real part is copied, the imaginary part is negated
    fp_copy(x->re, y->re);
    fp_neg(x->im, y->im);
}
|
||||
|
||||
void fp2_tomont(fp2_t* x, const fp2_t* y)
{ // Convert both components of y to Montgomery representation: x = tomont(y)
    fp_tomont(x->im, y->im);
    fp_tomont(x->re, y->re);
}
|
||||
|
||||
void fp2_frommont(fp2_t* x, const fp2_t* y)
{ // Convert both components of y out of Montgomery representation: x = frommont(y)
    fp_frommont(x->im, y->im);
    fp_frommont(x->re, y->re);
}
|
||||
|
||||
// NOTE: old, non-constant-time implementation. Could be optimized
|
||||
void fp2_sqrt(fp2_t* x)
{ // Square root in GF(p^2), in place. Branches on the input value (not constant time).
  // No check is made here that x is a square.
    fp_t norm_root, real, im_sq, half, imag;

    // Input with zero imaginary part.
    if (fp_is_zero(x->im)) {
        if (!fp_is_square(x->re)) {
            // re fails the square test in GF(p): the root is sqrt(-re)*i.
            fp_neg(x->im, x->re);
            fp_sqrt(x->im);
            fp_set(x->re, 0);
        } else {
            fp_sqrt(x->re);
        }
        return;
    }

    // norm_root = sqrt(re^2 + im^2)
    fp_sqr(norm_root, x->re);
    fp_sqr(im_sq, x->im);
    fp_add(norm_root, norm_root, im_sq);
    fp_sqrt(norm_root);

    // half = 1/2 (Montgomery form)
    fp_set(half, 2);
    fp_tomont(half, half);
    fp_inv(half);

    // real = (re + norm_root)/2, or (re - norm_root)/2 if the former fails the square test
    fp_add(real, x->re, norm_root);
    fp_mul(real, real, half);
    if (!fp_is_square(real)) {
        fp_sub(real, x->re, norm_root);
        fp_mul(real, real, half);
    }
    fp_sqrt(real);                      // real part of the root

    // imaginary part of the root = im / (2*real)
    fp_copy(imag, real);
    fp_inv(imag);
    fp_mul(imag, imag, half);
    fp_mul(x->im, imag, x->im);

    fp_copy(x->re, real);
}
|
||||
|
||||
// Lexicographic comparison of two field elements. Returns +1 if x > y, -1 if x < y, 0 if x = y
|
||||
int fp2_cmp(fp2_t* x, fp2_t* y){
    // Both operands are first converted out of Montgomery form. The real parts are compared
    // as multi-limb integers (most-significant limb first); the imaginary parts are only
    // consulted when the real parts are equal. Returns on the first differing limb.
    fp2_t a, b;

    fp2_frommont(&a, x);
    fp2_frommont(&b, y);

    const digit_t* lhs[2] = { a.re, a.im };
    const digit_t* rhs[2] = { b.re, b.im };

    for(int part = 0; part < 2; part++){
        for(int i = NWORDS_FIELD-1; i >= 0; i--){
            if(lhs[part][i] != rhs[part][i])
                return (lhs[part][i] > rhs[part][i]) ? 1 : -1;
        }
    }
    return 0;
}
|
||||
555
src/gf/broadwell/lvl1/fp_asm.S
Normal file
555
src/gf/broadwell/lvl1/fp_asm.S
Normal file
@@ -0,0 +1,555 @@
|
||||
.intel_syntax noprefix
|
||||
|
||||
.set pbytes,32
|
||||
.set plimbs,4
|
||||
|
||||
.global p_plus_1
|
||||
p_plus_1: .quad 0x0000000000000000, 0x252C9E4935514800, 0x33A6A86587407437, 0x34E29E286B95D98C
|
||||
|
||||
.text
|
||||
.p2align 4,,15
|
||||
|
||||
.global fp_add
|
||||
fp_add:
|
||||
push r12
|
||||
xor rax, rax
|
||||
mov r8, [rsi]
|
||||
mov r9, [rsi+8]
|
||||
mov r10, [rsi+16]
|
||||
mov r11, [rsi+24]
|
||||
add r8, [rdx]
|
||||
adc r9, [rdx+8]
|
||||
adc r10, [rdx+16]
|
||||
adc r11, [rdx+24]
|
||||
mov r12, [rip+p]
|
||||
sub r8, r12
|
||||
mov rcx, [rip+p+8]
|
||||
sbb r9, rcx
|
||||
mov rsi, [rip+p+16]
|
||||
sbb r10, rsi
|
||||
mov rdx, [rip+p+24]
|
||||
sbb r11, rdx
|
||||
sbb rax, 0
|
||||
|
||||
and r12, rax
|
||||
and rcx, rax
|
||||
and rsi, rax
|
||||
and rdx, rax
|
||||
|
||||
add r8, r12
|
||||
adc r9, rcx
|
||||
adc r10, rsi
|
||||
adc r11, rdx
|
||||
mov [rdi], r8
|
||||
mov [rdi+8], r9
|
||||
mov [rdi+16], r10
|
||||
mov [rdi+24], r11
|
||||
pop r12
|
||||
ret
|
||||
|
||||
.global fp_sub
|
||||
fp_sub:
|
||||
push r12
|
||||
xor rax, rax
|
||||
mov r8, [rsi]
|
||||
mov r9, [rsi+8]
|
||||
mov r10, [rsi+16]
|
||||
mov r11, [rsi+24]
|
||||
sub r8, [rdx]
|
||||
sbb r9, [rdx+8]
|
||||
sbb r10, [rdx+16]
|
||||
sbb r11, [rdx+24]
|
||||
sbb rax, 0
|
||||
|
||||
mov r12, [rip+p]
|
||||
mov rcx, [rip+p+8]
|
||||
mov rsi, [rip+p+16]
|
||||
mov rdx, [rip+p+24]
|
||||
and r12, rax
|
||||
and rcx, rax
|
||||
and rsi, rax
|
||||
and rdx, rax
|
||||
add r8, r12
|
||||
adc r9, rcx
|
||||
adc r10, rsi
|
||||
adc r11, rdx
|
||||
mov [rdi], r8
|
||||
mov [rdi+8], r9
|
||||
mov [rdi+16], r10
|
||||
mov [rdi+24], r11
|
||||
pop r12
|
||||
ret
|
||||
|
||||
///////////////////////////////////////////////////////////////// MACROS
|
||||
// z = a x bi + z
|
||||
// Inputs: base memory pointer M1 (a),
|
||||
// bi pre-stored in rdx,
|
||||
// accumulator z in [Z0:Z4]
|
||||
// Output: [Z0:Z4]
|
||||
// Temps: regs T0:T1
|
||||
/////////////////////////////////////////////////////////////////
|
||||
.macro MULADD64x256 M1, Z0, Z1, Z2, Z3, Z4, T0, T1, C
|
||||
mulx \T0, \T1, \M1 // A0*B0
|
||||
xor \C, \C
|
||||
adox \Z0, \T1
|
||||
adox \Z1, \T0
|
||||
mulx \T0, \T1, 8\M1 // A0*B1
|
||||
adcx \Z1, \T1
|
||||
adox \Z2, \T0
|
||||
mulx \T0, \T1, 16\M1 // A0*B2
|
||||
adcx \Z2, \T1
|
||||
adox \Z3, \T0
|
||||
mulx \T0, \T1, 24\M1 // A0*B3
|
||||
adcx \Z3, \T1
|
||||
adox \Z4, \T0
|
||||
adc \Z4, 0
|
||||
.endm
|
||||
|
||||
.macro MULADD64x192 M1, Z0, Z1, Z2, Z3, T0, T1
|
||||
mulx \T0, \T1, \M1 // A0*B0
|
||||
xor rax, rax
|
||||
adox \Z0, \T1
|
||||
adox \Z1, \T0
|
||||
mulx \T0, \T1, 8\M1 // A0*B1
|
||||
adcx \Z1, \T1
|
||||
adox \Z2, \T0
|
||||
mulx \T0, \T1, 16\M1 // A0*B2
|
||||
adcx \Z2, \T1
|
||||
adox \Z3, \T0
|
||||
adc \Z3, 0
|
||||
.endm
|
||||
|
||||
//***********************************************************************
|
||||
// Multiplication in GF(p^2), non-complex part
|
||||
// Operation: c [rdi] = a0 x b0 - a1 x b1
|
||||
// Inputs: a = [a1, a0] stored in [rsi]
|
||||
// b = [b1, b0] stored in [rdx]
|
||||
// Output: c stored in [rdi]
|
||||
//***********************************************************************
|
||||
.global fp2_mul_c0
|
||||
fp2_mul_c0:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
mov rcx, rdx
|
||||
|
||||
// [rdi0:3] <- p - b1
|
||||
mov r8, [rip+p]
|
||||
mov r9, [rip+p+8]
|
||||
mov r10, [rip+p+16]
|
||||
mov r11, [rip+p+24]
|
||||
mov rax, [rcx+32]
|
||||
mov rdx, [rcx+40]
|
||||
sub r8, rax
|
||||
sbb r9, rdx
|
||||
mov rax, [rcx+48]
|
||||
mov rdx, [rcx+56]
|
||||
sbb r10, rax
|
||||
sbb r11, rdx
|
||||
mov [rdi], r8
|
||||
mov [rdi+8], r9
|
||||
mov [rdi+16], r10
|
||||
mov [rdi+24], r11
|
||||
|
||||
// [r8:r12] <- z = a0 x b00 - a1 x b10
|
||||
mov rdx, [rcx]
|
||||
mulx r9, r8, [rsi]
|
||||
xor rax, rax
|
||||
mulx r10, r11, [rsi+8]
|
||||
adox r9, r11
|
||||
mulx r11, r12, [rsi+16]
|
||||
adox r10, r12
|
||||
mulx r12, r13, [rsi+24]
|
||||
adox r11, r13
|
||||
adox r12, rax
|
||||
|
||||
mov rdx, [rdi]
|
||||
MULADD64x256 [rsi+32], r8, r9, r10, r11, r12, r13, r14, rax
|
||||
// [r9:r12] <- z = (z0 x p_plus_1 + z)/2^64
|
||||
mov rdx, r8 // rdx <- z0
|
||||
MULADD64x192 [rip+p_plus_1+8], r9, r10, r11, r12, r13, r14
|
||||
|
||||
// [r9:r12, r8] <- z = a0 x b01 - a1 x b11 + z
|
||||
mov rdx, [rcx+8]
|
||||
MULADD64x256 [rsi], r9, r10, r11, r12, r8, r13, r14, r8
|
||||
mov rdx, [rdi+8]
|
||||
MULADD64x256 [rsi+32], r9, r10, r11, r12, r8, r13, r14, rax
|
||||
// [r10:r12, r8] <- z = (z0 x p_plus_1 + z)/2^64
|
||||
mov rdx, r9 // rdx <- z0
|
||||
MULADD64x192 [rip+p_plus_1+8], r10, r11, r12, r8, r13, r14
|
||||
|
||||
// [r10:r12, r8:r9] <- z = a0 x b02 - a1 x b12 + z
|
||||
mov rdx, [rcx+16]
|
||||
MULADD64x256 [rsi], r10, r11, r12, r8, r9, r13, r14, r9
|
||||
mov rdx, [rdi+16]
|
||||
MULADD64x256 [rsi+32], r10, r11, r12, r8, r9, r13, r14, rax
|
||||
// [r11:r12, r8:r9] <- z = (z0 x p_plus_1 + z)/2^64
|
||||
mov rdx, r10 // rdx <- z0
|
||||
MULADD64x192 [rip+p_plus_1+8], r11, r12, r8, r9, r13, r14
|
||||
|
||||
// [r11:r12, r8:r10] <- z = a0 x b03 - a1 x b13 + z
|
||||
mov rdx, [rcx+24]
|
||||
MULADD64x256 [rsi], r11, r12, r8, r9, r10, r13, r14, r10
|
||||
mov rdx, [rdi+24]
|
||||
MULADD64x256 [rsi+32], r11, r12, r8, r9, r10, r13, r14, rax
|
||||
// [r12, r8:r10] <- z = (z0 x p_plus_1 + z)/2^64
|
||||
mov rdx, r11 // rdx <- z0
|
||||
MULADD64x192 [rip+p_plus_1+8], r12, r8, r9, r10, r13, r14
|
||||
|
||||
// Final correction
|
||||
mov rsi, [rip+p]
|
||||
mov rcx, [rip+p+8]
|
||||
mov rdx, [rip+p+16]
|
||||
mov r11, [rip+p+24]
|
||||
sub r12, rsi
|
||||
sbb r8, rcx
|
||||
sbb r9, rdx
|
||||
sbb r10, r11
|
||||
sbb rax, 0
|
||||
and rsi, rax
|
||||
and rcx, rax
|
||||
and rdx, rax
|
||||
and r11, rax
|
||||
add r12, rsi
|
||||
adc r8, rcx
|
||||
adc r9, rdx
|
||||
adc r10, r11
|
||||
|
||||
mov [rdi], r12
|
||||
mov [rdi+8], r8
|
||||
mov [rdi+16], r9
|
||||
mov [rdi+24], r10
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
ret
|
||||
|
||||
//***********************************************************************
|
||||
// Multiplication in GF(p^2), complex part
|
||||
// Operation: c [rdi] = a0 x b1 + a1 x b0
|
||||
// Inputs: a = [a1, a0] stored in [rsi]
|
||||
// b = [b1, b0] stored in [rdx]
|
||||
// Output: c stored in [rdi]
|
||||
//***********************************************************************
|
||||
.global fp2_mul_c1
|
||||
fp2_mul_c1:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
mov rcx, rdx
|
||||
|
||||
// [r8:r12] <- z = a0 x b10 + a1 x b00
|
||||
mov rdx, [rcx+32]
|
||||
mulx r9, r8, [rsi]
|
||||
xor rax, rax
|
||||
mulx r10, r11, [rsi+8]
|
||||
adox r9, r11
|
||||
mulx r11, r12, [rsi+16]
|
||||
adox r10, r12
|
||||
mulx r12, r13, [rsi+24]
|
||||
adox r11, r13
|
||||
adox r12, rax
|
||||
|
||||
mov rdx, [rcx]
|
||||
MULADD64x256 [rsi+32], r8, r9, r10, r11, r12, r13, r14, rax
|
||||
// [r9:r12] <- z = (z0 x p_plus_1 + z)/2^64
|
||||
mov rdx, r8 // rdx <- z0
|
||||
MULADD64x192 [rip+p_plus_1+8], r9, r10, r11, r12, r13, r14
|
||||
|
||||
// [r9:r12, r8] <- z = a0 x b11 + a1 x b01 + z
|
||||
mov rdx, [rcx+40]
|
||||
MULADD64x256 [rsi], r9, r10, r11, r12, r8, r13, r14, r8
|
||||
mov rdx, [rcx+8]
|
||||
MULADD64x256 [rsi+32], r9, r10, r11, r12, r8, r13, r14, rax
|
||||
// [r10:r12, r8] <- z = (z0 x p_plus_1 + z)/2^64
|
||||
mov rdx, r9 // rdx <- z0
|
||||
MULADD64x192 [rip+p_plus_1+8], r10, r11, r12, r8, r13, r14
|
||||
|
||||
// [r10:r12, r8:r9] <- z = a0 x b12 + a1 x b02 + z
|
||||
mov rdx, [rcx+48]
|
||||
MULADD64x256 [rsi], r10, r11, r12, r8, r9, r13, r14, r9
|
||||
mov rdx, [rcx+16]
|
||||
MULADD64x256 [rsi+32], r10, r11, r12, r8, r9, r13, r14, rax
|
||||
// [r11:r12, r8:r9] <- z = (z0 x p_plus_1 + z)/2^64
|
||||
mov rdx, r10 // rdx <- z0
|
||||
MULADD64x192 [rip+p_plus_1+8], r11, r12, r8, r9, r13, r14
|
||||
|
||||
// [r11:r12, r8:r10] <- z = a0 x b13 + a1 x b03 + z
|
||||
mov rdx, [rcx+56]
|
||||
MULADD64x256 [rsi], r11, r12, r8, r9, r10, r13, r14, r10
|
||||
mov rdx, [rcx+24]
|
||||
MULADD64x256 [rsi+32], r11, r12, r8, r9, r10, r13, r14, rax
|
||||
// [r12, r8:r10] <- z = (z0 x p_plus_1 + z)/2^64
|
||||
mov rdx, r11 // rdx <- z0
|
||||
MULADD64x192 [rip+p_plus_1+8], r12, r8, r9, r10, r13, r14
|
||||
|
||||
// Final correction
|
||||
mov rsi, [rip+p]
|
||||
mov rcx, [rip+p+8]
|
||||
mov rdx, [rip+p+16]
|
||||
mov r11, [rip+p+24]
|
||||
sub r12, rsi
|
||||
sbb r8, rcx
|
||||
sbb r9, rdx
|
||||
sbb r10, r11
|
||||
sbb rax, 0
|
||||
and rsi, rax
|
||||
and rcx, rax
|
||||
and rdx, rax
|
||||
and r11, rax
|
||||
add r12, rsi
|
||||
adc r8, rcx
|
||||
adc r9, rdx
|
||||
adc r10, r11
|
||||
|
||||
mov [rdi], r12
|
||||
mov [rdi+8], r8
|
||||
mov [rdi+16], r9
|
||||
mov [rdi+24], r10
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
ret
|
||||
|
||||
///////////////////////////////////////////////////////////////// MACRO
|
||||
// z = a x b (mod p)
|
||||
// Inputs: base memory pointers M0 (a), M1 (b)
|
||||
// bi pre-stored in rdx,
|
||||
// accumulator z in [Z0:Z4], pre-stores a0 x b
|
||||
// Output: [Z0:Z4]
|
||||
// Temps: regs T0:T1
|
||||
/////////////////////////////////////////////////////////////////
|
||||
.macro FPMUL256x256 M0, M1, Z0, Z1, Z2, Z3, Z4, T0, T1
|
||||
// [Z1:Z4] <- z = (z0 x p_plus_1 + z)/2^64
|
||||
mov rdx, \Z0 // rdx <- z0
|
||||
MULADD64x192 [rip+p_plus_1+8], \Z1, \Z2, \Z3, \Z4, \T0, \T1
|
||||
|
||||
// [Z1:Z4, Z0] <- z = a01 x a1 + z
|
||||
mov rdx, 8\M0
|
||||
MULADD64x256 \M1, \Z1, \Z2, \Z3, \Z4, \Z0, \T0, \T1, \Z0
|
||||
// [Z2:Z4, Z0] <- z = (z0 x p_plus_1 + z)/2^64
|
||||
mov rdx, \Z1 // rdx <- z0
|
||||
MULADD64x192 [rip+p_plus_1+8], \Z2, \Z3, \Z4, \Z0, \T0, \T1
|
||||
|
||||
// [Z2:Z4, Z0:Z1] <- z = a02 x a1 + z
|
||||
mov rdx, 16\M0
|
||||
MULADD64x256 \M1, \Z2, \Z3, \Z4, \Z0, \Z1, \T0, \T1, \Z1
|
||||
// [Z3:Z4, Z0:Z1] <- z = (z0 x p_plus_1 + z)/2^64
|
||||
mov rdx, \Z2 // rdx <- z0
|
||||
MULADD64x192 [rip+p_plus_1+8], \Z3, \Z4, \Z0, \Z1, \T0, \T1
|
||||
|
||||
// [Z3:Z4, Z0:Z2] <- z = a03 x a1 + z
|
||||
mov rdx, 24\M0
|
||||
MULADD64x256 \M1, \Z3, \Z4, \Z0, \Z1, \Z2, \T0, \T1, \Z2
|
||||
// [Z4, Z0:Z2] <- z = (z0 x p_plus_1 + z)/2^64
|
||||
mov rdx, \Z3 // rdx <- z0
|
||||
MULADD64x192 [rip+p_plus_1+8], \Z4, \Z0, \Z1, \Z2, \T0, \T1
|
||||
.endm
|
||||
|
||||
//***********************************************************************
|
||||
// Squaring in GF(p^2), non-complex part
|
||||
// Operation: c [rdi] = (a0+a1) x (a0-a1)
|
||||
// Inputs: a = [a1, a0] stored in [rsi]
|
||||
// Output: c stored in [rdi]
|
||||
//***********************************************************************
|
||||
.global fp2_sq_c0
|
||||
fp2_sq_c0:
|
||||
push r12
|
||||
push r13
|
||||
|
||||
// a0 + a1
|
||||
mov rdx, [rsi]
|
||||
mov r9, [rsi+8]
|
||||
mov r10, [rsi+16]
|
||||
mov r11, [rsi+24]
|
||||
add rdx, [rsi+32]
|
||||
adc r9, [rsi+40]
|
||||
adc r10, [rsi+48]
|
||||
adc r11, [rsi+56]
|
||||
mov [rdi], rdx
|
||||
mov [rdi+8], r9
|
||||
mov [rdi+16], r10
|
||||
mov [rdi+24], r11
|
||||
|
||||
// a0 - a1 + p
|
||||
mov r8, [rsi]
|
||||
mov r10, [rsi+8]
|
||||
mov r12, [rsi+16]
|
||||
mov r13, [rsi+24]
|
||||
sub r8, [rsi+32]
|
||||
sbb r10, [rsi+40]
|
||||
sbb r12, [rsi+48]
|
||||
sbb r13, [rsi+56]
|
||||
add r8, [rip+p]
|
||||
adc r10, [rip+p+8]
|
||||
adc r12, [rip+p+16]
|
||||
adc r13, [rip+p+24]
|
||||
mov [rdi+32], r8
|
||||
mov [rdi+40], r10
|
||||
mov [rdi+48], r12
|
||||
mov [rdi+56], r13
|
||||
|
||||
// [r8:r12] <- z = (a0+a1)_0 x (a0-a1+p), where rdx holds limb 0 of a0+a1
|
||||
mulx r9, r8, r8
|
||||
xor rax, rax
|
||||
mulx r10, r11, r10
|
||||
adox r9, r11
|
||||
mulx r11, r12, r12
|
||||
adox r10, r12
|
||||
mulx r12, r13, r13
|
||||
adox r11, r13
|
||||
adox r12, rax
|
||||
|
||||
FPMUL256x256 [rdi], [rdi+32], r8, r9, r10, r11, r12, r13, rcx
|
||||
|
||||
// Final correction
|
||||
mov rsi, [rip+p]
|
||||
mov rcx, [rip+p+8]
|
||||
mov rdx, [rip+p+16]
|
||||
mov r11, [rip+p+24]
|
||||
sub r12, rsi
|
||||
sbb r8, rcx
|
||||
sbb r9, rdx
|
||||
sbb r10, r11
|
||||
sbb rax, 0
|
||||
and rsi, rax
|
||||
and rcx, rax
|
||||
and rdx, rax
|
||||
and r11, rax
|
||||
add r12, rsi
|
||||
adc r8, rcx
|
||||
adc r9, rdx
|
||||
adc r10, r11
|
||||
|
||||
mov [rdi], r12
|
||||
mov [rdi+8], r8
|
||||
mov [rdi+16], r9
|
||||
mov [rdi+24], r10
|
||||
pop r13
|
||||
pop r12
|
||||
ret
|
||||
|
||||
//***********************************************************************
|
||||
// Squaring in GF(p^2), complex part
|
||||
// Operation: c [rdi] = 2a0 x a1
|
||||
// Inputs: a = [a1, a0] stored in [rsi]
|
||||
// Output: c stored in [rdi]
|
||||
//***********************************************************************
|
||||
.global fp2_sq_c1
|
||||
fp2_sq_c1:
|
||||
push r12
|
||||
push r13
|
||||
|
||||
mov rdx, [rsi]
|
||||
mov r9, [rsi+8]
|
||||
mov r10, [rsi+16]
|
||||
mov r11, [rsi+24]
|
||||
add rdx, rdx
|
||||
adc r9, r9
|
||||
adc r10, r10
|
||||
adc r11, r11
|
||||
sub rsp, 32
|
||||
mov [rsp+8], r9
|
||||
mov [rsp+16], r10
|
||||
mov [rsp+24], r11
|
||||
|
||||
// [r8:r12] <- z = (2*a0)_0 x a1, where rdx holds limb 0 of 2*a0
|
||||
mulx r9, r8, [rsi+32]
|
||||
xor rax, rax
|
||||
mulx r10, r11, [rsi+40]
|
||||
adox r9, r11
|
||||
mulx r11, r12, [rsi+48]
|
||||
adox r10, r12
|
||||
mulx r12, r13, [rsi+56]
|
||||
adox r11, r13
|
||||
adox r12, rax
|
||||
|
||||
FPMUL256x256 [rsp], [rsi+32], r8, r9, r10, r11, r12, r13, rcx
|
||||
add rsp, 32
|
||||
|
||||
// Final correction
|
||||
mov rsi, [rip+p]
|
||||
mov rcx, [rip+p+8]
|
||||
mov rdx, [rip+p+16]
|
||||
mov r11, [rip+p+24]
|
||||
sub r12, rsi
|
||||
sbb r8, rcx
|
||||
sbb r9, rdx
|
||||
sbb r10, r11
|
||||
sbb rax, 0
|
||||
and rsi, rax
|
||||
and rcx, rax
|
||||
and rdx, rax
|
||||
and r11, rax
|
||||
add r12, rsi
|
||||
adc r8, rcx
|
||||
adc r9, rdx
|
||||
adc r10, r11
|
||||
|
||||
mov [rdi], r12
|
||||
mov [rdi+8], r8
|
||||
mov [rdi+16], r9
|
||||
mov [rdi+24], r10
|
||||
pop r13
|
||||
pop r12
|
||||
ret
|
||||
|
||||
//***********************************************************************
|
||||
// Field multiplication in GF(p)
|
||||
// Operation: c = a x b mod p
|
||||
// Inputs: a stored in [rsi], b stored in [rdx]
|
||||
// Output: c stored in [rdi]
|
||||
//***********************************************************************
|
||||
.global fp_mul
|
||||
fp_mul:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
mov rcx, rdx
|
||||
|
||||
// [r8:r12] <- z = a x b0
|
||||
mov rdx, [rcx]
|
||||
mulx r9, r8, [rsi]
|
||||
xor rax, rax
|
||||
mulx r10, r11, [rsi+8]
|
||||
adox r9, r11
|
||||
mulx r11, r12, [rsi+16]
|
||||
adox r10, r12
|
||||
mulx r12, r13, [rsi+24]
|
||||
adox r11, r13
|
||||
adox r12, rax
|
||||
|
||||
FPMUL256x256 [rcx], [rsi], r8, r9, r10, r11, r12, r13, r14
|
||||
|
||||
// Final correction
|
||||
mov rsi, [rip+p]
|
||||
mov rcx, [rip+p+8]
|
||||
mov rdx, [rip+p+16]
|
||||
mov r11, [rip+p+24]
|
||||
sub r12, rsi
|
||||
sbb r8, rcx
|
||||
sbb r9, rdx
|
||||
sbb r10, r11
|
||||
sbb rax, 0
|
||||
and rsi, rax
|
||||
and rcx, rax
|
||||
and rdx, rax
|
||||
and r11, rax
|
||||
add r12, rsi
|
||||
adc r8, rcx
|
||||
adc r9, rdx
|
||||
adc r10, r11
|
||||
|
||||
mov [rdi], r12
|
||||
mov [rdi+8], r8
|
||||
mov [rdi+16], r9
|
||||
mov [rdi+24], r10
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
ret
|
||||
|
||||
//***********************************************************************
// Field squaring in GF(p), implemented as fp_mul with both operands equal
// Operation: c = a x a mod p
// Inputs: a stored in [rsi]
// Output: c stored in [rdi]
//***********************************************************************
.global fp_sqr
|
||||
fp_sqr:
|
||||
mov rdx, rsi // second multiplicand pointer <- a (fp_mul reads b from [rdx])
|
||||
jmp fp_mul // jump, not call: fp_mul's ret returns to fp_sqr's caller
|
||||
76
src/gf/broadwell/lvl1/include/fp.h
Normal file
76
src/gf/broadwell/lvl1/include/fp.h
Normal file
@@ -0,0 +1,76 @@
|
||||
#ifndef FP_H
|
||||
#define FP_H
|
||||
|
||||
//////////////////////////////////////////////// NOTE: this is placed here for now
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <tutil.h>
|
||||
#include <fp_constants.h>
|
||||
|
||||
typedef digit_t fp_t[NWORDS_FIELD]; // Datatype for representing field elements
|
||||
|
||||
void fp_set(digit_t* x, const digit_t val);
|
||||
bool fp_is_equal(const digit_t* a, const digit_t* b);
|
||||
bool fp_is_zero(const digit_t* a);
|
||||
void fp_copy(digit_t* out, const digit_t* a);
|
||||
digit_t mp_shiftr(digit_t* x, const unsigned int shift, const unsigned int nwords);
|
||||
void mp_shiftl(digit_t* x, const unsigned int shift, const unsigned int nwords);
|
||||
void fp_add(digit_t* out, const digit_t* a, const digit_t* b);
|
||||
void fp_sub(digit_t* out, const digit_t* a, const digit_t* b);
|
||||
void fp_neg(digit_t* out, const digit_t* a);
|
||||
void fp_sqr(digit_t* out, const digit_t* a);
|
||||
void fp_mul(digit_t* out, const digit_t* a, const digit_t* b);
|
||||
void MUL(digit_t* out, const digit_t a, const digit_t b);
|
||||
void fp_inv(digit_t* x);
|
||||
bool fp_is_square(const digit_t* a);
|
||||
void fp_sqrt(digit_t* a);
|
||||
void fp_tomont(digit_t* out, const digit_t* a);
|
||||
void fp_frommont(digit_t* out, const digit_t* a);
|
||||
void fp_mont_setone(digit_t* out);
|
||||
|
||||
/********************** Constant-time unsigned comparisons ***********************/
|
||||
|
||||
// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise
|
||||
|
||||
static inline unsigned int is_digit_nonzero_ct(digit_t x)
{ // Is x != 0? Returns 1 if so, 0 otherwise, without branching.
  // x | (0 - x) has its top bit set exactly when x is nonzero.
    const digit_t spread = x | (0 - x);

    return (unsigned int)(spread >> (RADIX - 1));
}
|
||||
|
||||
static inline unsigned int is_digit_zero_ct(digit_t x)
{ // Is x = 0? Returns 1 if so, 0 otherwise: the complement of is_digit_nonzero_ct.
    const unsigned int nonzero = is_digit_nonzero_ct(x);

    return (unsigned int)(nonzero ^ 1);
}
|
||||
|
||||
static inline unsigned int is_digit_lessthan_ct(digit_t x, digit_t y)
{ // Branch-free unsigned comparison: returns 1 when x < y, 0 otherwise.
  // The borrow of x - y is reconstructed in bit RADIX-1 using only XOR/OR,
  // so no data-dependent branch is taken.
    const digit_t differing = x ^ y;
    const digit_t wrapped = (x - y) ^ y;
    const digit_t verdict = x ^ (differing | wrapped);

    return (unsigned int)(verdict >> (RADIX - 1));
}
|
||||
|
||||
/********************** Platform-independent macros for digit-size operations **********************/
|
||||
|
||||
// Digit addition with carry
|
||||
/* sumOut = addend1 + addend2 + carryIn (mod 2^digit width); carryOut = resulting carry (0 or 1).
 * A carry occurs if adding carryIn wrapped (tempReg < carryIn) or adding addend2 wrapped
 * (sumOut < tempReg). carryIn is read before carryOut is written, so the same variable
 * may be passed for both. */
#define ADDC(sumOut, carryOut, addend1, addend2, carryIn)                                       \
    {                                                                                           \
        digit_t tempReg = (digit_t)(carryIn) + (addend1);                                       \
        (sumOut) = tempReg + (addend2);                                                         \
        (carryOut) = (is_digit_lessthan_ct((sumOut), tempReg) |                                 \
                      is_digit_lessthan_ct(tempReg, (digit_t)(carryIn)));                       \
    }
|
||||
|
||||
// Digit subtraction with borrow
|
||||
/* differenceOut = minuend - subtrahend - borrowIn (mod 2^digit width); borrowOut = resulting borrow (0 or 1).
 * A borrow occurs if minuend < subtrahend, or if the two are equal (tempReg == 0) and borrowIn is set.
 * The borrow is computed into borrowReg before differenceOut is written, so the outputs
 * may alias the inputs. */
#define SUBC(differenceOut, borrowOut, minuend, subtrahend, borrowIn)                           \
    {                                                                                           \
        digit_t tempReg = (minuend) - (subtrahend);                                             \
        unsigned int borrowReg = ((is_digit_zero_ct(tempReg) & (borrowIn)) |                    \
                                  is_digit_lessthan_ct((minuend), (subtrahend)));               \
        (differenceOut) = tempReg - (digit_t)(borrowIn);                                        \
        (borrowOut) = borrowReg;                                                                \
    }
|
||||
|
||||
// Shift right with flexible datatype
|
||||
/* shiftOut = low digit of the double-digit value (highIn:lowIn) shifted right by `shift` bits.
 * DigitSize is the width in bits of the digit type being shifted.
 * `shift` must satisfy 0 < shift < DigitSize; otherwise one of the shifts is by a full
 * digit width, which is undefined behavior in C.
 * DigitSize is parenthesized so that expression arguments (e.g. `1 << 5`) are not
 * re-associated with the subtraction.
 * The trailing semicolon is part of the macro, as in the original definition. */
#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize)                                       \
    (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << ((DigitSize) - (shift)));
|
||||
|
||||
// Digit shift left
|
||||
/* shiftOut = high digit of the double-digit value (highIn:lowIn) shifted left by `shift` bits.
 * DigitSize is the width in bits of the digit type being shifted; it is now honored
 * (the previous definition ignored it and always used RADIX, which is only correct when
 * the caller passes RADIX).
 * `shift` must satisfy 0 < shift < DigitSize; otherwise one of the shifts is by a full
 * digit width, which is undefined behavior in C.
 * The trailing semicolon is part of the macro, as in the original definition. */
#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize)                                       \
    (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> ((DigitSize) - (shift)));
|
||||
|
||||
#endif
|
||||
29
src/gf/broadwell/lvl1/include/fp2.h
Normal file
29
src/gf/broadwell/lvl1/include/fp2.h
Normal file
@@ -0,0 +1,29 @@
|
||||
#ifndef FP2_H
|
||||
#define FP2_H
|
||||
|
||||
#include "fp.h"
|
||||
|
||||
// Structure for representing elements in GF(p^2)
|
||||
typedef struct fp2_t {
|
||||
fp_t re, im;
|
||||
} fp2_t;
|
||||
|
||||
// Loading, comparing and copying elements
void fp2_set(fp2_t* x, const digit_t val);                  // x = val for a single-digit val (the tests load 0 and 1 this way)
bool fp2_is_zero(const fp2_t* a);                           // presumably true when a = 0 (implementation not shown here)
bool fp2_is_equal(const fp2_t* a, const fp2_t* b);          // presumably true when a = b (implementation not shown here)
void fp2_copy(fp2_t* x, const fp2_t* y);                    // presumably x = y (implementation not shown here)
fp2_t fp2_non_residue(void);                                // returns an fp2_t by value; presumably a fixed non-square of GF(p^2), confirm in fp2.c

// Arithmetic in GF(p^2); the result is written to x
void fp2_add(fp2_t* x, const fp2_t* y, const fp2_t* z);     // x = y+z
void fp2_sub(fp2_t* x, const fp2_t* y, const fp2_t* z);     // x = y-z
void fp2_neg(fp2_t* x, const fp2_t* y);                     // x = -y
void fp2_mul(fp2_t* x, const fp2_t* y, const fp2_t* z);     // x = y*z (tests call it on Montgomery-form input)
void fp2_sqr(fp2_t* x, const fp2_t* y);                     // x = y^2 (tests call it on Montgomery-form input)
void fp2_inv(fp2_t* x);                                     // in place: x = x^-1 (tests expect 0 to stay 0)
bool fp2_is_square(const fp2_t* x);                         // square detection (tests expect true for y^2)
void fp2_frob(fp2_t* x, const fp2_t* y);                    // presumably the Frobenius map x = y^p; confirm in fp2.c
void fp2_sqrt(fp2_t* x);                                    // in place: x = sqrt(x) (tests accept either sign)

// Montgomery representation, applied to both components
void fp2_tomont(fp2_t* x, const fp2_t* y);                  // x = y converted to Montgomery form
void fp2_frommont(fp2_t* x, const fp2_t* y);                // x = y converted back from Montgomery form

// NOTE(review): the ordering/return convention of fp2_cmp is not visible from this header; confirm in fp2.c
int fp2_cmp(fp2_t* x, fp2_t* y);
|
||||
|
||||
#endif
|
||||
9
src/gf/broadwell/lvl1/test/CMakeLists.txt
Normal file
9
src/gf/broadwell/lvl1/test/CMakeLists.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
# Build one test executable per field layer (GF(p) -> test_fp.c, GF(p^2) -> test_fp2.c),
# link it against the GF library of the current variant and register it with CTest.
# Each test is invoked as "<exe> test <SQISIGN_TEST_REPS>".
foreach(GF_TEST_LAYER fp fp2)
    set(GF_TEST_TARGET sqisign_test_gf_${SVARIANT_LOWER}_${GF_TEST_LAYER})
    add_executable(${GF_TEST_TARGET} test_${GF_TEST_LAYER}.c test_extras.c)
    target_link_libraries(${GF_TEST_TARGET} ${LIB_GF_${SVARIANT_UPPER}})
    target_include_directories(${GF_TEST_TARGET} PRIVATE ../include ${INC_COMMON} ${INC_PRECOMP_${SVARIANT_UPPER}} ${INC_PUBLIC})
    add_test(${GF_TEST_TARGET} ${GF_TEST_TARGET} test ${SQISIGN_TEST_REPS})
endforeach()
|
||||
74
src/gf/broadwell/lvl1/test/test_extras.c
Normal file
74
src/gf/broadwell/lvl1/test/test_extras.c
Normal file
@@ -0,0 +1,74 @@
|
||||
#include "test_extras.h"
|
||||
#include <bench.h>
|
||||
|
||||
// Global constants
|
||||
extern const digit_t p[NWORDS_FIELD];
|
||||
extern const digit_t R2[NWORDS_FIELD];
|
||||
|
||||
#if 0
|
||||
int64_t cpucycles(void)
|
||||
{ // Access system counter for benchmarking
|
||||
unsigned int hi, lo;
|
||||
|
||||
asm volatile ("rdtsc\n\t" : "=a" (lo), "=d"(hi));
|
||||
return ((int64_t)lo) | (((int64_t)hi) << 32);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
int compare_words(digit_t* a, digit_t* b, unsigned int nwords)
{ // Three-way comparison of two "nwords"-digit values, scanning from the highest index down:
  // returns 1 if a>b, 0 if a=b, -1 if a<b.
  // SECURITY NOTE: returns at the first differing digit, so it is NOT constant-time. TESTING ONLY.
    int idx = nwords-1;

    while (idx >= 0)
    {
        const digit_t wa = a[idx];
        const digit_t wb = b[idx];

        if (wa != wb) {
            return (wa > wb) ? 1 : -1;
        }
        idx--;
    }

    return 0;
}
|
||||
|
||||
|
||||
static void sub_test(digit_t* out, digit_t* a, digit_t* b, unsigned int nwords)
{ // Multiprecision subtraction out = a-b, intended for a>b; the final borrow is discarded.
  // Both input digits are read before out[k] is written, so out may alias a or b.
  // SECURITY NOTE: not constant-time. It is for TESTING ONLY.
    digit_t borrow = 0;

    for (unsigned int k = 0; k < nwords; k++)
    {
        const digit_t ak = a[k];
        const digit_t bk = b[k];
        const digit_t diff = ak - bk;
        // Borrow out of this digit: either ak-bk wrapped, or removing the incoming borrow will wrap
        const digit_t next_borrow = (ak < bk) || (diff < borrow);

        out[k] = diff - borrow;
        borrow = next_borrow;
    }
}
|
||||
|
||||
|
||||
void fprandom_test(digit_t* a)
{ // Fill a with a pseudo-random field element in [0, p-1] using rand().
  // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
    const unsigned int nwords = NWORDS_FIELD;
    const unsigned int diff = 256-254;                                   // number of top bits cleared in the last digit
    const digit_t top_mask = (((digit_t)(-1) << diff) >> diff);
    unsigned char* bytes = (unsigned char*)a;

    // Fill every byte of the element with pseudo-random data
    for (unsigned int i = 0; i < sizeof(digit_t)*nwords; i++) {
        bytes[i] = (unsigned char)rand();
    }
    a[nwords-1] &= top_mask;

    // Subtract p for as long as p <= a, which leaves a in [0, p-1]
    while (compare_words((digit_t*)p, a, nwords) <= 0) {
        sub_test(a, a, (digit_t*)p, nwords);
    }
}
|
||||
|
||||
|
||||
void fp2random_test(fp2_t* a)
{ // Fill a with a pseudo-random element of GF(p^2): re is drawn first, then im.
  // SECURITY NOTE: distribution is not fully uniform. TO BE USED FOR TESTING ONLY.
    digit_t* const components[2] = { a->re, a->im };

    for (int k = 0; k < 2; k++) {
        fprandom_test(components[k]);
    }
}
|
||||
25
src/gf/broadwell/lvl1/test/test_extras.h
Normal file
25
src/gf/broadwell/lvl1/test/test_extras.h
Normal file
@@ -0,0 +1,25 @@
|
||||
|
||||
#ifndef TEST_EXTRAS_H
|
||||
#define TEST_EXTRAS_H
|
||||
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
#include "../include/fp.h"
|
||||
#include "../include/fp2.h"
|
||||
|
||||
#define PASSED 0
|
||||
#define FAILED 1
|
||||
|
||||
// Access system counter for benchmarking
|
||||
//int64_t cpucycles(void);
|
||||
|
||||
// Comparing "nword" elements, a=b? : (1) a>b, (0) a=b, (-1) a<b
|
||||
int compare_words(digit_t* a, digit_t* b, unsigned int nwords);
|
||||
|
||||
// Generating a pseudo-random field element in [0, p-1]
|
||||
void fprandom_test(digit_t* a);
|
||||
|
||||
// Generating a pseudo-random element in GF(p^2)
|
||||
void fp2random_test(fp2_t* a);
|
||||
|
||||
#endif
|
||||
295
src/gf/broadwell/lvl1/test/test_fp.c
Normal file
295
src/gf/broadwell/lvl1/test/test_fp.c
Normal file
@@ -0,0 +1,295 @@
|
||||
#include "test_extras.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <bench.h>
|
||||
|
||||
// Global constants
|
||||
extern const digit_t p[NWORDS_FIELD];
|
||||
|
||||
// Benchmark and test parameters
|
||||
static int BENCH_LOOPS = 100000; // Number of iterations per bench
|
||||
static int TEST_LOOPS = 100000; // Number of iterations per test
|
||||
|
||||
|
||||
bool fp_test()
|
||||
{ // Tests for the field arithmetic
|
||||
bool OK = true;
|
||||
int n, passed;
|
||||
fp_t a, b, c, d, e, f, ma, mb, mc, md, me, mf;
|
||||
|
||||
printf("\n--------------------------------------------------------------------------------------------------------\n\n");
|
||||
printf("Testing field arithmetic over GF(p): \n\n");
|
||||
|
||||
// Field addition
|
||||
passed = 1;
|
||||
for (n=0; n<TEST_LOOPS; n++)
|
||||
{
|
||||
fprandom_test(a); fprandom_test(b); fprandom_test(c); fprandom_test(d);
|
||||
|
||||
fp_add(d, a, b); fp_add(e, d, c); // e = (a+b)+c
|
||||
fp_add(d, b, c); fp_add(f, d, a); // f = a+(b+c)
|
||||
if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp_add(d, a, b); // d = a+b
|
||||
fp_add(e, b, a); // e = b+a
|
||||
if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp_set(b, 0);
|
||||
fp_add(d, a, b); // d = a+0
|
||||
if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp_set(b, 0);
|
||||
fp_neg(d, a);
|
||||
fp_add(e, a, d); // e = a+(-a)
|
||||
if (compare_words(e, b, NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
}
|
||||
if (passed==1) printf(" GF(p) addition tests ............................................ PASSED");
|
||||
else { printf(" GF(p) addition tests... FAILED"); printf("\n"); return false; }
|
||||
printf("\n");
|
||||
|
||||
// Field subtraction
|
||||
passed = 1;
|
||||
for (n=0; n<TEST_LOOPS; n++)
|
||||
{
|
||||
fprandom_test(a); fprandom_test(b); fprandom_test(c); fprandom_test(d);
|
||||
|
||||
fp_sub(d, a, b); fp_sub(e, d, c); // e = (a-b)-c
|
||||
fp_add(d, b, c); fp_sub(f, a, d); // f = a-(b+c)
|
||||
if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp_sub(d, a, b); // d = a-b
|
||||
fp_sub(e, b, a);
|
||||
fp_neg(e, e); // e = -(b-a)
|
||||
if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp_set(b, 0);
|
||||
fp_sub(d, a, b); // d = a-0
|
||||
if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp_set(b, 0);
|
||||
fp_sub(e, a, a); // e = a+(-a)
|
||||
if (compare_words(e, b, NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
}
|
||||
if (passed==1) printf(" GF(p) subtraction tests ......................................... PASSED");
|
||||
else { printf(" GF(p) subtraction tests... FAILED"); printf("\n"); return false; }
|
||||
printf("\n");
|
||||
|
||||
// Field multiplication
|
||||
passed = 1;
|
||||
for (n=0; n<TEST_LOOPS; n++)
|
||||
{
|
||||
fprandom_test(a); fprandom_test(b); fprandom_test(c);
|
||||
|
||||
fp_tomont(ma, a);
|
||||
fp_frommont(c, ma);
|
||||
if (compare_words(a, c, NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp_tomont(ma, a); fp_tomont(mb, b); fp_tomont(mc, c);
|
||||
fp_mul(md, ma, mb); fp_mul(me, md, mc); // e = (a*b)*c
|
||||
fp_mul(md, mb, mc); fp_mul(mf, md, ma); // f = a*(b*c)
|
||||
fp_frommont(e, me);
|
||||
fp_frommont(f, mf);
|
||||
if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp_tomont(ma, a); fp_tomont(mb, b); fp_tomont(mc, c);
|
||||
fp_add(md, mb, mc); fp_mul(me, ma, md); // e = a*(b+c)
|
||||
fp_mul(md, ma, mb); fp_mul(mf, ma, mc); fp_add(mf, md, mf); // f = a*b+a*c
|
||||
fp_frommont(e, me);
|
||||
fp_frommont(f, mf);
|
||||
if (compare_words(e, f, NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp_tomont(ma, a); fp_tomont(mb, b);
|
||||
fp_mul(md, ma, mb); // d = a*b
|
||||
fp_mul(me, mb, ma); // e = b*a
|
||||
fp_frommont(d, md);
|
||||
fp_frommont(e, me);
|
||||
if (compare_words(d, e, NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp_tomont(ma, a);
|
||||
fp_set(b, 1); fp_tomont(mb, b);
|
||||
fp_mul(md, ma, mb); // d = a*1
|
||||
fp_frommont(a, ma);
|
||||
fp_frommont(d, md);
|
||||
if (compare_words(a, d, NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp_set(b, 0);
|
||||
fp_tomont(mb, b);
|
||||
fp_mul(md, ma, mb); // d = a*0
|
||||
fp_frommont(d, md);
|
||||
if (compare_words(b, d, NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
}
|
||||
if (passed==1) printf(" GF(p) multiplication tests ...................................... PASSED");
|
||||
else { printf(" GF(p) multiplication tests... FAILED"); printf("\n"); return false; }
|
||||
printf("\n");
|
||||
|
||||
// Field squaring
|
||||
passed = 1;
|
||||
for (n=0; n<TEST_LOOPS; n++)
|
||||
{
|
||||
fprandom_test(a);
|
||||
|
||||
fp_tomont(ma, a);
|
||||
fp_sqr(mb, ma); // b = a^2
|
||||
fp_mul(mc, ma, ma); // c = a*a
|
||||
fp_frommont(b, mb);
|
||||
fp_frommont(c, mc);
|
||||
if (compare_words(b, c, NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp_set(a, 0); fp_tomont(ma, a);
|
||||
fp_sqr(md, ma); // d = 0^2
|
||||
if (compare_words(ma, md, NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
}
|
||||
if (passed==1) printf(" GF(p) squaring tests............................................. PASSED");
|
||||
else { printf(" GF(p) squaring tests... FAILED"); printf("\n"); return false; }
|
||||
printf("\n");
|
||||
|
||||
// Field inversion
|
||||
passed = 1;
|
||||
for (n = 0; n < TEST_LOOPS; n++)
|
||||
{
|
||||
fprandom_test(a);
|
||||
|
||||
fp_tomont(ma, a);
|
||||
fp_set(d, 1);
|
||||
memcpy(mb, ma, RADIX/8 * NWORDS_FIELD);
|
||||
fp_inv(ma);
|
||||
fp_mul(mc, ma, mb); // c = a*a^-1
|
||||
fp_frommont(c, mc);
|
||||
if (compare_words(c, d, NWORDS_FIELD) != 0) { passed = 0; break; }
|
||||
|
||||
fp_set(a, 0);
|
||||
fp_set(d, 0);
|
||||
fp_inv(a); // c = 0^-1
|
||||
if (compare_words(a, d, NWORDS_FIELD) != 0) { passed = 0; break; }
|
||||
}
|
||||
if (passed == 1) printf(" GF(p) inversion tests............................................ PASSED");
|
||||
else { printf(" GF(p) inversion tests... FAILED"); printf("\n"); return false; }
|
||||
printf("\n");
|
||||
|
||||
// Square root and square detection
|
||||
passed = 1;
|
||||
for (n = 0; n < TEST_LOOPS; n++)
|
||||
{
|
||||
fprandom_test(a);
|
||||
|
||||
fp_tomont(ma, a);
|
||||
fp_sqr(mc, ma);
|
||||
fp_frommont(c, mc); // c = a^2
|
||||
if (fp_is_square(mc) != 1) { passed = 0; break; }
|
||||
|
||||
fp_sqrt(mc); // c = a = sqrt(c)
|
||||
fp_neg(md, mc);
|
||||
fp_frommont(c, mc);
|
||||
fp_frommont(d, md);
|
||||
if ((compare_words(a, c, NWORDS_FIELD) != 0) && (compare_words(a, d, NWORDS_FIELD) != 0)) { passed = 0; break; }
|
||||
}
|
||||
if (passed == 1) printf(" Square root, square tests........................................ PASSED");
|
||||
else { printf(" Square root, square tests... FAILED"); printf("\n"); return false; }
|
||||
printf("\n");
|
||||
|
||||
return OK;
|
||||
}
|
||||
|
||||
bool fp_run()
|
||||
{
|
||||
bool OK = true;
|
||||
int n;
|
||||
unsigned long long cycles, cycles1, cycles2;
|
||||
fp_t a, b, c;
|
||||
|
||||
printf("\n--------------------------------------------------------------------------------------------------------\n\n");
|
||||
printf("Benchmarking field arithmetic: \n\n");
|
||||
|
||||
fprandom_test(a); fprandom_test(b); fprandom_test(c);
|
||||
|
||||
// GF(p) addition
|
||||
cycles = 0;
|
||||
for (n=0; n<BENCH_LOOPS; n++)
|
||||
{
|
||||
cycles1 = cpucycles();
|
||||
fp_add(c, a, b);
|
||||
cycles2 = cpucycles();
|
||||
cycles = cycles+(cycles2-cycles1);
|
||||
}
|
||||
printf(" GF(p) addition runs in .......................................... %7lld cycles", cycles/BENCH_LOOPS);
|
||||
printf("\n");
|
||||
|
||||
// GF(p) subtraction
|
||||
cycles = 0;
|
||||
for (n=0; n<BENCH_LOOPS; n++)
|
||||
{
|
||||
cycles1 = cpucycles();
|
||||
fp_sub(c, a, b);
|
||||
cycles2 = cpucycles();
|
||||
cycles = cycles+(cycles2-cycles1);
|
||||
}
|
||||
printf(" GF(p) subtraction runs in ....................................... %7lld cycles", cycles/BENCH_LOOPS);
|
||||
printf("\n");
|
||||
|
||||
// GF(p) multiplication
|
||||
cycles = 0;
|
||||
for (n=0; n<BENCH_LOOPS; n++)
|
||||
{
|
||||
cycles1 = cpucycles();
|
||||
fp_mul(c, a, b);
|
||||
cycles2 = cpucycles();
|
||||
cycles = cycles+(cycles2-cycles1);
|
||||
}
|
||||
printf(" GF(p) multiplication runs in .................................... %7lld cycles", cycles/BENCH_LOOPS);
|
||||
printf("\n");
|
||||
|
||||
// GF(p) inversion
|
||||
cycles = 0;
|
||||
for (n = 0; n < BENCH_LOOPS; n++)
|
||||
{
|
||||
cycles1 = cpucycles();
|
||||
fp_inv(a);
|
||||
cycles2 = cpucycles();
|
||||
cycles = cycles + (cycles2 - cycles1);
|
||||
}
|
||||
printf(" GF(p) inversion runs in ......................................... %7lld cycles", cycles/BENCH_LOOPS);
|
||||
printf("\n");
|
||||
|
||||
// GF(p) square root
|
||||
cycles = 0;
|
||||
for (n = 0; n < BENCH_LOOPS; n++)
|
||||
{
|
||||
cycles1 = cpucycles();
|
||||
fp_sqrt(a);
|
||||
cycles2 = cpucycles();
|
||||
cycles = cycles + (cycles2 - cycles1);
|
||||
}
|
||||
printf(" GF(p) square root runs in ....................................... %7lld cycles", cycles/BENCH_LOOPS);
|
||||
printf("\n");
|
||||
|
||||
// Square checking
|
||||
cycles = 0;
|
||||
for (n = 0; n < BENCH_LOOPS; n++)
|
||||
{
|
||||
cycles1 = cpucycles();
|
||||
fp_is_square(a);
|
||||
cycles2 = cpucycles();
|
||||
cycles = cycles + (cycles2 - cycles1);
|
||||
}
|
||||
printf(" Square checking runs in ......................................... %7lld cycles", cycles/BENCH_LOOPS);
|
||||
printf("\n");
|
||||
|
||||
return OK;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
if (argc < 3) {
|
||||
printf("Please enter an argument: 'test' or 'bench' and <reps>\n");
|
||||
exit(1);
|
||||
}
|
||||
if (!strcmp(argv[1], "test")) {
|
||||
TEST_LOOPS = atoi(argv[2]);
|
||||
return !fp_test();
|
||||
} else if (!strcmp(argv[1], "bench")) {
|
||||
BENCH_LOOPS = atoi(argv[2]);
|
||||
return !fp_run();
|
||||
} else {
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
307
src/gf/broadwell/lvl1/test/test_fp2.c
Normal file
307
src/gf/broadwell/lvl1/test/test_fp2.c
Normal file
@@ -0,0 +1,307 @@
|
||||
#include "test_extras.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <bench.h>
|
||||
|
||||
// Global constants
|
||||
extern const digit_t p[NWORDS_FIELD];
|
||||
|
||||
// Benchmark and test parameters
|
||||
static int BENCH_LOOPS = 100000; // Number of iterations per bench
|
||||
static int TEST_LOOPS = 100000; // Number of iterations per test
|
||||
|
||||
|
||||
bool fp2_test()
|
||||
{ // Tests for the GF(p^2) arithmetic
|
||||
bool OK = true;
|
||||
int n, passed;
|
||||
fp2_t a, b, c, d, e, f, ma, mb, mc, md, me, mf;
|
||||
|
||||
printf("\n--------------------------------------------------------------------------------------------------------\n\n");
|
||||
printf("Testing arithmetic over GF(p^2): \n\n");
|
||||
|
||||
// Addition in GF(p^2)
|
||||
passed = 1;
|
||||
for (n=0; n<TEST_LOOPS; n++)
|
||||
{
|
||||
fp2random_test(&a); fp2random_test(&b); fp2random_test(&c); fp2random_test(&d);
|
||||
|
||||
fp2_add(&d, &a, &b); fp2_add(&e, &d, &c); // e = (a+b)+c
|
||||
fp2_add(&d, &b, &c); fp2_add(&f, &d, &a); // f = a+(b+c)
|
||||
if (compare_words((digit_t*)&e, (digit_t*)&f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp2_add(&d, &a, &b); // d = a+b
|
||||
fp2_add(&e, &b, &a); // e = b+a
|
||||
if (compare_words((digit_t*)&d, (digit_t*)&e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp2_set(&b, 0);
|
||||
fp2_add(&d, &a, &b); // d = a+0
|
||||
if (compare_words((digit_t*)&a, (digit_t*)&d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp2_set(&b, 0);
|
||||
fp2_neg(&d, &a);
|
||||
fp2_add(&e, &a, &d); // e = a+(-a)
|
||||
if (compare_words((digit_t*)&e, (digit_t*)&b, 2*NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
}
|
||||
if (passed==1) printf(" GF(p^2) addition tests ............................................ PASSED");
|
||||
else { printf(" GF(p^2) addition tests... FAILED"); printf("\n"); return false; }
|
||||
printf("\n");
|
||||
|
||||
// Subtraction in GF(p^2)
|
||||
passed = 1;
|
||||
for (n=0; n<TEST_LOOPS; n++)
|
||||
{
|
||||
fp2random_test(&a); fp2random_test(&b); fp2random_test(&c); fp2random_test(&d);
|
||||
|
||||
fp2_sub(&d, &a, &b); fp2_sub(&e, &d, &c); // e = (a-b)-c
|
||||
fp2_add(&d, &b, &c); fp2_sub(&f, &a, &d); // f = a-(b+c)
|
||||
if (compare_words((digit_t*)&e, (digit_t*)&f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp2_sub(&d, &a, &b); // d = a-b
|
||||
fp2_sub(&e, &b, &a);
|
||||
fp2_neg(&e, &e); // e = -(b-a)
|
||||
if (compare_words((digit_t*)&d, (digit_t*)&e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp2_set(&b, 0);
|
||||
fp2_sub(&d, &a, &b); // d = a-0
|
||||
if (compare_words((digit_t*)&a, (digit_t*)&d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp2_set(&b, 0);
|
||||
fp2_sub(&e, &a, &a); // e = a+(-a)
|
||||
if (compare_words((digit_t*)&e, (digit_t*)&b, 2*NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
}
|
||||
if (passed==1) printf(" GF(p^2) subtraction tests ......................................... PASSED");
|
||||
else { printf(" GF(p^2) subtraction tests... FAILED"); printf("\n"); return false; }
|
||||
printf("\n");
|
||||
|
||||
// Multiplication in GF(p^2)
|
||||
passed = 1;
|
||||
for (n=0; n<TEST_LOOPS; n++)
|
||||
{
|
||||
fp2random_test(&a); fp2random_test(&b); fp2random_test(&c);
|
||||
|
||||
fp2_tomont(&ma, &a);
|
||||
fp2_frommont(&c, &ma);
|
||||
if (compare_words((digit_t*)&a, (digit_t*)&c, 2*NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp2_tomont(&ma, &a); fp2_tomont(&mb, &b); fp2_tomont(&mc, &c);
|
||||
fp2_mul(&md, &ma, &mb); fp2_mul(&me, &md, &mc); // e = (a*b)*c
|
||||
fp2_mul(&md, &mb, &mc); fp2_mul(&mf, &md, &ma); // f = a*(b*c)
|
||||
fp2_frommont(&e, &me);
|
||||
fp2_frommont(&f, &mf);
|
||||
if (compare_words((digit_t*)&e, (digit_t*)&f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp2_tomont(&ma, &a); fp2_tomont(&mb, &b); fp2_tomont(&mc, &c);
|
||||
fp2_add(&md, &mb, &mc); fp2_mul(&me, &ma, &md); // e = a*(b+c)
|
||||
fp2_mul(&md, &ma, &mb); fp2_mul(&mf, &ma, &mc); fp2_add(&mf, &md, &mf); // f = a*b+a*c
|
||||
fp2_frommont(&e, &me);
|
||||
fp2_frommont(&f, &mf);
|
||||
if (compare_words((digit_t*)&e, (digit_t*)&f, 2*NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp2_tomont(&ma, &a); fp2_tomont(&mb, &b);
|
||||
fp2_mul(&md, &ma, &mb); // d = a*b
|
||||
fp2_mul(&me, &mb, &ma); // e = b*a
|
||||
fp2_frommont(&d, &md);
|
||||
fp2_frommont(&e, &me);
|
||||
if (compare_words((digit_t*)&d, (digit_t*)&e, 2*NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp2_tomont(&ma, &a);
|
||||
fp2_set(&b, 1); fp2_tomont(&mb, &b);
|
||||
fp2_mul(&md, &ma, &mb); // d = a*1
|
||||
fp2_frommont(&a, &ma);
|
||||
fp2_frommont(&d, &md);
|
||||
if (compare_words((digit_t*)&a, (digit_t*)&d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp2_set(&b, 0);
|
||||
fp2_tomont(&mb, &b);
|
||||
fp2_mul(&md, &ma, &mb); // d = a*0
|
||||
fp2_frommont(&d, &md);
|
||||
if (compare_words((digit_t*)&b, (digit_t*)&d, 2*NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
}
|
||||
if (passed==1) printf(" GF(p^2) multiplication tests ...................................... PASSED");
|
||||
else { printf(" GF(p^2) multiplication tests... FAILED"); printf("\n"); return false; }
|
||||
printf("\n");
|
||||
|
||||
// Squaring in GF(p^2)
|
||||
passed = 1;
|
||||
for (n=0; n<TEST_LOOPS; n++)
|
||||
{
|
||||
fp2random_test(&a);
|
||||
|
||||
fp2_tomont(&ma, &a);
|
||||
fp2_sqr(&mb, &ma); // b = a^2
|
||||
fp2_mul(&mc, &ma, &ma); // c = a*a
|
||||
fp2_frommont(&b, &mb);
|
||||
fp2_frommont(&c, &mc);
|
||||
if (compare_words((digit_t*)&b, (digit_t*)&c, 2*NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
|
||||
fp2_set(&a, 0); fp2_tomont(&ma, &a);
|
||||
fp2_sqr(&md, &ma); // d = 0^2
|
||||
if (compare_words((digit_t*)&ma, (digit_t*)&md, 2*NWORDS_FIELD)!=0) { passed=0; break; }
|
||||
}
|
||||
if (passed==1) printf(" GF(p^2) squaring tests............................................. PASSED");
|
||||
else { printf(" GF(p^2) squaring tests... FAILED"); printf("\n"); return false; }
|
||||
printf("\n");
|
||||
|
||||
// Inversion in GF(p^2)
|
||||
passed = 1;
|
||||
for (n=0; n<TEST_LOOPS; n++)
|
||||
{
|
||||
fp2random_test(&a);
|
||||
|
||||
fp2_tomont(&ma, &a);
|
||||
fp2_set(&d, 1);
|
||||
memcpy(&mb, &ma, RADIX/8 * 2*NWORDS_FIELD);
|
||||
fp2_inv(&ma);
|
||||
fp2_mul(&mc, &ma, &mb); // c = a*a^-1
|
||||
fp2_frommont(&c, &mc);
|
||||
if (compare_words((digit_t*)&c, (digit_t*)&d, 2*NWORDS_FIELD) != 0) { passed = 0; break; }
|
||||
|
||||
fp2_set(&a, 0);
|
||||
fp2_set(&d, 0);
|
||||
fp2_inv(&a); // c = 0^-1
|
||||
if (compare_words((digit_t*)&a, (digit_t*)&d, 2*NWORDS_FIELD) != 0) { passed = 0; break; }
|
||||
}
|
||||
if (passed == 1) printf(" GF(p^2) inversion tests............................................ PASSED");
|
||||
else { printf(" GF(p^2) inversion tests... FAILED"); printf("\n"); return false; }
|
||||
printf("\n");
|
||||
|
||||
// Square root and square detection in GF(p^2)
|
||||
passed = 1;
|
||||
for (n=0; n<TEST_LOOPS; n++)
|
||||
{
|
||||
fp2random_test(&a);
|
||||
|
||||
fp2_tomont(&ma, &a);
|
||||
fp2_sqr(&mc, &ma);
|
||||
fp2_frommont(&c, &mc); // c = a^2
|
||||
if (fp2_is_square(&mc) != 1) { passed = 0; break; }
|
||||
|
||||
fp2_sqrt(&mc); // c = a = sqrt(c)
|
||||
fp2_neg(&md, &mc);
|
||||
fp2_frommont(&c, &mc);
|
||||
fp2_frommont(&d, &md);
|
||||
if ((compare_words((digit_t*)&a, (digit_t*)&c, 2*NWORDS_FIELD) != 0) & (compare_words((digit_t*)&a, (digit_t*)&d, 2*NWORDS_FIELD) != 0)) { passed = 0; break; }
|
||||
}
|
||||
if (passed == 1) printf(" Square root, square tests.......................................... PASSED");
|
||||
else { printf(" Square root, square tests... FAILED"); printf("\n"); return false; }
|
||||
printf("\n");
|
||||
|
||||
return OK;
|
||||
}
|
||||
|
||||
bool fp2_run()
|
||||
{
|
||||
bool OK = true;
|
||||
int n;
|
||||
unsigned long long cycles, cycles1, cycles2;
|
||||
fp2_t a, b, c;
|
||||
|
||||
printf("\n--------------------------------------------------------------------------------------------------------\n\n");
|
||||
printf("Benchmarking arithmetic over GF(p^2): \n\n");
|
||||
|
||||
fp2random_test(&a); fp2random_test(&b); fp2random_test(&c);
|
||||
|
||||
// GF(p^2) addition
|
||||
cycles = 0;
|
||||
for (n=0; n<BENCH_LOOPS; n++)
|
||||
{
|
||||
cycles1 = cpucycles();
|
||||
fp2_add(&c, &a, &b);
|
||||
cycles2 = cpucycles();
|
||||
cycles = cycles+(cycles2-cycles1);
|
||||
}
|
||||
printf(" GF(p^2) addition runs in .......................................... %7lld cycles", cycles/BENCH_LOOPS);
|
||||
printf("\n");
|
||||
|
||||
// GF(p^2) subtraction
|
||||
cycles = 0;
|
||||
for (n=0; n<BENCH_LOOPS; n++)
|
||||
{
|
||||
cycles1 = cpucycles();
|
||||
fp2_sub(&c, &a, &b);
|
||||
cycles2 = cpucycles();
|
||||
cycles = cycles+(cycles2-cycles1);
|
||||
}
|
||||
printf(" GF(p^2) subtraction runs in ....................................... %7lld cycles", cycles/BENCH_LOOPS);
|
||||
printf("\n");
|
||||
|
||||
// GF(p^2) squaring
|
||||
cycles = 0;
|
||||
for (n=0; n<BENCH_LOOPS; n++)
|
||||
{
|
||||
cycles1 = cpucycles();
|
||||
fp2_sqr(&c, &a);
|
||||
cycles2 = cpucycles();
|
||||
cycles = cycles + (cycles2 - cycles1);
|
||||
}
|
||||
printf(" GF(p^2) squaring runs in .......................................... %7lld cycles", cycles/BENCH_LOOPS);
|
||||
printf("\n");
|
||||
|
||||
// GF(p^2) multiplication
|
||||
cycles = 0;
|
||||
for (n=0; n<BENCH_LOOPS; n++)
|
||||
{
|
||||
cycles1 = cpucycles();
|
||||
fp2_mul(&c, &a, &b);
|
||||
cycles2 = cpucycles();
|
||||
cycles = cycles+(cycles2-cycles1);
|
||||
}
|
||||
printf(" GF(p^2) multiplication runs in .................................... %7lld cycles", cycles/BENCH_LOOPS);
|
||||
printf("\n");
|
||||
|
||||
// GF(p^2) inversion
|
||||
cycles = 0;
|
||||
for (n=0; n<BENCH_LOOPS; n++)
|
||||
{
|
||||
cycles1 = cpucycles();
|
||||
fp2_inv(&a);
|
||||
cycles2 = cpucycles();
|
||||
cycles = cycles + (cycles2 - cycles1);
|
||||
}
|
||||
printf(" GF(p^2) inversion runs in ......................................... %7lld cycles", cycles/BENCH_LOOPS);
|
||||
printf("\n");
|
||||
|
||||
// GF(p^2) square root
|
||||
cycles = 0;
|
||||
for (n = 0; n<BENCH_LOOPS; n++)
|
||||
{
|
||||
cycles1 = cpucycles();
|
||||
fp2_sqrt(&a);
|
||||
cycles2 = cpucycles();
|
||||
cycles = cycles + (cycles2 - cycles1);
|
||||
}
|
||||
printf(" GF(p^2) square root runs in ....................................... %7lld cycles", cycles/BENCH_LOOPS);
|
||||
printf("\n");
|
||||
|
||||
// Square checking
|
||||
cycles = 0;
|
||||
for (n=0; n<BENCH_LOOPS; n++)
|
||||
{
|
||||
cycles1 = cpucycles();
|
||||
fp2_is_square(&a);
|
||||
cycles2 = cpucycles();
|
||||
cycles = cycles + (cycles2 - cycles1);
|
||||
}
|
||||
printf(" Square checking runs in ........................................... %7lld cycles", cycles/BENCH_LOOPS);
|
||||
printf("\n");
|
||||
|
||||
return OK;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
if (argc < 3) {
|
||||
printf("Please enter an argument: 'test' or 'bench' and <reps>\n");
|
||||
exit(1);
|
||||
}
|
||||
if (!strcmp(argv[1], "test")) {
|
||||
TEST_LOOPS = atoi(argv[2]);
|
||||
return !fp2_test();
|
||||
} else if (!strcmp(argv[1], "bench")) {
|
||||
BENCH_LOOPS = atoi(argv[2]);
|
||||
return !fp2_run();
|
||||
} else {
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user