From 9f0409b6b4f72e39dfdffd3ca1784b1aa08f5855 Mon Sep 17 00:00:00 2001 From: StarsAC Date: Mon, 20 Oct 2025 01:07:36 +0000 Subject: [PATCH] add pqm4 --- .gitignore | 2 + src/pqm4/sqisign_lvl1/ref/api.h | 31 + src/pqm4/sqisign_lvl1/ref/basis.c | 416 +++++ src/pqm4/sqisign_lvl1/ref/common.c | 88 + src/pqm4/sqisign_lvl1/ref/config.mk | 2 + src/pqm4/sqisign_lvl1/ref/e0_basis.c | 55 + src/pqm4/sqisign_lvl1/ref/e0_basis.h | 3 + src/pqm4/sqisign_lvl1/ref/ec.c | 665 ++++++++ src/pqm4/sqisign_lvl1/ref/ec.h | 668 ++++++++ src/pqm4/sqisign_lvl1/ref/ec_jac.c | 335 ++++ src/pqm4/sqisign_lvl1/ref/ec_params.c | 4 + src/pqm4/sqisign_lvl1/ref/ec_params.h | 12 + .../sqisign_lvl1/ref/encode_verification.c | 220 +++ src/pqm4/sqisign_lvl1/ref/encoded_sizes.h | 11 + src/pqm4/sqisign_lvl1/ref/fp.c | 15 + src/pqm4/sqisign_lvl1/ref/fp.h | 48 + src/pqm4/sqisign_lvl1/ref/fp2.c | 328 ++++ src/pqm4/sqisign_lvl1/ref/fp2.h | 41 + src/pqm4/sqisign_lvl1/ref/fp_constants.h | 17 + src/pqm4/sqisign_lvl1/ref/fp_p5248_32.c | 942 ++++++++++ src/pqm4/sqisign_lvl1/ref/hd.c | 93 + src/pqm4/sqisign_lvl1/ref/hd.h | 435 +++++ .../ref/hd_splitting_transforms.c | 143 ++ .../ref/hd_splitting_transforms.h | 18 + src/pqm4/sqisign_lvl1/ref/isog.h | 28 + src/pqm4/sqisign_lvl1/ref/isog_chains.c | 241 +++ src/pqm4/sqisign_lvl1/ref/mp.c | 357 ++++ src/pqm4/sqisign_lvl1/ref/mp.h | 88 + src/pqm4/sqisign_lvl1/ref/pqm4_api.c | 60 + src/pqm4/sqisign_lvl1/ref/rng.h | 8 + src/pqm4/sqisign_lvl1/ref/sig.h | 85 + src/pqm4/sqisign_lvl1/ref/sqisign.c | 106 ++ src/pqm4/sqisign_lvl1/ref/sqisign_namespace.h | 1022 +++++++++++ src/pqm4/sqisign_lvl1/ref/theta_isogenies.c | 1283 ++++++++++++++ src/pqm4/sqisign_lvl1/ref/theta_isogenies.h | 18 + src/pqm4/sqisign_lvl1/ref/theta_structure.c | 78 + src/pqm4/sqisign_lvl1/ref/theta_structure.h | 135 ++ src/pqm4/sqisign_lvl1/ref/tools.h | 49 + src/pqm4/sqisign_lvl1/ref/tutil.h | 36 + src/pqm4/sqisign_lvl1/ref/verification.h | 123 ++ src/pqm4/sqisign_lvl1/ref/verify.c | 309 ++++ 
src/pqm4/sqisign_lvl1/ref/xeval.c | 64 + src/pqm4/sqisign_lvl1/ref/xisog.c | 61 + src/pqm4/sqisign_lvl3/ref/api.h | 31 + src/pqm4/sqisign_lvl3/ref/basis.c | 416 +++++ src/pqm4/sqisign_lvl3/ref/common.c | 88 + src/pqm4/sqisign_lvl3/ref/config.mk | 2 + src/pqm4/sqisign_lvl3/ref/e0_basis.c | 55 + src/pqm4/sqisign_lvl3/ref/e0_basis.h | 3 + src/pqm4/sqisign_lvl3/ref/ec.c | 665 ++++++++ src/pqm4/sqisign_lvl3/ref/ec.h | 668 ++++++++ src/pqm4/sqisign_lvl3/ref/ec_jac.c | 335 ++++ src/pqm4/sqisign_lvl3/ref/ec_params.c | 4 + src/pqm4/sqisign_lvl3/ref/ec_params.h | 12 + .../sqisign_lvl3/ref/encode_verification.c | 220 +++ src/pqm4/sqisign_lvl3/ref/encoded_sizes.h | 11 + src/pqm4/sqisign_lvl3/ref/fp.c | 15 + src/pqm4/sqisign_lvl3/ref/fp.h | 48 + src/pqm4/sqisign_lvl3/ref/fp2.c | 328 ++++ src/pqm4/sqisign_lvl3/ref/fp2.h | 41 + src/pqm4/sqisign_lvl3/ref/fp_constants.h | 17 + src/pqm4/sqisign_lvl3/ref/fp_p65376_32.c | 1231 ++++++++++++++ src/pqm4/sqisign_lvl3/ref/hd.c | 93 + src/pqm4/sqisign_lvl3/ref/hd.h | 435 +++++ .../ref/hd_splitting_transforms.c | 143 ++ .../ref/hd_splitting_transforms.h | 18 + src/pqm4/sqisign_lvl3/ref/isog.h | 28 + src/pqm4/sqisign_lvl3/ref/isog_chains.c | 241 +++ src/pqm4/sqisign_lvl3/ref/mp.c | 357 ++++ src/pqm4/sqisign_lvl3/ref/mp.h | 88 + src/pqm4/sqisign_lvl3/ref/pqm4_api.c | 60 + src/pqm4/sqisign_lvl3/ref/rng.h | 8 + src/pqm4/sqisign_lvl3/ref/sig.h | 85 + src/pqm4/sqisign_lvl3/ref/sqisign.c | 106 ++ src/pqm4/sqisign_lvl3/ref/sqisign_namespace.h | 1022 +++++++++++ src/pqm4/sqisign_lvl3/ref/theta_isogenies.c | 1283 ++++++++++++++ src/pqm4/sqisign_lvl3/ref/theta_isogenies.h | 18 + src/pqm4/sqisign_lvl3/ref/theta_structure.c | 78 + src/pqm4/sqisign_lvl3/ref/theta_structure.h | 135 ++ src/pqm4/sqisign_lvl3/ref/tools.h | 49 + src/pqm4/sqisign_lvl3/ref/tutil.h | 36 + src/pqm4/sqisign_lvl3/ref/verification.h | 123 ++ src/pqm4/sqisign_lvl3/ref/verify.c | 309 ++++ src/pqm4/sqisign_lvl3/ref/xeval.c | 64 + src/pqm4/sqisign_lvl3/ref/xisog.c | 61 + 
src/pqm4/sqisign_lvl5/ref/api.h | 31 + src/pqm4/sqisign_lvl5/ref/basis.c | 416 +++++ src/pqm4/sqisign_lvl5/ref/common.c | 88 + src/pqm4/sqisign_lvl5/ref/config.mk | 2 + src/pqm4/sqisign_lvl5/ref/e0_basis.c | 55 + src/pqm4/sqisign_lvl5/ref/e0_basis.h | 3 + src/pqm4/sqisign_lvl5/ref/ec.c | 665 ++++++++ src/pqm4/sqisign_lvl5/ref/ec.h | 668 ++++++++ src/pqm4/sqisign_lvl5/ref/ec_jac.c | 335 ++++ src/pqm4/sqisign_lvl5/ref/ec_params.c | 4 + src/pqm4/sqisign_lvl5/ref/ec_params.h | 12 + .../sqisign_lvl5/ref/encode_verification.c | 220 +++ src/pqm4/sqisign_lvl5/ref/encoded_sizes.h | 11 + src/pqm4/sqisign_lvl5/ref/fp.c | 15 + src/pqm4/sqisign_lvl5/ref/fp.h | 48 + src/pqm4/sqisign_lvl5/ref/fp2.c | 328 ++++ src/pqm4/sqisign_lvl5/ref/fp2.h | 41 + src/pqm4/sqisign_lvl5/ref/fp_constants.h | 17 + src/pqm4/sqisign_lvl5/ref/fp_p27500_32.c | 1514 +++++++++++++++++ src/pqm4/sqisign_lvl5/ref/hd.c | 93 + src/pqm4/sqisign_lvl5/ref/hd.h | 435 +++++ .../ref/hd_splitting_transforms.c | 143 ++ .../ref/hd_splitting_transforms.h | 18 + src/pqm4/sqisign_lvl5/ref/isog.h | 28 + src/pqm4/sqisign_lvl5/ref/isog_chains.c | 241 +++ src/pqm4/sqisign_lvl5/ref/mp.c | 357 ++++ src/pqm4/sqisign_lvl5/ref/mp.h | 88 + src/pqm4/sqisign_lvl5/ref/pqm4_api.c | 60 + src/pqm4/sqisign_lvl5/ref/rng.h | 8 + src/pqm4/sqisign_lvl5/ref/sig.h | 85 + src/pqm4/sqisign_lvl5/ref/sqisign.c | 106 ++ src/pqm4/sqisign_lvl5/ref/sqisign_namespace.h | 1022 +++++++++++ src/pqm4/sqisign_lvl5/ref/theta_isogenies.c | 1283 ++++++++++++++ src/pqm4/sqisign_lvl5/ref/theta_isogenies.h | 18 + src/pqm4/sqisign_lvl5/ref/theta_structure.c | 78 + src/pqm4/sqisign_lvl5/ref/theta_structure.h | 135 ++ src/pqm4/sqisign_lvl5/ref/tools.h | 49 + src/pqm4/sqisign_lvl5/ref/tutil.h | 36 + src/pqm4/sqisign_lvl5/ref/verification.h | 123 ++ src/pqm4/sqisign_lvl5/ref/verify.c | 309 ++++ src/pqm4/sqisign_lvl5/ref/xeval.c | 64 + src/pqm4/sqisign_lvl5/ref/xisog.c | 61 + 127 files changed, 27086 insertions(+) create mode 100644 src/pqm4/sqisign_lvl1/ref/api.h 
create mode 100644 src/pqm4/sqisign_lvl1/ref/basis.c create mode 100644 src/pqm4/sqisign_lvl1/ref/common.c create mode 100644 src/pqm4/sqisign_lvl1/ref/config.mk create mode 100644 src/pqm4/sqisign_lvl1/ref/e0_basis.c create mode 100644 src/pqm4/sqisign_lvl1/ref/e0_basis.h create mode 100644 src/pqm4/sqisign_lvl1/ref/ec.c create mode 100644 src/pqm4/sqisign_lvl1/ref/ec.h create mode 100644 src/pqm4/sqisign_lvl1/ref/ec_jac.c create mode 100644 src/pqm4/sqisign_lvl1/ref/ec_params.c create mode 100644 src/pqm4/sqisign_lvl1/ref/ec_params.h create mode 100644 src/pqm4/sqisign_lvl1/ref/encode_verification.c create mode 100644 src/pqm4/sqisign_lvl1/ref/encoded_sizes.h create mode 100644 src/pqm4/sqisign_lvl1/ref/fp.c create mode 100644 src/pqm4/sqisign_lvl1/ref/fp.h create mode 100644 src/pqm4/sqisign_lvl1/ref/fp2.c create mode 100644 src/pqm4/sqisign_lvl1/ref/fp2.h create mode 100644 src/pqm4/sqisign_lvl1/ref/fp_constants.h create mode 100644 src/pqm4/sqisign_lvl1/ref/fp_p5248_32.c create mode 100644 src/pqm4/sqisign_lvl1/ref/hd.c create mode 100644 src/pqm4/sqisign_lvl1/ref/hd.h create mode 100644 src/pqm4/sqisign_lvl1/ref/hd_splitting_transforms.c create mode 100644 src/pqm4/sqisign_lvl1/ref/hd_splitting_transforms.h create mode 100644 src/pqm4/sqisign_lvl1/ref/isog.h create mode 100644 src/pqm4/sqisign_lvl1/ref/isog_chains.c create mode 100644 src/pqm4/sqisign_lvl1/ref/mp.c create mode 100644 src/pqm4/sqisign_lvl1/ref/mp.h create mode 100644 src/pqm4/sqisign_lvl1/ref/pqm4_api.c create mode 100644 src/pqm4/sqisign_lvl1/ref/rng.h create mode 100644 src/pqm4/sqisign_lvl1/ref/sig.h create mode 100644 src/pqm4/sqisign_lvl1/ref/sqisign.c create mode 100644 src/pqm4/sqisign_lvl1/ref/sqisign_namespace.h create mode 100644 src/pqm4/sqisign_lvl1/ref/theta_isogenies.c create mode 100644 src/pqm4/sqisign_lvl1/ref/theta_isogenies.h create mode 100644 src/pqm4/sqisign_lvl1/ref/theta_structure.c create mode 100644 src/pqm4/sqisign_lvl1/ref/theta_structure.h create mode 100644 
src/pqm4/sqisign_lvl1/ref/tools.h create mode 100644 src/pqm4/sqisign_lvl1/ref/tutil.h create mode 100644 src/pqm4/sqisign_lvl1/ref/verification.h create mode 100644 src/pqm4/sqisign_lvl1/ref/verify.c create mode 100644 src/pqm4/sqisign_lvl1/ref/xeval.c create mode 100644 src/pqm4/sqisign_lvl1/ref/xisog.c create mode 100644 src/pqm4/sqisign_lvl3/ref/api.h create mode 100644 src/pqm4/sqisign_lvl3/ref/basis.c create mode 100644 src/pqm4/sqisign_lvl3/ref/common.c create mode 100644 src/pqm4/sqisign_lvl3/ref/config.mk create mode 100644 src/pqm4/sqisign_lvl3/ref/e0_basis.c create mode 100644 src/pqm4/sqisign_lvl3/ref/e0_basis.h create mode 100644 src/pqm4/sqisign_lvl3/ref/ec.c create mode 100644 src/pqm4/sqisign_lvl3/ref/ec.h create mode 100644 src/pqm4/sqisign_lvl3/ref/ec_jac.c create mode 100644 src/pqm4/sqisign_lvl3/ref/ec_params.c create mode 100644 src/pqm4/sqisign_lvl3/ref/ec_params.h create mode 100644 src/pqm4/sqisign_lvl3/ref/encode_verification.c create mode 100644 src/pqm4/sqisign_lvl3/ref/encoded_sizes.h create mode 100644 src/pqm4/sqisign_lvl3/ref/fp.c create mode 100644 src/pqm4/sqisign_lvl3/ref/fp.h create mode 100644 src/pqm4/sqisign_lvl3/ref/fp2.c create mode 100644 src/pqm4/sqisign_lvl3/ref/fp2.h create mode 100644 src/pqm4/sqisign_lvl3/ref/fp_constants.h create mode 100644 src/pqm4/sqisign_lvl3/ref/fp_p65376_32.c create mode 100644 src/pqm4/sqisign_lvl3/ref/hd.c create mode 100644 src/pqm4/sqisign_lvl3/ref/hd.h create mode 100644 src/pqm4/sqisign_lvl3/ref/hd_splitting_transforms.c create mode 100644 src/pqm4/sqisign_lvl3/ref/hd_splitting_transforms.h create mode 100644 src/pqm4/sqisign_lvl3/ref/isog.h create mode 100644 src/pqm4/sqisign_lvl3/ref/isog_chains.c create mode 100644 src/pqm4/sqisign_lvl3/ref/mp.c create mode 100644 src/pqm4/sqisign_lvl3/ref/mp.h create mode 100644 src/pqm4/sqisign_lvl3/ref/pqm4_api.c create mode 100644 src/pqm4/sqisign_lvl3/ref/rng.h create mode 100644 src/pqm4/sqisign_lvl3/ref/sig.h create mode 100644 
src/pqm4/sqisign_lvl3/ref/sqisign.c create mode 100644 src/pqm4/sqisign_lvl3/ref/sqisign_namespace.h create mode 100644 src/pqm4/sqisign_lvl3/ref/theta_isogenies.c create mode 100644 src/pqm4/sqisign_lvl3/ref/theta_isogenies.h create mode 100644 src/pqm4/sqisign_lvl3/ref/theta_structure.c create mode 100644 src/pqm4/sqisign_lvl3/ref/theta_structure.h create mode 100644 src/pqm4/sqisign_lvl3/ref/tools.h create mode 100644 src/pqm4/sqisign_lvl3/ref/tutil.h create mode 100644 src/pqm4/sqisign_lvl3/ref/verification.h create mode 100644 src/pqm4/sqisign_lvl3/ref/verify.c create mode 100644 src/pqm4/sqisign_lvl3/ref/xeval.c create mode 100644 src/pqm4/sqisign_lvl3/ref/xisog.c create mode 100644 src/pqm4/sqisign_lvl5/ref/api.h create mode 100644 src/pqm4/sqisign_lvl5/ref/basis.c create mode 100644 src/pqm4/sqisign_lvl5/ref/common.c create mode 100644 src/pqm4/sqisign_lvl5/ref/config.mk create mode 100644 src/pqm4/sqisign_lvl5/ref/e0_basis.c create mode 100644 src/pqm4/sqisign_lvl5/ref/e0_basis.h create mode 100644 src/pqm4/sqisign_lvl5/ref/ec.c create mode 100644 src/pqm4/sqisign_lvl5/ref/ec.h create mode 100644 src/pqm4/sqisign_lvl5/ref/ec_jac.c create mode 100644 src/pqm4/sqisign_lvl5/ref/ec_params.c create mode 100644 src/pqm4/sqisign_lvl5/ref/ec_params.h create mode 100644 src/pqm4/sqisign_lvl5/ref/encode_verification.c create mode 100644 src/pqm4/sqisign_lvl5/ref/encoded_sizes.h create mode 100644 src/pqm4/sqisign_lvl5/ref/fp.c create mode 100644 src/pqm4/sqisign_lvl5/ref/fp.h create mode 100644 src/pqm4/sqisign_lvl5/ref/fp2.c create mode 100644 src/pqm4/sqisign_lvl5/ref/fp2.h create mode 100644 src/pqm4/sqisign_lvl5/ref/fp_constants.h create mode 100644 src/pqm4/sqisign_lvl5/ref/fp_p27500_32.c create mode 100644 src/pqm4/sqisign_lvl5/ref/hd.c create mode 100644 src/pqm4/sqisign_lvl5/ref/hd.h create mode 100644 src/pqm4/sqisign_lvl5/ref/hd_splitting_transforms.c create mode 100644 src/pqm4/sqisign_lvl5/ref/hd_splitting_transforms.h create mode 100644 
src/pqm4/sqisign_lvl5/ref/isog.h create mode 100644 src/pqm4/sqisign_lvl5/ref/isog_chains.c create mode 100644 src/pqm4/sqisign_lvl5/ref/mp.c create mode 100644 src/pqm4/sqisign_lvl5/ref/mp.h create mode 100644 src/pqm4/sqisign_lvl5/ref/pqm4_api.c create mode 100644 src/pqm4/sqisign_lvl5/ref/rng.h create mode 100644 src/pqm4/sqisign_lvl5/ref/sig.h create mode 100644 src/pqm4/sqisign_lvl5/ref/sqisign.c create mode 100644 src/pqm4/sqisign_lvl5/ref/sqisign_namespace.h create mode 100644 src/pqm4/sqisign_lvl5/ref/theta_isogenies.c create mode 100644 src/pqm4/sqisign_lvl5/ref/theta_isogenies.h create mode 100644 src/pqm4/sqisign_lvl5/ref/theta_structure.c create mode 100644 src/pqm4/sqisign_lvl5/ref/theta_structure.h create mode 100644 src/pqm4/sqisign_lvl5/ref/tools.h create mode 100644 src/pqm4/sqisign_lvl5/ref/tutil.h create mode 100644 src/pqm4/sqisign_lvl5/ref/verification.h create mode 100644 src/pqm4/sqisign_lvl5/ref/verify.c create mode 100644 src/pqm4/sqisign_lvl5/ref/xeval.c create mode 100644 src/pqm4/sqisign_lvl5/ref/xisog.c diff --git a/.gitignore b/.gitignore index ef8e9b6..c0c967e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ +bulid build*/ html/ latex/ .vscode *.DS_Store + diff --git a/src/pqm4/sqisign_lvl1/ref/api.h b/src/pqm4/sqisign_lvl1/ref/api.h new file mode 100644 index 0000000..652f39f --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/api.h @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: Apache-2.0 + +#ifndef api_h +#define api_h + +#include +#include + +#define CRYPTO_SECRETKEYBYTES 353 +#define CRYPTO_PUBLICKEYBYTES 65 +#define CRYPTO_BYTES 148 + +#define CRYPTO_ALGNAME "SQIsign_lvl1" + +SQISIGN_API +int +crypto_sign_keypair(unsigned char *pk, unsigned char *sk); + +SQISIGN_API +int +crypto_sign(unsigned char *sm, size_t *smlen, + const unsigned char *m, size_t mlen, + const unsigned char *sk); + +SQISIGN_API +int +crypto_sign_open(unsigned char *m, size_t *mlen, + const unsigned char *sm, size_t smlen, + const unsigned char *pk); + 
+#endif /* api_h */
diff --git a/src/pqm4/sqisign_lvl1/ref/basis.c b/src/pqm4/sqisign_lvl1/ref/basis.c
new file mode 100644
index 0000000..94cb7fc
--- /dev/null
+++ b/src/pqm4/sqisign_lvl1/ref/basis.c
@@ -0,0 +1,416 @@
+#include "ec.h"
+#include "fp2.h"
+#include "e0_basis.h"
+#include <assert.h>
+
+uint32_t
+ec_recover_y(fp2_t *y, const fp2_t *Px, const ec_curve_t *curve)
+{ // Recover y-coordinate of a point on the Montgomery curve y^2 = x^3 + Ax^2 + x
+    fp2_t t0;
+
+    fp2_sqr(&t0, Px);
+    fp2_mul(y, &t0, &curve->A); // Ax^2
+    fp2_add(y, y, Px); // Ax^2 + x
+    fp2_mul(&t0, &t0, Px);
+    fp2_add(y, y, &t0); // x^3 + Ax^2 + x
+    // This is required, because we do not yet know that our curves are
+    // supersingular so our points live on the twist with B = 1.
+    return fp2_sqrt_verify(y);
+}
+
+static void
+difference_point(ec_point_t *PQ, const ec_point_t *P, const ec_point_t *Q, const ec_curve_t *curve)
+{
+    // Given P,Q in projective x-only, computes a deterministic choice for (P-Q)
+    // Based on Proposition 3 of https://eprint.iacr.org/2017/518.pdf
+
+    fp2_t Bxx, Bxz, Bzz, t0, t1;
+
+    fp2_mul(&t0, &P->x, &Q->x);
+    fp2_mul(&t1, &P->z, &Q->z);
+    fp2_sub(&Bxx, &t0, &t1);
+    fp2_sqr(&Bxx, &Bxx);
+    fp2_mul(&Bxx, &Bxx, &curve->C); // C*(P.x*Q.x-P.z*Q.z)^2
+    fp2_add(&Bxz, &t0, &t1);
+    fp2_mul(&t0, &P->x, &Q->z);
+    fp2_mul(&t1, &P->z, &Q->x);
+    fp2_add(&Bzz, &t0, &t1);
+    fp2_mul(&Bxz, &Bxz, &Bzz); // (P.x*Q.x+P.z*Q.z)(P.x*Q.z+P.z*Q.x)
+    fp2_sub(&Bzz, &t0, &t1);
+    fp2_sqr(&Bzz, &Bzz);
+    fp2_mul(&Bzz, &Bzz, &curve->C); // C*(P.x*Q.z-P.z*Q.x)^2
+    fp2_mul(&Bxz, &Bxz, &curve->C); // C*(P.x*Q.x+P.z*Q.z)(P.x*Q.z+P.z*Q.x)
+    fp2_mul(&t0, &t0, &t1);
+    fp2_mul(&t0, &t0, &curve->A);
+    fp2_add(&t0, &t0, &t0);
+    fp2_add(&Bxz, &Bxz, &t0); // C*(P.x*Q.x+P.z*Q.z)(P.x*Q.z+P.z*Q.x) + 2*A*P.x*Q.z*P.z*Q.x
+
+    // To ensure that the denominator is a fourth power in Fp, we normalize by
+    // C*C_bar^2*(P.z)_bar^2*(Q.z)_bar^2
+    fp_copy(&t0.re, &curve->C.re);
+    fp_neg(&t0.im, &curve->C.im);
+    fp2_sqr(&t0, &t0);
+    fp2_mul(&t0, &t0, &curve->C);
+    fp_copy(&t1.re, &P->z.re);
+    fp_neg(&t1.im, &P->z.im);
+    fp2_sqr(&t1, &t1);
+    fp2_mul(&t0, &t0, &t1);
+    fp_copy(&t1.re, &Q->z.re);
+    fp_neg(&t1.im, &Q->z.im);
+    fp2_sqr(&t1, &t1);
+    fp2_mul(&t0, &t0, &t1);
+    fp2_mul(&Bxx, &Bxx, &t0);
+    fp2_mul(&Bxz, &Bxz, &t0);
+    fp2_mul(&Bzz, &Bzz, &t0);
+
+    // Solving quadratic equation
+    fp2_sqr(&t0, &Bxz);
+    fp2_mul(&t1, &Bxx, &Bzz);
+    fp2_sub(&t0, &t0, &t1);
+    // No need to check if t0 is square, as per the entangled basis algorithm.
+    fp2_sqrt(&t0);
+    fp2_add(&PQ->x, &Bxz, &t0);
+    fp2_copy(&PQ->z, &Bzz);
+}
+
+// Lifts a basis x(P), x(Q), x(P-Q) assuming the curve has (A/C : 1) and the point
+// P = (X/Z : 1). For generic implementation see lift_basis()
+uint32_t
+lift_basis_normalized(jac_point_t *P, jac_point_t *Q, ec_basis_t *B, ec_curve_t *E)
+{
+    assert(fp2_is_one(&B->P.z));
+    assert(fp2_is_one(&E->C));
+
+    fp2_copy(&P->x, &B->P.x);
+    fp2_copy(&Q->x, &B->Q.x);
+    fp2_copy(&Q->z, &B->Q.z);
+    fp2_set_one(&P->z);
+    uint32_t ret = ec_recover_y(&P->y, &P->x, E);
+
+    // Algorithm of Okeya-Sakurai to recover y.Q in the Montgomery model
+    fp2_t v1, v2, v3, v4;
+    fp2_mul(&v1, &P->x, &Q->z);
+    fp2_add(&v2, &Q->x, &v1);
+    fp2_sub(&v3, &Q->x, &v1);
+    fp2_sqr(&v3, &v3);
+    fp2_mul(&v3, &v3, &B->PmQ.x);
+    fp2_add(&v1, &E->A, &E->A);
+    fp2_mul(&v1, &v1, &Q->z);
+    fp2_add(&v2, &v2, &v1);
+    fp2_mul(&v4, &P->x, &Q->x);
+    fp2_add(&v4, &v4, &Q->z);
+    fp2_mul(&v2, &v2, &v4);
+    fp2_mul(&v1, &v1, &Q->z);
+    fp2_sub(&v2, &v2, &v1);
+    fp2_mul(&v2, &v2, &B->PmQ.z);
+    fp2_sub(&Q->y, &v3, &v2);
+    fp2_add(&v1, &P->y, &P->y);
+    fp2_mul(&v1, &v1, &Q->z);
+    fp2_mul(&v1, &v1, &B->PmQ.z);
+    fp2_mul(&Q->x, &Q->x, &v1);
+    fp2_mul(&Q->z, &Q->z, &v1);
+
+    // Transforming to a jacobian coordinate
+    fp2_sqr(&v1, &Q->z);
+    fp2_mul(&Q->y, &Q->y, &v1);
+    fp2_mul(&Q->x, &Q->x, &Q->z);
+    return ret;
+}
+
+uint32_t
+lift_basis(jac_point_t *P, jac_point_t *Q, ec_basis_t *B, ec_curve_t *E)
+{
+    // Normalise in place (B and E are modified): the curve E so that (A : C) is (A/C : 1)
+    // and the point x(P) = (X/Z : 1).
+    fp2_t inverses[2];
+    fp2_copy(&inverses[0], &B->P.z);
+    fp2_copy(&inverses[1], &E->C);
+
+    fp2_batched_inv(inverses, 2);
+    fp2_set_one(&B->P.z);
+    fp2_set_one(&E->C);
+
+    fp2_mul(&B->P.x, &B->P.x, &inverses[0]);
+    fp2_mul(&E->A, &E->A, &inverses[1]);
+
+    // Lift the basis to Jacobian points P, Q
+    return lift_basis_normalized(P, Q, B, E);
+}
+
+// Given an x-coordinate, determines if this is a valid
+// point on the curve. Assumes C=1.
+static uint32_t
+is_on_curve(const fp2_t *x, const ec_curve_t *curve)
+{
+    assert(fp2_is_one(&curve->C));
+    fp2_t t0;
+
+    fp2_add(&t0, x, &curve->A); // x + (A/C)
+    fp2_mul(&t0, &t0, x); // x^2 + (A/C)*x
+    fp2_add_one(&t0, &t0); // x^2 + (A/C)*x + 1
+    fp2_mul(&t0, &t0, x); // x^3 + (A/C)*x^2 + x
+
+    return fp2_is_square(&t0);
+}
+
+// Helper function which given a point of order k*2^n with n maximal
+// and k odd, computes a point of order 2^f
+static inline void
+clear_cofactor_for_maximal_even_order(ec_point_t *P, ec_curve_t *curve, int f)
+{
+    // clear out the odd cofactor to get a point of order 2^n
+    ec_mul(P, p_cofactor_for_2f, P_COFACTOR_FOR_2F_BITLENGTH, P, curve);
+
+    // clear the power of two to get a point of order 2^f
+    for (int i = 0; i < TORSION_EVEN_POWER - f; i++) {
+        xDBL_A24(P, P, &curve->A24, curve->is_A24_computed_and_normalized);
+    }
+}
+
+// Helper function which finds an NQR -1 / (1 + i*b) for entangled basis generation
+static uint8_t
+find_nqr_factor(fp2_t *x, ec_curve_t *curve, const uint8_t start)
+{
+    // factor = -1/(1 + i*b) for b in Fp will be NQR whenever 1 + b^2 is NQR
+    // in Fp, so we find one of these and then invert (1 + i*b). We store b
+    // as a u8 hint to save time in verification.
+
+    // We return the hint as a u8, but use (uint16_t)n to give 2^16 - 1
+    // to make failure cryptographically negligible, with a fallback when
+    // n > 128 is required.
+    uint8_t hint;
+    uint32_t found = 0;
+    uint16_t n = start;
+
+    bool qr_b = 1;
+    fp_t b, tmp;
+    fp2_t z, t0, t1;
+
+    do {
+        while (qr_b) {
+            // find b with 1 + b^2 a non-quadratic residue
+            fp_set_small(&tmp, (uint32_t)n * n + 1);
+            qr_b = fp_is_square(&tmp);
+            n++; // keeps track of b = n - 1
+        }
+
+        // for Px := -A/(1 + i*b) to be on the curve
+        // is equivalent to A^2*(z-1) - z^2 NQR for z = 1 + i*b
+        // thus prevents unnecessary inversion pre-check
+
+        // t0 = z - 1 = i*b
+        // t1 = z = 1 + i*b
+        fp_set_small(&b, (uint32_t)n - 1);
+        fp2_set_zero(&t0);
+        fp2_set_one(&z);
+        fp_copy(&z.im, &b);
+        fp_copy(&t0.im, &b);
+
+        // A^2*(z-1) - z^2
+        fp2_sqr(&t1, &curve->A);
+        fp2_mul(&t0, &t0, &t1); // A^2 * (z - 1)
+        fp2_sqr(&t1, &z);
+        fp2_sub(&t0, &t0, &t1); // A^2 * (z - 1) - z^2
+        found = !fp2_is_square(&t0);
+
+        qr_b = 1;
+    } while (!found);
+
+    // set Px to -A/(1 + i*b)
+    fp2_copy(x, &z);
+    fp2_inv(x);
+    fp2_mul(x, x, &curve->A);
+    fp2_neg(x, x);
+
+    /*
+     * With very low probability n will not fit in 7 bits.
+     * We set hint = 0 which signals failure and the need
+     * to generate a value on the fly during verification
+     */
+    hint = n <= 128 ? n - 1 : 0;
+
+    return hint;
+}
+
+// Helper function which finds a point x(P) = n * A
+static uint8_t
+find_nA_x_coord(fp2_t *x, ec_curve_t *curve, const uint8_t start)
+{
+    assert(!fp2_is_square(&curve->A)); // Only to be called when A is a NQR
+
+    // when A is NQR we allow x(P) to be a multiple n*A of A
+    uint8_t n = start;
+    if (n == 1) {
+        fp2_copy(x, &curve->A);
+    } else {
+        fp2_mul_small(x, &curve->A, n);
+    }
+
+    while (!is_on_curve(x, curve)) {
+        fp2_add(x, x, &curve->A);
+        n++;
+    }
+
+    /*
+     * With very low probability (1/2^128), n will not fit in 7 bits.
+     * In this case, we set hint = 0 which signals failure and the need
+     * to generate a value on the fly during verification
+     */
+    uint8_t hint = n < 128 ? n : 0;
+    return hint;
+}
+
+// The entangled basis generation does not allow A = 0
+// so we simply return the one we have already precomputed
+static void
+ec_basis_E0_2f(ec_basis_t *PQ2, ec_curve_t *curve, int f)
+{
+    assert(fp2_is_zero(&curve->A));
+    ec_point_t P, Q;
+
+    // Set P, Q to precomputed (X : 1) values
+    fp2_copy(&P.x, &BASIS_E0_PX);
+    fp2_copy(&Q.x, &BASIS_E0_QX);
+    fp2_set_one(&P.z);
+    fp2_set_one(&Q.z);
+
+    // clear the power of two to get a point of order 2^f
+    for (int i = 0; i < TORSION_EVEN_POWER - f; i++) {
+        xDBL_E0(&P, &P);
+        xDBL_E0(&Q, &Q);
+    }
+
+    // Set P, Q in the basis and compute x(P - Q)
+    copy_point(&PQ2->P, &P);
+    copy_point(&PQ2->Q, &Q);
+    difference_point(&PQ2->PmQ, &P, &Q, curve);
+}
+
+// Computes a basis E[2^f] = <P, Q> where the point Q is above (0 : 0)
+// and stores hints as an array for faster recomputation at a later point
+uint8_t
+ec_curve_to_basis_2f_to_hint(ec_basis_t *PQ2, ec_curve_t *curve, int f)
+{
+    // Normalise (A/C : 1) and ((A + 2)/4 : 1)
+    ec_normalize_curve_and_A24(curve);
+
+    if (fp2_is_zero(&curve->A)) {
+        ec_basis_E0_2f(PQ2, curve, f);
+        return 0;
+    }
+
+    uint8_t hint;
+    bool hint_A = fp2_is_square(&curve->A);
+
+    // Compute the points P, Q
+    ec_point_t P, Q;
+
+    if (!hint_A) {
+        // when A is NQR we allow x(P) to be a multiple n*A of A
+        hint = find_nA_x_coord(&P.x, curve, 1);
+    } else {
+        // when A is QR we instead have to find (1 + b^2) a NQR
+        // such that x(P) = -A / (1 + i*b)
+        hint = find_nqr_factor(&P.x, curve, 1);
+    }
+
+    fp2_set_one(&P.z);
+    fp2_add(&Q.x, &curve->A, &P.x);
+    fp2_neg(&Q.x, &Q.x);
+    fp2_set_one(&Q.z);
+
+    // clear out the odd cofactor to get a point of order 2^f
+    clear_cofactor_for_maximal_even_order(&P, curve, f);
+    clear_cofactor_for_maximal_even_order(&Q, curve, f);
+
+    // compute PmQ, set PmQ to Q to ensure Q above (0,0)
+    difference_point(&PQ2->Q, &P, &Q, curve);
+    copy_point(&PQ2->P, &P);
+    copy_point(&PQ2->PmQ, &Q);
+
+    // Finally, we compress hint_A and hint into a single byte.
+    // We choose to set the LSB of hint to hint_A
+    assert(hint < 128); // We expect hint to be 7-bits in size
+    return (hint << 1) | hint_A;
+}
+
+// Computes a basis E[2^f] = <P, Q> where the point Q is above (0 : 0)
+// given the hints as an array for faster basis computation
+int
+ec_curve_to_basis_2f_from_hint(ec_basis_t *PQ2, ec_curve_t *curve, int f, const uint8_t hint)
+{
+    // Normalise (A/C : 1) and ((A + 2)/4 : 1)
+    ec_normalize_curve_and_A24(curve);
+
+    if (fp2_is_zero(&curve->A)) {
+        ec_basis_E0_2f(PQ2, curve, f);
+        return 1;
+    }
+
+    // The LSB of hint encodes whether A is a QR
+    // The remaining 7-bits are used to find a valid x(P)
+    bool hint_A = hint & 1;
+    uint8_t hint_P = hint >> 1;
+
+    // Compute the points P, Q
+    ec_point_t P, Q;
+
+    if (!hint_P) {
+        // When hint_P = 0 it means we did not find a point in 128 attempts
+        // this is very rare and we almost never expect to need this fallback
+        // In either case, we can start with b = 128 to skip testing the known
+        // values which will not work
+        if (!hint_A) {
+            find_nA_x_coord(&P.x, curve, 128);
+        } else {
+            find_nqr_factor(&P.x, curve, 128);
+        }
+    } else {
+        // Otherwise we use the hint to directly find x(P) based on hint_A
+        if (!hint_A) {
+            // when A is NQR, we have found n such that x(P) = n*A
+            fp2_mul_small(&P.x, &curve->A, hint_P);
+        } else {
+            // when A is QR we have found b such that (1 + b^2) is a NQR in
+            // Fp, so we must compute x(P) = -A / (1 + i*b)
+            fp_set_one(&P.x.re);
+            fp_set_small(&P.x.im, hint_P);
+            fp2_inv(&P.x);
+            fp2_mul(&P.x, &P.x, &curve->A);
+            fp2_neg(&P.x, &P.x);
+        }
+    }
+    fp2_set_one(&P.z);
+
+#ifndef NDEBUG // NOTE: these hint sanity checks are compiled out when NDEBUG is defined
+    int passed = 1;
+    passed = is_on_curve(&P.x, curve);
+    passed &= !fp2_is_square(&P.x);
+
+    if (!passed)
+        return 0;
+#endif
+
+    // set xQ to -xP - A
+    fp2_add(&Q.x, &curve->A, &P.x);
+    fp2_neg(&Q.x, &Q.x);
+    fp2_set_one(&Q.z);
+
+    // clear out the odd cofactor to get a point of order 2^f
+    clear_cofactor_for_maximal_even_order(&P, curve, f);
+    clear_cofactor_for_maximal_even_order(&Q, curve, f);
+
+    // compute PmQ, set PmQ to Q to ensure Q above (0,0)
+    difference_point(&PQ2->Q, &P, &Q, curve);
+    copy_point(&PQ2->P, &P);
+    copy_point(&PQ2->PmQ, &Q);
+
+#ifndef NDEBUG
+    passed &= test_basis_order_twof(PQ2, curve, f);
+
+    if (!passed)
+        return 0;
+#endif
+
+    return 1;
+}
diff --git a/src/pqm4/sqisign_lvl1/ref/common.c b/src/pqm4/sqisign_lvl1/ref/common.c
new file mode 100644
index 0000000..d393e9c
--- /dev/null
+++ b/src/pqm4/sqisign_lvl1/ref/common.c
@@ -0,0 +1,88 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+void
+public_key_init(public_key_t *pk)
+{
+    ec_curve_init(&pk->curve);
+}
+
+void
+public_key_finalize(public_key_t *pk)
+{
+}
+
+// compute the challenge as the hash of the message and the commitment curve and public key
+void
+hash_to_challenge(scalar_t *scalar,
+                  const public_key_t *pk,
+                  const ec_curve_t *com_curve,
+                  const unsigned char *message,
+                  size_t length)
+{
+    unsigned char buf[2 * FP2_ENCODED_BYTES];
+    {
+        fp2_t j1, j2;
+        ec_j_inv(&j1, &pk->curve);
+        ec_j_inv(&j2, com_curve);
+        fp2_encode(buf, &j1);
+        fp2_encode(buf + FP2_ENCODED_BYTES, &j2);
+    }
+
+    {
+        // The type scalar_t represents an element of GF(p), which is about
+        // 2*lambda bits, where lambda = 128, 192 or 256, according to the
+        // security level. Thus, the variable scalar should have enough memory
+        // for the values produced by SHAKE256 in the intermediate iterations.
+ + shake256incctx ctx; + + size_t hash_bytes = ((2 * SECURITY_BITS) + 7) / 8; + size_t limbs = (hash_bytes + sizeof(digit_t) - 1) / sizeof(digit_t); + size_t bits = (2 * SECURITY_BITS) % RADIX; + digit_t mask = ((digit_t)-1) >> ((RADIX - bits) % RADIX); +#ifdef TARGET_BIG_ENDIAN + mask = BSWAP_DIGIT(mask); +#endif + + shake256_inc_init(&ctx); + shake256_inc_absorb(&ctx, buf, 2 * FP2_ENCODED_BYTES); + shake256_inc_absorb(&ctx, message, length); + shake256_inc_finalize(&ctx); + shake256_inc_squeeze((void *)(*scalar), hash_bytes, &ctx); + (*scalar)[limbs - 1] &= mask; + for (int i = 2; i < HASH_ITERATIONS; i++) { + shake256_inc_init(&ctx); + shake256_inc_absorb(&ctx, (void *)(*scalar), hash_bytes); + shake256_inc_finalize(&ctx); + shake256_inc_squeeze((void *)(*scalar), hash_bytes, &ctx); + (*scalar)[limbs - 1] &= mask; + } + shake256_inc_init(&ctx); + shake256_inc_absorb(&ctx, (void *)(*scalar), hash_bytes); + shake256_inc_finalize(&ctx); + + hash_bytes = ((TORSION_EVEN_POWER - SQIsign_response_length) + 7) / 8; + limbs = (hash_bytes + sizeof(digit_t) - 1) / sizeof(digit_t); + bits = (TORSION_EVEN_POWER - SQIsign_response_length) % RADIX; + mask = ((digit_t)-1) >> ((RADIX - bits) % RADIX); +#ifdef TARGET_BIG_ENDIAN + mask = BSWAP_DIGIT(mask); +#endif + + memset(*scalar, 0, NWORDS_ORDER * sizeof(digit_t)); + shake256_inc_squeeze((void *)(*scalar), hash_bytes, &ctx); + (*scalar)[limbs - 1] &= mask; + +#ifdef TARGET_BIG_ENDIAN + for (int i = 0; i < NWORDS_ORDER; i++) + (*scalar)[i] = BSWAP_DIGIT((*scalar)[i]); +#endif + + mp_mod_2exp(*scalar, SECURITY_BITS, NWORDS_ORDER); + } +} diff --git a/src/pqm4/sqisign_lvl1/ref/config.mk b/src/pqm4/sqisign_lvl1/ref/config.mk new file mode 100644 index 0000000..212eb4a --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/config.mk @@ -0,0 +1,2 @@ +elf/crypto_sign_sqisign_lvl1_ref_%.elf: CPPFLAGS+=-DRADIX_32 -DSQISIGN_BUILD_TYPE_REF -DSQISIGN_GF_IMPL_REF -DSQISIGN_VARIANT=lvl1 -DTARGET_ARM -DTARGET_OS_OTHER -DNDEBUG -DDISABLE_NAMESPACING 
-DBIG_PUBLIC_KEY_TESTS +obj/libcrypto_sign_sqisign_lvl1_ref.a: CPPFLAGS+=-DRADIX_32 -DSQISIGN_BUILD_TYPE_REF -DSQISIGN_GF_IMPL_REF -DSQISIGN_VARIANT=lvl1 -DTARGET_ARM -DTARGET_OS_OTHER -DNDEBUG -DDISABLE_NAMESPACING -DBIG_PUBLIC_KEY_TESTS diff --git a/src/pqm4/sqisign_lvl1/ref/e0_basis.c b/src/pqm4/sqisign_lvl1/ref/e0_basis.c new file mode 100644 index 0000000..5be2b8e --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/e0_basis.c @@ -0,0 +1,55 @@ +#include +const fp2_t BASIS_E0_PX = { +#if 0 +#elif RADIX == 16 +{0x107, 0xc, 0x1890, 0xf2a, 0x52b, 0xb68, 0x152d, 0xa4c, 0x1054, 0x642, 0x36a, 0x6f8, 0x7ad, 0x146c, 0x1d66, 0x1b67, 0x236, 0x10d, 0x1933, 0x3} +#elif RADIX == 32 +{0x3020e, 0xb795624, 0x5ab6829, 0x1514995, 0x1b5190a, 0x187ad37c, 0x19facd46, 0x8688db6, 0x3c998} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x52b795624001810, 0x8c8505452654b56d, 0xf59a8d87ad37c0da, 0x24e4cc21a236db3} +#else +{0x5bcab12000c08, 0x452654b56d052, 0x26f81b5190a0a, 0x36cfd66a361eb, 0x12726610d11b} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0x1f87, 0x83e, 0x32e, 0xe58, 0xd9d, 0x1416, 0x752, 0x13b4, 0x1efa, 0xe62, 0x12f5, 0x1907, 0x1814, 0x1ddd, 0x1aa6, 0x1420, 0x2cd, 0x1431, 0x1be2, 0x7} +#elif RADIX == 32 +{0x120fbf0f, 0x1d72c0cb, 0xa54166c, 0x1bea7687, 0x197ab98b, 0x1b814c83, 0x8354ddd, 0x188b368, 0x2df15} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0xcd9d72c0cb907df8, 0x5cc5efa9da1d4a82, 0x6a9bbbb814c83cbd, 0x26ef8a8622cda10} +#else +{0x6b96065c83efc, 0x29da1d4a82cd9, 0x190797ab98bdf, 0x6841aa6eeee05, 0x1377c5431166} +#endif +#endif +}; +const fp2_t BASIS_E0_QX = { +#if 0 +#elif RADIX == 16 +{0x5ff, 0x1783, 0xadc, 0x775, 0xad4, 0x593, 0xb4c, 0x21e, 0x1cb2, 0x13d8, 0x179f, 0x680, 0x1a9c, 0x1824, 0x118e, 0x13d9, 0x24, 0x1956, 0x1dd2, 0x9} +#elif RADIX == 32 +{0x5e0cbff, 0x143baab7, 0x9859356, 0x12c843cb, 0xbcfcf63, 0x9a9c340, 0x16631d82, 0xab00927, 0x4ee96} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x6ad43baab72f065f, 
0xe7b1cb210f2d30b2, 0xc63b049a9c3405e7, 0x4ff74b2ac0249ec} +#else +{0x21dd55b97832f, 0x210f2d30b26ad, 0x680bcfcf6396, 0x27b318ec126a7, 0x4ffba5956012} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0x1c7f, 0x1117, 0xa4, 0x1164, 0x6e, 0x1e63, 0x1b7b, 0x1305, 0x424, 0x131a, 0x1b61, 0xae3, 0x17b1, 0xe5e, 0x1848, 0x1e81, 0x14a5, 0x1cb5, 0x1d87, 0x8} +#elif RADIX == 32 +{0x445f8ff, 0xe8b2029, 0xf7e6303, 0x109260bb, 0x1db0cc68, 0x1d7b1571, 0x7090e5, 0x5ad297d, 0x3ec3f} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x606e8b2029222fc7, 0x6634424982edefcc, 0xe121cbd7b1571ed8, 0x4f761f96b4a5f40} +#else +{0x74590149117e3, 0x4982edefcc606, 0x2ae3db0cc6884, 0x7d0384872f5ec, 0x4fbb0fcb5a52} +#endif +#endif +}; diff --git a/src/pqm4/sqisign_lvl1/ref/e0_basis.h b/src/pqm4/sqisign_lvl1/ref/e0_basis.h new file mode 100644 index 0000000..05cafb8 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/e0_basis.h @@ -0,0 +1,3 @@ +#include +extern const fp2_t BASIS_E0_PX; +extern const fp2_t BASIS_E0_QX; diff --git a/src/pqm4/sqisign_lvl1/ref/ec.c b/src/pqm4/sqisign_lvl1/ref/ec.c new file mode 100644 index 0000000..be4e4e5 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/ec.c @@ -0,0 +1,665 @@ +#include +#include +#include +#include + +void +ec_point_init(ec_point_t *P) +{ // Initialize point as identity element (1:0) + fp2_set_one(&(P->x)); + fp2_set_zero(&(P->z)); +} + +void +ec_curve_init(ec_curve_t *E) +{ // Initialize the curve struct + // Initialize the constants + fp2_set_zero(&(E->A)); + fp2_set_one(&(E->C)); + + // Initialize the point (A+2 : 4C) + ec_point_init(&(E->A24)); + + // Set the bool to be false by default + E->is_A24_computed_and_normalized = false; +} + +void +select_point(ec_point_t *Q, const ec_point_t *P1, const ec_point_t *P2, const digit_t option) +{ // Select points in constant time + // If option = 0 then Q <- P1, else if option = 0xFF...FF then Q <- P2 + fp2_select(&(Q->x), &(P1->x), &(P2->x), option); + fp2_select(&(Q->z), &(P1->z), &(P2->z), option); +} 
+
+void
+cswap_points(ec_point_t *P, ec_point_t *Q, const digit_t option)
+{ // Swap points in constant time
+    // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
+    fp2_cswap(&(P->x), &(Q->x), option);
+    fp2_cswap(&(P->z), &(Q->z), option);
+}
+
+void
+ec_normalize_point(ec_point_t *P)
+{ // Rescale P = (X:Z) in place to (X/Z : 1); Z is inverted unconditionally (Z = 0 is left to fp2_inv -- TODO confirm)
+    fp2_inv(&P->z);
+    fp2_mul(&P->x, &P->x, &P->z);
+    fp2_set_one(&(P->z));
+}
+
+void
+ec_normalize_curve(ec_curve_t *E)
+{ // Rescale the projective coefficient (A:C) in place to (A/C : 1); C is inverted unconditionally
+    fp2_inv(&E->C);
+    fp2_mul(&E->A, &E->A, &E->C);
+    fp2_set_one(&E->C);
+}
+
+void
+ec_curve_normalize_A24(ec_curve_t *E)
+{ // Make sure E->A24 is computed and normalized (Z = 1), then set the cache flag; no recomputation if the flag is set
+    if (!E->is_A24_computed_and_normalized) {
+        AC_to_A24(&E->A24, E);
+        ec_normalize_point(&E->A24);
+        E->is_A24_computed_and_normalized = true;
+    }
+    assert(fp2_is_one(&E->A24.z));
+}
+
+void
+ec_normalize_curve_and_A24(ec_curve_t *E)
+{ // Neither the curve nor A24 is guaranteed to be normalized on entry.
+    // First we normalize to (A/C : 1), then compute A24 unless it is already cached
+    if (!fp2_is_one(&E->C)) {
+        ec_normalize_curve(E);
+    }
+
+    if (!E->is_A24_computed_and_normalized) {
+        // Now compute A24 = ((A + 2) / 4 : 1)
+        fp2_add_one(&E->A24.x, &E->A);     // re(A24.x) = re(A) + 1
+        fp2_add_one(&E->A24.x, &E->A24.x); // re(A24.x) = re(A) + 2
+        fp_copy(&E->A24.x.im, &E->A.im);   // im(A24.x) = im(A)
+
+        fp2_half(&E->A24.x, &E->A24.x); // (A + 2) / 2
+        fp2_half(&E->A24.x, &E->A24.x); // (A + 2) / 4
+        fp2_set_one(&E->A24.z);
+
+        E->is_A24_computed_and_normalized = true;
+    }
+}
+
+uint32_t
+ec_is_zero(const ec_point_t *P)
+{ // P is the point at infinity iff Z = 0; the result of fp2_is_zero on Z is returned unchanged
+    return fp2_is_zero(&P->z);
+}
+
+uint32_t
+ec_has_zero_coordinate(const ec_point_t *P)
+{ // Nonzero if X = 0 or Z = 0 (the cases in which the differential addition formulas are not used)
+    return fp2_is_zero(&P->x) | fp2_is_zero(&P->z);
+}
+
+uint32_t
+ec_is_equal(const ec_point_t *P, const ec_point_t *Q)
+{ // Evaluate if two points in Montgomery coordinates (X:Z) are equal
+    // Returns 0xFFFFFFFF (true) if P=Q, 0 (false) otherwise
+    fp2_t t0, t1;
+
+    // Check if P, Q are the points at infinity
+    uint32_t l_zero = ec_is_zero(P);
+    uint32_t r_zero = ec_is_zero(Q);
+
+    // Check if PX * QZ = QX * PZ
+    fp2_mul(&t0, &P->x, &Q->z);
+    fp2_mul(&t1, &P->z, &Q->x);
+    uint32_t lr_equal = fp2_is_equal(&t0, &t1);
+
+    // Points are equal if
+    // - Both are zero, or
+    // - neither are zero AND PX * QZ = QX * PZ (combined with bitwise AND so the all-ones mask survives for ~ec_is_equal)
+    return (l_zero & r_zero) | (~l_zero & ~r_zero & lr_equal);
+}
+
+uint32_t
+ec_is_two_torsion(const ec_point_t *P, const ec_curve_t *E)
+{ // Nonzero iff P is a non-zero 2-torsion point; the point at infinity is rejected by the early return below
+    if (ec_is_zero(P))
+        return 0;
+
+    uint32_t x_is_zero, tmp_is_zero;
+    fp2_t t0, t1, t2;
+    fp2_add(&t0, &P->x, &P->z);
+    fp2_sqr(&t0, &t0);
+    fp2_sub(&t1, &P->x, &P->z);
+    fp2_sqr(&t1, &t1);
+    fp2_sub(&t2, &t0, &t1);
+    fp2_add(&t1, &t0, &t1);
+    fp2_mul(&t2, &t2, &E->A);
+    fp2_mul(&t1, &t1, &E->C);
+    fp2_add(&t1, &t1, &t1);
+    fp2_add(&t0, &t1, &t2); // 4 (CX^2+CZ^2+AXZ)
+
+    x_is_zero = fp2_is_zero(&P->x);
+    tmp_is_zero = fp2_is_zero(&t0);
+
+    // two torsion if x or x^2 + Ax + 1 is zero (projectively: X = 0 or CX^2 + AXZ + CZ^2 = 0)
+    return x_is_zero | tmp_is_zero;
+}
+
+uint32_t
+ec_is_four_torsion(const ec_point_t *P, const ec_curve_t *E)
+{ // Nonzero iff [2]P is a non-zero 2-torsion point, i.e. P has order exactly 4
+    ec_point_t test;
+    xDBL_A24(&test, P, &E->A24, E->is_A24_computed_and_normalized); // reads E->A24 as stored; it is not recomputed here
+    return ec_is_two_torsion(&test, E);
+}
+
+uint32_t
+ec_is_basis_four_torsion(const ec_basis_t *B, const ec_curve_t *E)
+{ // Check if basis points (P, Q) form a full 4-torsion basis: [2]P and [2]Q are non-zero 2-torsion and distinct
+    ec_point_t P2, Q2;
+    xDBL_A24(&P2, &B->P, &E->A24, E->is_A24_computed_and_normalized);
+    xDBL_A24(&Q2, &B->Q, &E->A24, E->is_A24_computed_and_normalized);
+    return (ec_is_two_torsion(&P2, E) & ec_is_two_torsion(&Q2, E) & ~ec_is_equal(&P2, &Q2));
+}
+
+int
+ec_curve_verify_A(const fp2_t *A)
+{ // Verify the Montgomery coefficient A is valid (A^2-4 \ne 0)
+    // Return 1 if curve is valid, 0 otherwise
+    fp2_t t; // A is compared against +2 and -2 directly, i.e. presumably the affine coefficient (C = 1) -- confirm
+    fp2_set_one(&t);
+    fp_add(&t.re, &t.re, &t.re); // t=2
+    if (fp2_is_equal(A, &t))
+        return 0;
+    fp_neg(&t.re, &t.re); // t=-2
+    if (fp2_is_equal(A, &t))
+        return 0;
+    return 1;
+}
+
+int
+ec_curve_init_from_A(ec_curve_t *E, const fp2_t *A)
+{ // Initialize the curve from the A coefficient and check it is valid
+    // Return 1 if curve is valid, 0 otherwise (E is initialized and A stored in both cases)
+    ec_curve_init(E);
+    fp2_copy(&E->A, A); // Set A
+    return ec_curve_verify_A(A);
+}
+
+void
+ec_j_inv(fp2_t *j_inv, const ec_curve_t *curve)
+{ // j-invariant of the Montgomery curve with projective coefficient (A:C): 256 (A^2-3C^2)^3 / (C^4 (A^2-4C^2))
+    fp2_t t0, t1;
+
+    fp2_sqr(&t1, &curve->C);
+    fp2_sqr(j_inv, &curve->A);
+    fp2_add(&t0, &t1, &t1);
+    fp2_sub(&t0, j_inv, &t0);
+    fp2_sub(&t0, &t0, &t1);
+    fp2_sub(j_inv, &t0, &t1);
+    fp2_sqr(&t1, &t1);
+    fp2_mul(j_inv, j_inv, &t1);
+    fp2_add(&t0, &t0, &t0);
+    fp2_add(&t0, &t0, &t0);
+    fp2_sqr(&t1, &t0);
+    fp2_mul(&t0, &t0, &t1);
+    fp2_add(&t0, &t0, &t0);
+    fp2_add(&t0, &t0, &t0);
+    fp2_inv(j_inv);
+    fp2_mul(j_inv, &t0, j_inv);
+}
+
+void
+xDBL_E0(ec_point_t *Q, const ec_point_t *P)
+{ // Doubling of a Montgomery point in projective coordinates (X:Z) on the curve E0 with (A:C) = (0:1).
+    // Input: projective Montgomery x-coordinates P = (XP:ZP), where xP=XP/ZP, and Montgomery curve constants (A:C) = (0:1).
+    // Output: projective Montgomery x-coordinates Q <- 2*P = (XQ:ZQ) such that x(2P)=XQ/ZQ.
+    fp2_t t0, t1, t2;
+
+    fp2_add(&t0, &P->x, &P->z);
+    fp2_sqr(&t0, &t0);
+    fp2_sub(&t1, &P->x, &P->z);
+    fp2_sqr(&t1, &t1);
+    fp2_sub(&t2, &t0, &t1);
+    fp2_add(&t1, &t1, &t1);
+    fp2_mul(&Q->x, &t0, &t1);
+    fp2_add(&Q->z, &t1, &t2);
+    fp2_mul(&Q->z, &Q->z, &t2);
+}
+
+void
+xDBL(ec_point_t *Q, const ec_point_t *P, const ec_point_t *AC)
+{ // Doubling of a Montgomery point in projective coordinates (X:Z). Computation of coefficient values A+2C and 4C
+    // on-the-fly.
+    // Input: projective Montgomery x-coordinates P = (XP:ZP), where xP=XP/ZP, and Montgomery curve constants (A:C),
+    // Output: projective Montgomery x-coordinates Q <- 2*P = (XQ:ZQ) such that x(2P)=XQ/ZQ. (AC->x is A, AC->z is C.)
+    fp2_t t0, t1, t2, t3;
+
+    fp2_add(&t0, &P->x, &P->z);
+    fp2_sqr(&t0, &t0);
+    fp2_sub(&t1, &P->x, &P->z);
+    fp2_sqr(&t1, &t1);
+    fp2_sub(&t2, &t0, &t1);
+    fp2_add(&t3, &AC->z, &AC->z);
+    fp2_mul(&t1, &t1, &t3);
+    fp2_add(&t1, &t1, &t1);
+    fp2_mul(&Q->x, &t0, &t1);
+    fp2_add(&t0, &t3, &AC->x);
+    fp2_mul(&t0, &t0, &t2);
+    fp2_add(&t0, &t0, &t1);
+    fp2_mul(&Q->z, &t0, &t2);
+}
+
+void
+xDBL_A24(ec_point_t *Q, const ec_point_t *P, const ec_point_t *A24, const bool A24_normalized)
+{ // Doubling of a Montgomery point in projective coordinates (X:Z).
+    // Input: projective Montgomery x-coordinates P = (XP:ZP), where xP=XP/ZP, and
+    // the Montgomery curve constants A24 = (A+2C:4C) (or A24 = ((A+2C)/4C:1) if normalized).
+    // Output: projective Montgomery x-coordinates Q <- 2*P = (XQ:ZQ) such that x(2P)=XQ/ZQ.
+    fp2_t t0, t1, t2;
+
+    fp2_add(&t0, &P->x, &P->z);
+    fp2_sqr(&t0, &t0);
+    fp2_sub(&t1, &P->x, &P->z);
+    fp2_sqr(&t1, &t1);
+    fp2_sub(&t2, &t0, &t1);
+    if (!A24_normalized) // the multiplication by A24.z is skipped only when the caller asserts A24.z = 1
+        fp2_mul(&t1, &t1, &A24->z);
+    fp2_mul(&Q->x, &t0, &t1);
+    fp2_mul(&t0, &t2, &A24->x);
+    fp2_add(&t0, &t0, &t1);
+    fp2_mul(&Q->z, &t0, &t2);
+}
+
+void
+xADD(ec_point_t *R, const ec_point_t *P, const ec_point_t *Q, const ec_point_t *PQ)
+{ // Differential addition of Montgomery points in projective coordinates (X:Z).
+    // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, and difference
+    // PQ=P-Q=(XPQ:ZPQ).
+    // Output: projective Montgomery point R <- P+Q = (XR:ZR) such that x(P+Q)=XR/ZR.
+    fp2_t t0, t1, t2, t3;
+
+    fp2_add(&t0, &P->x, &P->z);
+    fp2_sub(&t1, &P->x, &P->z);
+    fp2_add(&t2, &Q->x, &Q->z);
+    fp2_sub(&t3, &Q->x, &Q->z);
+    fp2_mul(&t0, &t0, &t3);
+    fp2_mul(&t1, &t1, &t2);
+    fp2_add(&t2, &t0, &t1);
+    fp2_sub(&t3, &t0, &t1);
+    fp2_sqr(&t2, &t2);
+    fp2_sqr(&t3, &t3);
+    fp2_mul(&t2, &PQ->z, &t2);
+    fp2_mul(&R->z, &PQ->x, &t3);
+    fp2_copy(&R->x, &t2);
+}
+
+void
+xDBLADD(ec_point_t *R,
+        ec_point_t *S,
+        const ec_point_t *P,
+        const ec_point_t *Q,
+        const ec_point_t *PQ,
+        const ec_point_t *A24,
+        const bool A24_normalized)
+{ // Simultaneous doubling and differential addition.
+    // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, the difference
+    // PQ=P-Q=(XPQ:ZPQ), and the Montgomery curve constants A24 = (A+2C:4C) (or A24 = ((A+2C)/4C:1) if normalized).
+    // Output: projective Montgomery points R <- 2*P = (XR:ZR) such that x(2P)=XR/ZR, and S <- P+Q = (XS:ZS) such that
+    // x(Q+P)=XS/ZS. R may alias P and S may alias Q: the inputs are read into temporaries before R and S are written.
+    fp2_t t0, t1, t2;
+
+    fp2_add(&t0, &P->x, &P->z);
+    fp2_sub(&t1, &P->x, &P->z);
+    fp2_sqr(&R->x, &t0);
+    fp2_sub(&t2, &Q->x, &Q->z);
+    fp2_add(&S->x, &Q->x, &Q->z);
+    fp2_mul(&t0, &t0, &t2);
+    fp2_sqr(&R->z, &t1);
+    fp2_mul(&t1, &t1, &S->x);
+    fp2_sub(&t2, &R->x, &R->z);
+    if (!A24_normalized) // the 4C factor may only be skipped when A24.z = 1
+        fp2_mul(&R->z, &R->z, &A24->z);
+    fp2_mul(&R->x, &R->x, &R->z);
+    fp2_mul(&S->x, &A24->x, &t2);
+    fp2_sub(&S->z, &t0, &t1);
+    fp2_add(&R->z, &R->z, &S->x);
+    fp2_add(&S->x, &t0, &t1);
+    fp2_mul(&R->z, &R->z, &t2);
+    fp2_sqr(&S->z, &S->z);
+    fp2_sqr(&S->x, &S->x);
+    fp2_mul(&S->z, &S->z, &PQ->x);
+    fp2_mul(&S->x, &S->x, &PQ->z);
+}
+
+void
+xMUL(ec_point_t *Q, const ec_point_t *P, const digit_t *k, const int kbits, const ec_curve_t *curve)
+{ // The Montgomery ladder
+    // Input: projective Montgomery point P=(XP:ZP) such that xP=XP/ZP, a scalar k of bitlength kbits, and
+    // the Montgomery curve constants (A:C) (or A24 = ((A+2C)/4C:1) if normalized).
+    // Output: projective Montgomery points Q <- k*P = (XQ:ZQ) such that x(k*P)=XQ/ZQ.
+    ec_point_t R0, R1, A24;
+    digit_t mask;
+    unsigned int bit, prevbit = 0, swap;
+
+    if (!curve->is_A24_computed_and_normalized) {
+        // Computation of A24=(A+2C:4C); this value is projective (A24.z = 4C), not normalized
+        fp2_add(&A24.x, &curve->C, &curve->C);
+        fp2_add(&A24.z, &A24.x, &A24.x);
+        fp2_add(&A24.x, &A24.x, &curve->A);
+    } else {
+        fp2_copy(&A24.x, &curve->A24.x);
+        fp2_copy(&A24.z, &curve->A24.z);
+        // Assert A24 has been normalised
+        assert(fp2_is_one(&A24.z));
+    }
+
+    // R0 <- (1:0), R1 <- P
+    ec_point_init(&R0);
+    fp2_copy(&R1.x, &P->x);
+    fp2_copy(&R1.z, &P->z);
+
+    // Main loop: scan k from bit kbits-1 down to bit 0; swaps are deferred and merged with the next bit (swap = bit ^ prevbit)
+    for (int i = kbits - 1; i >= 0; i--) {
+        bit = (k[i >> LOG2RADIX] >> (i & (RADIX - 1))) & 1;
+        swap = bit ^ prevbit;
+        prevbit = bit;
+        mask = 0 - (digit_t)swap;
+
+        cswap_points(&R0, &R1, mask); // pass the real flag below: the on-the-fly A24 above still carries the 4C factor
+        xDBLADD(&R0, &R1, &R0, &R1, P, &A24, curve->is_A24_computed_and_normalized);
+    }
+    swap = 0 ^ prevbit; // undo the last pending swap so that R0 holds k*P
+    mask = 0 - (digit_t)swap;
+    cswap_points(&R0, &R1, mask);
+
+    fp2_copy(&Q->x, &R0.x);
+    fp2_copy(&Q->z, &R0.z);
+}
+
+int
+xDBLMUL(ec_point_t *S,
+        const ec_point_t *P,
+        const digit_t *k,
+        const ec_point_t *Q,
+        const digit_t *l,
+        const ec_point_t *PQ,
+        const int kbits,
+        const ec_curve_t *curve)
+{ // The Montgomery biladder
+    // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, scalars k and l of
+    // bitlength kbits, the difference PQ=P-Q=(XPQ:ZPQ), and the Montgomery curve constants (A:C).
+    // Output: projective Montgomery point S <- k*P + l*Q = (XS:ZS) such that x(k*P + l*Q)=XS/ZS.
+
+    int i, A_is_zero;
+    digit_t evens, mevens, bitk0, bitl0, maskk, maskl, temp, bs1_ip1, bs2_ip1, bs1_i, bs2_i, h;
+    digit_t sigma[2] = { 0 }, pre_sigma = 0;
+    digit_t k_t[NWORDS_ORDER], l_t[NWORDS_ORDER], one[NWORDS_ORDER] = { 0 }, r[2 * BITS] = { 0 }; // NOTE(review): r is indexed up to 2*kbits-1, so kbits <= BITS is required and not checked here
+    ec_point_t DIFF1a, DIFF1b, DIFF2a, DIFF2b, R[3] = { 0 }, T[3];
+
+    // differential addition formulas are invalid in this case; returns 0 (failure) without writing S
+    if (ec_has_zero_coordinate(P) | ec_has_zero_coordinate(Q) | ec_has_zero_coordinate(PQ))
+        return 0;
+
+    // Derive sigma according to parity
+    bitk0 = (k[0] & 1);
+    bitl0 = (l[0] & 1);
+    maskk = 0 - bitk0; // Parity masks: 0 if even, otherwise 1...1
+    maskl = 0 - bitl0;
+    sigma[0] = (bitk0 ^ 1);
+    sigma[1] = (bitl0 ^ 1);
+    evens = sigma[0] + sigma[1]; // Count number of even scalars
+    mevens = 0 - (evens & 1);    // Mask mevens <- 0 if the number of even scalars is 0 or 2, otherwise mevens = 1...1
+
+    // If k and l are both even or both odd, pick sigma = (0,1)
+    sigma[0] = (sigma[0] & mevens);
+    sigma[1] = (sigma[1] & mevens) | (1 & ~mevens);
+
+    // Convert even scalars to odd: k_t <- k - 1 is kept only when k is even (maskk = 0), likewise for l
+    one[0] = 1;
+    mp_sub(k_t, k, one, NWORDS_ORDER);
+    mp_sub(l_t, l, one, NWORDS_ORDER);
+    select_ct(k_t, k_t, k, maskk, NWORDS_ORDER);
+    select_ct(l_t, l_t, l, maskl, NWORDS_ORDER);
+
+    // Scalar recoding: fills r[2i], r[2i+1] for i = 0..kbits-1; k_t and l_t are consumed (shifted) in the process
+    for (i = 0; i < kbits; i++) {
+        // If sigma[0] changed since the previous iteration, swap k_t and l_t
+        maskk = 0 - (sigma[0] ^ pre_sigma);
+        swap_ct(k_t, l_t, maskk, NWORDS_ORDER);
+
+        if (i == kbits - 1) {
+            bs1_ip1 = 0;
+            bs2_ip1 = 0;
+        } else {
+            bs1_ip1 = mp_shiftr(k_t, 1, NWORDS_ORDER);
+            bs2_ip1 = mp_shiftr(l_t, 1, NWORDS_ORDER);
+        }
+        bs1_i = k_t[0] & 1;
+        bs2_i = l_t[0] & 1;
+
+        r[2 * i] = bs1_i ^ bs1_ip1;
+        r[2 * i + 1] = bs2_i ^ bs2_ip1;
+
+        // Revert sigma if second bit, r_(2i+1), is 1
+        pre_sigma = sigma[0];
+        maskk = 0 - r[2 * i + 1];
+        select_ct(&temp, &sigma[0], &sigma[1], maskk, 1);
+        select_ct(&sigma[1], &sigma[1], &sigma[0], maskk, 1);
+        sigma[0] = temp;
+    }
+
+    // Point initialization: R[0] <- (1:0), (R[1], R[2]) <- (P, Q) or (Q, P) depending on sigma[0]
+    ec_point_init(&R[0]);
+    maskk = 0 - sigma[0];
+    select_point(&R[1], P, Q, maskk);
+    select_point(&R[2], Q, P, maskk);
+
+    fp2_copy(&DIFF1a.x, &R[1].x);
+    fp2_copy(&DIFF1a.z, &R[1].z);
+    fp2_copy(&DIFF1b.x, &R[2].x);
+    fp2_copy(&DIFF1b.z, &R[2].z);
+
+    // Initialize DIFF2a <- P+Q, DIFF2b <- P-Q
+    xADD(&R[2], &R[1], &R[2], PQ);
+    if (ec_has_zero_coordinate(&R[2]))
+        return 0; // non valid formulas
+
+    fp2_copy(&DIFF2a.x, &R[2].x);
+    fp2_copy(&DIFF2a.z, &R[2].z);
+    fp2_copy(&DIFF2b.x, &PQ->x);
+    fp2_copy(&DIFF2b.z, &PQ->z);
+
+    A_is_zero = fp2_is_zero(&curve->A); // selects xDBL_E0 below, which hard-codes (A:C) = (0:1)
+
+    // Main loop: consumes the recoded digits from the most significant pair down
+    for (i = kbits - 1; i >= 0; i--) {
+        h = r[2 * i] + r[2 * i + 1]; // in {0, 1, 2}
+        maskk = 0 - (h & 1);
+        select_point(&T[0], &R[0], &R[1], maskk);
+        maskk = 0 - (h >> 1);
+        select_point(&T[0], &T[0], &R[2], maskk);
+        if (A_is_zero) {
+            xDBL_E0(&T[0], &T[0]);
+        } else {
+            assert(fp2_is_one(&curve->A24.z)); // caller must supply a normalized A24 when A != 0 (only checked in debug builds)
+            xDBL_A24(&T[0], &T[0], &curve->A24, true);
+        }
+
+        maskk = 0 - r[2 * i + 1]; // r[2i+1] is in {0, 1}, so maskk is 0 or 1...1
+        select_point(&T[1], &R[0], &R[1], maskk);
+        select_point(&T[2], &R[1], &R[2], maskk);
+
+        cswap_points(&DIFF1a, &DIFF1b, maskk);
+        xADD(&T[1], &T[1], &T[2], &DIFF1a);
+        xADD(&T[2], &R[0], &R[2], &DIFF2a);
+
+        // If h (mod 2) = 1 then swap DIFF2a and DIFF2b
+        maskk = 0 - (h & 1);
+        cswap_points(&DIFF2a, &DIFF2b, maskk);
+
+        // R <- T
+        copy_point(&R[0], &T[0]);
+        copy_point(&R[1], &T[1]);
+        copy_point(&R[2], &T[2]);
+    }
+
+    // Output R[evens]: R[1] if exactly one scalar was even, R[0] otherwise ...
+    select_point(S, &R[0], &R[1], mevens);
+
+    maskk = 0 - (bitk0 & bitl0); // ... except R[2] when both scalars were odd
+    select_point(S, S, &R[2], maskk);
+    return 1;
+}
+
+int
+ec_ladder3pt(ec_point_t *R,
+             const digit_t *m,
+             const ec_point_t *P,
+             const ec_point_t *Q,
+             const ec_point_t *PQ,
+             const ec_curve_t *E)
+{ // The 3-point Montgomery ladder
+    // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, a scalar m of
+    // NWORDS_ORDER digits, the difference PQ=P-Q=(XPQ:ZPQ), and the Montgomery curve constants A24 = ((A+2C)/4C:1).
+    // Output: projective Montgomery point R <- P + m*Q = (XR:ZR) such that x(P + m*Q)=XR/ZR. Returns 1 on success, 0 on error.
+    assert(E->is_A24_computed_and_normalized);
+    if (!fp2_is_one(&E->A24.z)) {
+        return 0;
+    }
+    // Formulas are not valid in that case
+    if (ec_has_zero_coordinate(PQ)) {
+        return 0;
+    }
+
+    ec_point_t X0, X1, X2;
+    copy_point(&X0, Q);
+    copy_point(&X1, P);
+    copy_point(&X2, PQ);
+
+    int i, j; // every bit of all NWORDS_ORDER digits of m is processed, least significant first
+    digit_t t;
+    for (i = 0; i < NWORDS_ORDER; i++) {
+        t = 1;
+        for (j = 0; j < RADIX; j++) {
+            cswap_points(&X1, &X2, -((t & m[i]) == 0)); // mask is all-ones when the current bit of m is 0
+            xDBLADD(&X0, &X1, &X0, &X1, &X2, &E->A24, true);
+            cswap_points(&X1, &X2, -((t & m[i]) == 0));
+            t <<= 1;
+        };
+    };
+    copy_point(R, &X1);
+    return 1;
+}
+
+// WRAPPERS to export
+
+void
+ec_dbl(ec_point_t *res, const ec_point_t *P, const ec_curve_t *curve)
+{
+    // If A24 = ((A+2)/4 : 1) we save multiplications
+    if (curve->is_A24_computed_and_normalized) {
+        assert(fp2_is_one(&curve->A24.z));
+        xDBL_A24(res, P, &curve->A24, true);
+    } else {
+        // Otherwise we compute A24 on the fly for doubling (the cast reads the leading A, C fields of the curve as (x, z))
+        xDBL(res, P, (const ec_point_t *)curve);
+    }
+}
+
+void
+ec_dbl_iter(ec_point_t *res, int n, const ec_point_t *P, ec_curve_t *curve)
+{ // res <- [2^n] P; n == 0 copies P. NOTE(review): a negative n falls through and performs one doubling
+    if (n == 0) {
+        copy_point(res, P);
+        return;
+    }
+
+    // When the chain is long enough, we should normalise A24 (this writes to *curve)
+    if (n > 50) {
+        ec_curve_normalize_A24(curve);
+    }
+
+    // When A24 is normalized we can save some multiplications
+    if (curve->is_A24_computed_and_normalized) {
+        assert(fp2_is_one(&curve->A24.z));
+        xDBL_A24(res, P, &curve->A24, true);
+        for (int i = 0; i < n - 1; i++) {
+            assert(fp2_is_one(&curve->A24.z));
+            xDBL_A24(res, res, &curve->A24, true);
+        }
+    } else {
+        // Otherwise we do normal doubling
+        xDBL(res, P, (const ec_point_t *)curve);
+        for (int i = 0; i < n - 1; i++) {
+            xDBL(res, res, (const ec_point_t *)curve);
+        }
+    }
+}
+
+void
+ec_dbl_iter_basis(ec_basis_t *res, int n, const ec_basis_t *B, ec_curve_t *curve)
+{ // Applies ec_dbl_iter to each of P, Q and PmQ
+    ec_dbl_iter(&res->P, n, &B->P, curve);
+    ec_dbl_iter(&res->Q, n, &B->Q, curve);
+    ec_dbl_iter(&res->PmQ, n, &B->PmQ, curve);
+}
+
+void
+ec_mul(ec_point_t *res, const digit_t *scalar, const int kbits, const ec_point_t *P, ec_curve_t *curve)
+{
+    // For large scalars it's worth normalising anyway (this writes to *curve)
+    if (kbits > 50) {
+        ec_curve_normalize_A24(curve);
+    }
+
+    // When A24 is computed and normalized we save some Fp2 multiplications
+    xMUL(res, P, scalar, kbits, curve);
+}
+
+int
+ec_biscalar_mul(ec_point_t *res,
+                const digit_t *scalarP,
+                const digit_t *scalarQ,
+                const int kbits,
+                const ec_basis_t *PQ,
+                const ec_curve_t *curve)
+{ // res <- scalarP * P + scalarQ * Q; returns 1 on success and 0 on error
+    if (fp2_is_zero(&PQ->PmQ.z))
+        return 0;
+
+    /* Differential additions behave badly when PmQ = (0:1), so we need to
+     * treat this case specifically. Since we assume P, Q are a basis, this
+     * can happen only if kbits==1 */
+    if (kbits == 1) {
+        // Sanity check: our basis should be given by 2-torsion points
+        if (!ec_is_two_torsion(&PQ->P, curve) || !ec_is_two_torsion(&PQ->Q, curve) ||
+            !ec_is_two_torsion(&PQ->PmQ, curve))
+            return 0;
+        digit_t bP, bQ;
+        bP = (scalarP[0] & 1);
+        bQ = (scalarQ[0] & 1);
+        if (bP == 0 && bQ == 0)
+            ec_point_init(res); //(1: 0)
+        else if (bP == 1 && bQ == 0)
+            copy_point(res, &PQ->P);
+        else if (bP == 0 && bQ == 1)
+            copy_point(res, &PQ->Q);
+        else if (bP == 1 && bQ == 1)
+            copy_point(res, &PQ->PmQ);
+        else // should never happen
+            assert(0);
+        return 1;
+    } else {
+        ec_curve_t E; // local copy so that normalizing A24 does not modify the caller's const curve
+        copy_curve(&E, curve);
+
+        if (!fp2_is_zero(&curve->A)) { // If A is not zero normalize
+            ec_curve_normalize_A24(&E);
+        }
+        return xDBLMUL(res, &PQ->P, scalarP, &PQ->Q, scalarQ, &PQ->PmQ, kbits, (const ec_curve_t *)&E);
+    }
+}
diff --git a/src/pqm4/sqisign_lvl1/ref/ec.h b/src/pqm4/sqisign_lvl1/ref/ec.h
new file mode 100644
index 0000000..ee2be38
--- /dev/null
+++ b/src/pqm4/sqisign_lvl1/ref/ec.h
@@ -0,0 +1,668 @@
+/** @file
+ *
+ * @authors Luca De Feo, Francisco RH
+ *
+ * @brief Elliptic curve stuff
+ */
+
+#ifndef EC_H
+#define EC_H
+#include
+#include
+#include
+#include
+#include
+
+/** @defgroup ec Elliptic curves
+ * @{
+ */
+
+/** @defgroup ec_t Data structures
+ * @{
+ */
+
+/** @brief Projective point on the Kummer line E/pm 1 in Montgomery coordinates
+ *
+ * @typedef ec_point_t
+ *
+ * @struct ec_point_t
+ *
+ * A projective point in x-only (X:Z) coordinates.
+ */
+typedef struct ec_point_t
+{
+    fp2_t x;
+    fp2_t z;
+} ec_point_t;
+
+/** @brief Projective point in Jacobian coordinates on a Montgomery curve
+ *
+ * @typedef jac_point_t
+ *
+ * @struct jac_point_t
+ *
+ * A projective point in (X:Y:Z) coordinates
+ */
+typedef struct jac_point_t
+{
+    fp2_t x;
+    fp2_t y;
+    fp2_t z;
+} jac_point_t;
+
+/** @brief Addition components
+ *
+ * @typedef add_components_t
+ *
+ * @struct add_components_t
+ *
+ * 3 components u,v,w that define the (X:Z) coordinates of both
+ * addition and subtraction of two distinct points with
+ * P+Q = (u-v:w) and P-Q = (u+v:w)
+ */
+typedef struct add_components_t
+{
+    fp2_t u;
+    fp2_t v;
+    fp2_t w;
+} add_components_t;
+
+/** @brief A basis of a torsion subgroup
+ *
+ * @typedef ec_basis_t
+ *
+ * @struct ec_basis_t
+ *
+ * The x-only points P, Q and PmQ (the difference P-Q) describing a basis (P, Q) of a torsion subgroup.
+ */
+typedef struct ec_basis_t
+{
+    ec_point_t P;
+    ec_point_t Q;
+    ec_point_t PmQ;
+} ec_basis_t;
+
+/** @brief An elliptic curve
+ *
+ * @typedef ec_curve_t
+ *
+ * @struct ec_curve_t
+ *
+ * An elliptic curve in projective Montgomery form
+ */
+typedef struct ec_curve_t
+{
+    fp2_t A;
+    fp2_t C;                             ///< cannot be 0
+    ec_point_t A24;                      // the point (A+2C : 4C), or ((A+2C)/4C : 1) once normalized
+    bool is_A24_computed_and_normalized; // says if A24 has been computed and normalized
+} ec_curve_t;
+
+/** @brief An isogeny of degree a power of 2
+ *
+ * @typedef ec_isog_even_t
+ *
+ * @struct ec_isog_even_t
+ */
+typedef struct ec_isog_even_t
+{
+    ec_curve_t curve;  ///< The domain curve
+    ec_point_t kernel; ///< A kernel generator
+    unsigned length;   ///< The length as a 2-isogeny walk
+} ec_isog_even_t;
+
+/** @brief Isomorphism of Montgomery curves
+ *
+ * @typedef ec_isom_t
+ *
+ * @struct ec_isom_t
+ *
+ * The isomorphism is given by the map (X:Z) ↦ ( (Nx X + Nz Z) : (D Z) )
+ */
+typedef struct ec_isom_t
+{
+    fp2_t Nx;
+    fp2_t Nz;
+    fp2_t D;
+} ec_isom_t;
+
+// end ec_t
+/** @}
+ */
+
+/** @defgroup ec_curve_t Curves and isomorphisms
+ * @{
+ */
+
+// Initialisation for curves and points
+void ec_curve_init(ec_curve_t *E);
+void ec_point_init(ec_point_t *P);
+
+/**
+ * @brief Verify that a Montgomery coefficient is valid
+ *
+ * @param A an fp2_t
+ *
+ * @return 0 if curve is invalid, 1 otherwise
+ */
+int ec_curve_verify_A(const fp2_t *A);
+
+/**
+ * @brief Initialize an elliptic curve from a coefficient
+ *
+ * @param A an fp2_t
+ * @param E the elliptic curve to initialize
+ *
+ * @return 0 if curve is invalid, 1 otherwise
+ */
+int ec_curve_init_from_A(ec_curve_t *E, const fp2_t *A);
+
+// Copying points, bases and curves (destination first, source second)
+static inline void
+copy_point(ec_point_t *P, const ec_point_t *Q)
+{
+    fp2_copy(&P->x, &Q->x);
+    fp2_copy(&P->z, &Q->z);
+}
+
+static inline void
+copy_basis(ec_basis_t *B1, const ec_basis_t *B0)
+{
+    copy_point(&B1->P, &B0->P);
+    copy_point(&B1->Q, &B0->Q);
+    copy_point(&B1->PmQ, &B0->PmQ);
+}
+
+static inline void
+copy_curve(ec_curve_t *E1, const ec_curve_t *E2)
+{
+    fp2_copy(&(E1->A), &(E2->A));
+    fp2_copy(&(E1->C), &(E2->C));
+    E1->is_A24_computed_and_normalized = E2->is_A24_computed_and_normalized;
+    copy_point(&E1->A24, &E2->A24);
+}
+
+// Functions for working with the A24 point and normalisation
+
+/**
+ * @brief Reduce (A : C) to (A/C : 1) in place
+ *
+ * @param E a curve
+ */
+void ec_normalize_curve(ec_curve_t *E);
+
+/**
+ * @brief Reduce (A + 2C : 4C) to ((A+2C)/4C : 1) in place
+ *
+ * @param E a curve
+ */
+void ec_curve_normalize_A24(ec_curve_t *E);
+
+/**
+ * @brief Normalise both (A : C) and (A + 2C : 4C) as above, in place
+ *
+ * @param E a curve
+ */
+void ec_normalize_curve_and_A24(ec_curve_t *E);
+
+/**
+ * @brief Given a curve E, compute (A+2C : 4C)
+ *
+ * @param A24 the value (A+2C : 4C) to return into (the cached normalized value is returned instead when E has one)
+ * @param E a curve
+ */
+static inline void
+AC_to_A24(ec_point_t *A24, const ec_curve_t *E)
+{
+    // Maybe we already have this computed
+    if (E->is_A24_computed_and_normalized) {
+        copy_point(A24, &E->A24);
+        return;
+    }
+
+    // A24 = (A+2C : 4C)
+    fp2_add(&A24->z, &E->C, &E->C);
+    fp2_add(&A24->x, &E->A, &A24->z);
+    fp2_add(&A24->z, &A24->z, &A24->z);
+}
+
+/**
+ * @brief Given the point (A+2C : 4C), compute the curve coefficients (A : C)
+ *
+ * @param E a curve to compute (only A and C are written; E->A24 and its flag are left untouched)
+ * @param A24 the value (A+2C : 4C)
+ */
+static inline void
+A24_to_AC(ec_curve_t *E, const ec_point_t *A24)
+{
+    // (A:C) = (((A+2C)*2-4C)*2 : 4C) = (4A : 4C)
+    fp2_add(&E->A, &A24->x, &A24->x);
+    fp2_sub(&E->A, &E->A, &A24->z);
+    fp2_add(&E->A, &E->A, &E->A);
+    fp2_copy(&E->C, &A24->z);
+}
+
+/**
+ * @brief j-invariant.
+ *
+ * @param j_inv computed j_invariant
+ * @param curve input curve
+ */
+void ec_j_inv(fp2_t *j_inv, const ec_curve_t *curve);
+
+/**
+ * @brief Isomorphism of elliptic curves
+ * Takes as input two isomorphic Kummer lines in Montgomery form, and outputs an isomorphism between
+ * them
+ *
+ * @param isom computed isomorphism
+ * @param from domain curve
+ * @param to image curve
+ * @return 0xFFFFFFFF if there was an error during the computation, zero otherwise
+ */
+uint32_t ec_isomorphism(ec_isom_t *isom, const ec_curve_t *from, const ec_curve_t *to);
+
+/**
+ * @brief In-place evaluation of an isomorphism
+ *
+ * @param P a point
+ * @param isom an isomorphism
+ */
+void ec_iso_eval(ec_point_t *P, ec_isom_t *isom);
+
+/** @}
+ */
+/** @defgroup ec_point_t Point operations
+ * @{
+ */
+
+/**
+ * @brief Point equality
+ *
+ * @param P a point
+ * @param Q a point
+ * @return 0xFFFFFFFF if equal, zero otherwise
+ */
+uint32_t ec_is_equal(const ec_point_t *P, const ec_point_t *Q);
+
+/**
+ * @brief Point at infinity test
+ *
+ * @param P a point
+ * @return 0xFFFFFFFF if point at infinity, zero otherwise
+ */
+uint32_t ec_is_zero(const ec_point_t *P);
+
+/**
+ * @brief Two torsion test
+ *
+ * @param P a point
+ * @param E the elliptic curve
+ * @return 0xFFFFFFFF if P is 2-torsion but not zero, zero otherwise
+ */
+uint32_t ec_is_two_torsion(const ec_point_t *P, const ec_curve_t *E);
+
+/**
+ * @brief Four torsion test
+ *
+ * @param P a point
+ * @param E the elliptic curve
+ * @return 0xFFFFFFFF if [2]P is 2-torsion but not zero (P has order exactly 4), zero otherwise
+ */
+uint32_t ec_is_four_torsion(const ec_point_t *P, const ec_curve_t *E);
+
+/**
+ * @brief Reduce Z-coordinate of point in place
+ *
+ * @param P a point
+ */
+void ec_normalize_point(ec_point_t *P);
+
+void xDBL_E0(ec_point_t *Q, const ec_point_t *P);
+void xADD(ec_point_t *R, const ec_point_t *P, const ec_point_t *Q, const ec_point_t *PQ);
+void xDBL_A24(ec_point_t *Q, const ec_point_t *P, const ec_point_t *A24, const bool A24_normalized);
+
+/**
+ * @brief Point doubling
+ *
+ * @param res computed double of P
+ * @param P a point
+ * @param curve an elliptic curve
+ */
+void ec_dbl(ec_point_t *res, const ec_point_t *P, const ec_curve_t *curve);
+
+/**
+ * @brief Point iterated doubling
+ *
+ * @param res computed 2^n * P
+ * @param P a point
+ * @param n the number of doublings
+ * @param curve the curve on which P lies (its A24 may be normalized in place)
+ */
+void ec_dbl_iter(ec_point_t *res, int n, const ec_point_t *P, ec_curve_t *curve);
+
+/**
+ * @brief Iterated doubling for a basis P, Q, PmQ
+ *
+ * @param res the computed iterated double of basis B
+ * @param n the number of doubles
+ * @param B the basis to double
+ * @param curve the parent curve of the basis
+ */
+void ec_dbl_iter_basis(ec_basis_t *res, int n, const ec_basis_t *B, ec_curve_t *curve);
+
+/**
+ * @brief Point multiplication
+ *
+ * @param res computed scalar * P
+ * @param curve the curve
+ * @param scalar an unsigned multi-precision integer
+ * @param P a point
+ * @param kbits number of bits of the scalar
+ */
+void ec_mul(ec_point_t *res, const digit_t *scalar, const int kbits, const ec_point_t *P, ec_curve_t *curve);
+
+/**
+ * @brief Combination P+m*Q
+ *
+ * @param R computed P + m * Q
+ * @param curve the curve
+ * @param m an unsigned multi-precision integer
+ * @param P a point
+ * @param Q a point
+ * @param PQ the difference P-Q
+ * @return 0 if there was an error, 1 otherwise
+ */
+int ec_ladder3pt(ec_point_t *R,
+                 const digit_t *m,
+                 const ec_point_t *P,
+                 const ec_point_t *Q,
+                 const ec_point_t *PQ,
+                 const ec_curve_t *curve);
+
+/**
+ * @brief Linear combination of points of a basis
+ *
+ * @param res computed scalarP * P + scalarQ * Q
+ * @param scalarP an unsigned multi-precision integer
+ * @param scalarQ an unsigned multi-precision integer
+ * @param kbits number of bits of the scalars, or n for points of order 2^n
+ * @param PQ a torsion basis consisting of points P and Q
+ * @param curve the curve
+ *
+ * @return 0 if there was an error, 1 otherwise
+ */
+int ec_biscalar_mul(ec_point_t *res,
+                    const digit_t *scalarP,
+                    const digit_t *scalarQ,
+                    const int kbits,
+                    const ec_basis_t *PQ,
+                    const ec_curve_t *curve);
+
+// end point computations
+/**
+ * @}
+ */
+
+/** @defgroup ec_dlog_t Torsion basis computations
+ * @{
+ */
+
+/**
+ * @brief Generate a 2^f-torsion basis from a Montgomery curve along with a hint
+ *
+ * @param PQ2 an ec_basis_t
+ * @param curve an ec_curve_t
+ * @param f an integer
+ *
+ * @return A hint
+ *
+ * The algorithm is deterministic
+ */
+uint8_t ec_curve_to_basis_2f_to_hint(ec_basis_t *PQ2, ec_curve_t *curve, int f);
+
+/**
+ * @brief Generate a 2^f-torsion basis from a Montgomery curve and a given hint
+ *
+ * @param PQ2 an ec_basis_t
+ * @param curve an ec_curve_t
+ * @param f an integer
+ * @param hint the hint
+ *
+ * @return 1 if the basis is valid, 0 otherwise
+ *
+ * The algorithm is deterministic
+ */
+int ec_curve_to_basis_2f_from_hint(ec_basis_t *PQ2, ec_curve_t *curve, int f, const uint8_t hint);
+/** // end basis computations
+ * @}
+ */
+
+/** @defgroup ec_isog_t Isogenies
+ * @{
+ */
+
+/**
+ * @brief Evaluate isogeny of even degree on list of points.
+ * Fails if the kernel has the wrong order or includes (0:1); see @return for the status encoding.
+ *
+ * @param image computed image curve
+ * @param phi isogeny
+ * @param points a list of points to evaluate the isogeny on, modified in place
+ * @param len_points length of the list points
+ *
+ * @return 0 if there was no error, 0xFFFFFFFF otherwise
+ */
+uint32_t ec_eval_even(ec_curve_t *image, ec_isog_even_t *phi, ec_point_t *points, unsigned len_points);
+
+/**
+ * @brief Multiplicative strategy for a short isogeny chain. Fails if the kernel has the wrong order
+ * or includes (0:1) when special=false; see @return for the status encoding.
+ *
+ * @param curve domain curve, to be overwritten by the codomain curve.
+ * @param kernel a kernel generator of order 2^len
+ * @param len the length of the 2-isogeny chain
+ * @param points a list of points to evaluate the isogeny on, modified in place
+ * @param len_points length of the list points
+ * @param special if true, allow isogenies with (0:1) in the kernel
+ *
+ * @return 0 if there was no error, 0xFFFFFFFF otherwise
+ */
+uint32_t ec_eval_small_chain(ec_curve_t *curve,
+                             const ec_point_t *kernel,
+                             int len,
+                             ec_point_t *points,
+                             unsigned len_points,
+                             bool special);
+
+/**
+ * @brief Recover Y-coordinate from X-coordinate and curve coefficients.
+ *
+ * @param y: a y-coordinate
+ * @param Px: a x-coordinate
+ * @param curve: the elliptic curve
+ *
+ * @return 0xFFFFFFFF if the point was on the curve, 0 otherwise
+ */
+uint32_t ec_recover_y(fp2_t *y, const fp2_t *Px, const ec_curve_t *curve);
+
+// Jacobian point init and copying
+void jac_init(jac_point_t *P);
+void copy_jac_point(jac_point_t *P, const jac_point_t *Q);
+
+/**
+ * @brief Test if two Jacobian points are equal
+ *
+ * @param P: a point
+ * @param Q: a point
+ *
+ * @return 0xFFFFFFFF if they are equal, 0 otherwise
+ */
+uint32_t jac_is_equal(const jac_point_t *P, const jac_point_t *Q);
+
+// Convert from Jacobian to x-only: (X : Y : Z) -> (X : Z^2), with the identity mapped to (1 : 0)
+void jac_to_xz(ec_point_t *P, const jac_point_t *xyP);
+// Convert between Jacobian coordinates in the Montgomery model and the Weierstrass model
+void jac_to_ws(jac_point_t *P, fp2_t *t, fp2_t *ao3, const jac_point_t *Q, const ec_curve_t *curve);
+void jac_from_ws(jac_point_t *Q, const jac_point_t *P, const fp2_t *ao3, const ec_curve_t *curve);
+
+// Jacobian arithmetic
+void jac_neg(jac_point_t *Q, const jac_point_t *P);
+void ADD(jac_point_t *R, const jac_point_t *P, const jac_point_t *Q, const ec_curve_t *AC);
+void DBL(jac_point_t *Q, const jac_point_t *P, const ec_curve_t *AC);
+void DBLW(jac_point_t *Q, fp2_t *u, const jac_point_t *P, const fp2_t *t);
+void jac_to_xz_add_components(add_components_t *uvw, const jac_point_t *P, const jac_point_t *Q, const ec_curve_t *AC);
+
+/**
+ * @brief Given a basis in x-only, lift to a pair of Jacobian points
+ *
+ * @param P: a point
+ * @param Q: a point
+ * @param B: a basis
+ * @param E: an elliptic curve
+ *
+ * @return 0xFFFFFFFF if there was no error, 0 otherwise
+ *
+ *
+ * Lifts a basis x(P), x(Q), x(P-Q) assuming the curve has (A/C : 1) and
+ * the point P = (X/Z : 1). For the generic implementation see lift_basis()
+ */
+uint32_t lift_basis_normalized(jac_point_t *P, jac_point_t *Q, ec_basis_t *B, ec_curve_t *E);
+
+/**
+ * @brief Given a basis in x-only, lift to a pair of Jacobian points
+ *
+ * @param P: a point
+ * @param Q: a point
+ * @param B: a basis
+ * @param E: an elliptic curve
+ *
+ * @return 0xFFFFFFFF if there was no error, 0 otherwise
+ */
+uint32_t lift_basis(jac_point_t *P, jac_point_t *Q, ec_basis_t *B, ec_curve_t *E);
+
+/**
+ * @brief Check if basis points (P, Q) form a full 4-torsion basis
+ *
+ * @param B: a basis
+ * @param E: an elliptic curve
+ *
+ * @return 0xFFFFFFFF if they form a basis, 0 otherwise
+ */
+uint32_t ec_is_basis_four_torsion(const ec_basis_t *B, const ec_curve_t *E);
+
+/*
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Test functions for printing and order checking, only used in debug mode
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ */
+
+/**
+ * @brief Check if a point (X : Z) has order exactly 2^t
+ *
+ * @param P: a point
+ * @param E: an elliptic curve
+ * @param t: an integer
+ *
+ * @return nonzero (the ec_is_zero mask converted to int) if the order is correct, 0 otherwise
+ */
+static int
+test_point_order_twof(const ec_point_t *P, const ec_curve_t *E, int t)
+{
+    ec_point_t test;
+    ec_curve_t curve;
+    test = *P;
+    copy_curve(&curve, E); // work on a copy: ec_dbl_iter may normalize A24 in place
+
+    if (ec_is_zero(&test))
+        return 0;
+    // Scale point by 2^(t-1)
+    ec_dbl_iter(&test, t - 1, &test, &curve);
+    // If it's zero now, it doesn't have order 2^t
+    if (ec_is_zero(&test))
+        return 0;
+    // Ensure [2^t] P = 0
+    ec_dbl(&test, &test, &curve);
+    return ec_is_zero(&test);
+}
+
+/**
+ * @brief Check if basis points (P, Q, PmQ) all have order exactly 2^t
+ *
+ * @param B: a basis
+ * @param E: an elliptic curve
+ * @param t: an integer
+ *
+ * @return nonzero (bitwise AND of the three point checks) if the order is correct, 0 otherwise
+ */
+static int
+test_basis_order_twof(const ec_basis_t *B, const ec_curve_t *E, int t)
+{
+    int check_P = test_point_order_twof(&B->P, E, t);
+    int check_Q = test_point_order_twof(&B->Q, E, t);
+    int check_PmQ = test_point_order_twof(&B->PmQ, E, t);
+
+    return check_P & check_Q & check_PmQ;
+}
+
+/**
+ * @brief Check if a Jacobian point (X : Y : Z) has order exactly 2^t
+ *
+ * @param P: a point
+ * @param E: an elliptic curve
+ * @param t: an integer
+ *
+ * @return nonzero (the fp2_is_zero result converted to int) if the order is correct, 0 otherwise
+ */
+static int
+test_jac_order_twof(const jac_point_t *P, const ec_curve_t *E, int t)
+{
+    jac_point_t test;
+    test = *P;
+    if (fp2_is_zero(&test.z))
+        return 0;
+    for (int i = 0; i < t - 1; i++) {
+        DBL(&test, &test, E);
+    }
+    if (fp2_is_zero(&test.z))
+        return 0;
+    DBL(&test, &test, E);
+    return (fp2_is_zero(&test.z));
+}
+
+// Prints the affine x-coordinate X/Z of the point, or "INF" when Z = 0
+static void
+ec_point_print(const char *name, ec_point_t P)
+{
+    fp2_t a;
+    if (fp2_is_zero(&P.z)) {
+        printf("%s = INF\n", name);
+    } else {
+        fp2_copy(&a, &P.z);
+        fp2_inv(&a);
+        fp2_mul(&a, &a, &P.x);
+        fp2_print(name, &a);
+    }
+}
+
+// Prints the affine Montgomery coefficient A/C
+static void
+ec_curve_print(const char *name, ec_curve_t E)
+{
+    fp2_t a;
+    fp2_copy(&a, &E.C);
+    fp2_inv(&a);
+    fp2_mul(&a, &a, &E.A);
+    fp2_print(name, &a);
+}
+
+#endif
+// end isogeny computations
+/**
+ * @}
+ */
+
+// end ec
+/**
+ * @}
+ */
diff --git a/src/pqm4/sqisign_lvl1/ref/ec_jac.c b/src/pqm4/sqisign_lvl1/ref/ec_jac.c
new file mode 100644
index 0000000..20ca68c
--- /dev/null
+++ b/src/pqm4/sqisign_lvl1/ref/ec_jac.c
@@ -0,0 +1,335 @@
+#include
+#include
+
+void
+jac_init(jac_point_t *P)
+{ // Initialize Montgomery
in Jacobian coordinates as identity element (0:1:0) + fp2_set_zero(&P->x); + fp2_set_one(&P->y); + fp2_set_zero(&P->z); +} + +uint32_t +jac_is_equal(const jac_point_t *P, const jac_point_t *Q) +{ // Evaluate if two points in Jacobian coordinates (X:Y:Z) are equal + // Returns 0xFFFFFFFF (true) if P=Q, 0 (false) otherwise + // Compares by cross-multiplication: x1*z2^2 == x2*z1^2 and y1*z2^3 == y2*z1^3 + fp2_t t0, t1, t2, t3; + + fp2_sqr(&t0, &Q->z); + fp2_mul(&t2, &P->x, &t0); // x1*z2^2 + fp2_sqr(&t1, &P->z); + fp2_mul(&t3, &Q->x, &t1); // x2*z1^2 + fp2_sub(&t2, &t2, &t3); + + fp2_mul(&t0, &t0, &Q->z); + fp2_mul(&t0, &P->y, &t0); // y1*z2^3 + fp2_mul(&t1, &t1, &P->z); + fp2_mul(&t1, &Q->y, &t1); // y2*z1^3 + fp2_sub(&t0, &t0, &t1); + + return fp2_is_zero(&t0) & fp2_is_zero(&t2); +} + +void +jac_to_xz(ec_point_t *P, const jac_point_t *xyP) +{ + // Maps the Jacobian point (X : Y : Z) to the x-only point (X : Z^2) + fp2_copy(&P->x, &xyP->x); + fp2_copy(&P->z, &xyP->z); + fp2_sqr(&P->z, &P->z); + + // If xyP = (0:1:0), we currently have P=(0 : 0) but we want to set P=(1:0) + uint32_t c1, c2; + fp2_t one; + fp2_set_one(&one); + + c1 = fp2_is_zero(&P->x); + c2 = fp2_is_zero(&P->z); + fp2_select(&P->x, &P->x, &one, c1 & c2); +} + +void +jac_to_ws(jac_point_t *Q, fp2_t *t, fp2_t *ao3, const jac_point_t *P, const ec_curve_t *curve) +{ + // Cost of 3M + 2S when A != 0. + // NOTE: *ao3 is only written when A != 0; in the A == 0 branch it is left untouched. + fp_t one; + fp2_t a; + /* a = 1 - A^2/3, U = X + (A*Z^2)/3, V = Y, W = Z, T = a*Z^4*/ + fp_set_one(&one); + if (!fp2_is_zero(&(curve->A))) { + fp_div3(&(ao3->re), &(curve->A.re)); + fp_div3(&(ao3->im), &(curve->A.im)); + fp2_sqr(t, &P->z); + fp2_mul(&Q->x, ao3, t); + fp2_add(&Q->x, &Q->x, &P->x); + fp2_sqr(t, t); + fp2_mul(&a, ao3, &(curve->A)); + fp_sub(&(a.re), &one, &(a.re)); + fp_neg(&(a.im), &(a.im)); + fp2_mul(t, t, &a); + } else { + fp2_copy(&Q->x, &P->x); + fp2_sqr(t, &P->z); + fp2_sqr(t, t); + } + fp2_copy(&Q->y, &P->y); + fp2_copy(&Q->z, &P->z); +} + +void +jac_from_ws(jac_point_t *Q, const jac_point_t *P, const fp2_t *ao3, const ec_curve_t *curve) +{ + // Cost of 1M + 1S when A != 0. + fp2_t t; + /* X = U - (A*W^2)/3, Y = V, Z = W.
*/ + if (!fp2_is_zero(&(curve->A))) { + fp2_sqr(&t, &P->z); + fp2_mul(&t, &t, ao3); + fp2_sub(&Q->x, &P->x, &t); + } + fp2_copy(&Q->y, &P->y); + fp2_copy(&Q->z, &P->z); +} + +void +copy_jac_point(jac_point_t *P, const jac_point_t *Q) +{ + fp2_copy(&(P->x), &(Q->x)); + fp2_copy(&(P->y), &(Q->y)); + fp2_copy(&(P->z), &(Q->z)); +} + +void +jac_neg(jac_point_t *Q, const jac_point_t *P) +{ + fp2_copy(&Q->x, &P->x); + fp2_neg(&Q->y, &P->y); + fp2_copy(&Q->z, &P->z); +} + +void +DBL(jac_point_t *Q, const jac_point_t *P, const ec_curve_t *AC) +{ // Cost of 6M + 6S. + // Doubling on a Montgomery curve, representation in Jacobian coordinates (X:Y:Z) corresponding to + // (X/Z^2,Y/Z^3) This version receives the coefficient value A + fp2_t t0, t1, t2, t3; + + uint32_t flag = fp2_is_zero(&P->x) & fp2_is_zero(&P->z); + + fp2_sqr(&t0, &P->x); // t0 = x1^2 + fp2_add(&t1, &t0, &t0); + fp2_add(&t0, &t0, &t1); // t0 = 3x1^2 + fp2_sqr(&t1, &P->z); // t1 = z1^2 + fp2_mul(&t2, &P->x, &AC->A); + fp2_add(&t2, &t2, &t2); // t2 = 2Ax1 + fp2_add(&t2, &t1, &t2); // t2 = 2Ax1+z1^2 + fp2_mul(&t2, &t1, &t2); // t2 = z1^2(2Ax1+z1^2) + fp2_add(&t2, &t0, &t2); // t2 = alpha = 3x1^2 + z1^2(2Ax1+z1^2) + fp2_mul(&Q->z, &P->y, &P->z); + fp2_add(&Q->z, &Q->z, &Q->z); // z2 = 2y1z1 + fp2_sqr(&t0, &Q->z); + fp2_mul(&t0, &t0, &AC->A); // t0 = 4Ay1^2z1^2 + fp2_sqr(&t1, &P->y); + fp2_add(&t1, &t1, &t1); // t1 = 2y1^2 + fp2_add(&t3, &P->x, &P->x); // t3 = 2x1 + fp2_mul(&t3, &t1, &t3); // t3 = 4x1y1^2 + fp2_sqr(&Q->x, &t2); // x2 = alpha^2 + fp2_sub(&Q->x, &Q->x, &t0); // x2 = alpha^2 - 4Ay1^2z1^2 + fp2_sub(&Q->x, &Q->x, &t3); + fp2_sub(&Q->x, &Q->x, &t3); // x2 = alpha^2 - 4Ay1^2z1^2 - 8x1y1^2 + fp2_sub(&Q->y, &t3, &Q->x); // y2 = 4x1y1^2 - x2 + fp2_mul(&Q->y, &Q->y, &t2); // y2 = alpha(4x1y1^2 - x2) + fp2_sqr(&t1, &t1); // t1 = 4y1^4 + fp2_sub(&Q->y, &Q->y, &t1); + fp2_sub(&Q->y, &Q->y, &t1); // y2 = alpha(4x1y1^2 - x2) - 8y1^4 + + fp2_select(&Q->x, &Q->x, &P->x, -flag); + fp2_select(&Q->z, &Q->z, &P->z, 
-flag); +} + +void +DBLW(jac_point_t *Q, fp2_t *u, const jac_point_t *P, const fp2_t *t) +{ // Cost of 3M + 5S. + // Doubling on a Weierstrass curve, representation in modified Jacobian coordinates + // (X:Y:Z:T=a*Z^4) corresponding to (X/Z^2,Y/Z^3), where a is the curve coefficient. + // Formula from https://hyperelliptic.org/EFD/g1p/auto-shortw-modified.html + + uint32_t flag = fp2_is_zero(&P->x) & fp2_is_zero(&P->z); + + fp2_t xx, c, cc, r, s, m; + // XX = X^2 + fp2_sqr(&xx, &P->x); + // A = 2*Y^2 + fp2_sqr(&c, &P->y); + fp2_add(&c, &c, &c); + // AA = A^2 + fp2_sqr(&cc, &c); + // R = 2*AA + fp2_add(&r, &cc, &cc); + // S = (X+A)^2-XX-AA + fp2_add(&s, &P->x, &c); + fp2_sqr(&s, &s); + fp2_sub(&s, &s, &xx); + fp2_sub(&s, &s, &cc); + // M = 3*XX+T1 + fp2_add(&m, &xx, &xx); + fp2_add(&m, &m, &xx); + fp2_add(&m, &m, t); + // X3 = M^2-2*S + fp2_sqr(&Q->x, &m); + fp2_sub(&Q->x, &Q->x, &s); + fp2_sub(&Q->x, &Q->x, &s); + // Z3 = 2*Y*Z + fp2_mul(&Q->z, &P->y, &P->z); + fp2_add(&Q->z, &Q->z, &Q->z); + // Y3 = M*(S-X3)-R + fp2_sub(&Q->y, &s, &Q->x); + fp2_mul(&Q->y, &Q->y, &m); + fp2_sub(&Q->y, &Q->y, &r); + // T3 = 2*R*T1 + fp2_mul(u, t, &r); + fp2_add(u, u, u); + + fp2_select(&Q->x, &Q->x, &P->x, -flag); + fp2_select(&Q->z, &Q->z, &P->z, -flag); +} + +void +select_jac_point(jac_point_t *Q, const jac_point_t *P1, const jac_point_t *P2, const digit_t option) +{ // Select points + // If option = 0 then Q <- P1, else if option = 0xFF...FF then Q <- P2 + fp2_select(&(Q->x), &(P1->x), &(P2->x), option); + fp2_select(&(Q->y), &(P1->y), &(P2->y), option); + fp2_select(&(Q->z), &(P1->z), &(P2->z), option); +} + +void +ADD(jac_point_t *R, const jac_point_t *P, const jac_point_t *Q, const ec_curve_t *AC) +{ + // Addition on a Montgomery curve, representation in Jacobian coordinates (X:Y:Z) corresponding + // to (x,y) = (X/Z^2,Y/Z^3) This version receives the coefficient value A + // + // Complete routine, to handle all edge cases: + // if ZP == 0: # P == inf + // return Q + // if ZQ 
== 0: # Q == inf + // return P + // dy <- YQ*ZP**3 - YP*ZQ**3 + // dx <- XQ*ZP**2 - XP*ZQ**2 + // if dx == 0: # x1 == x2 + // if dy == 0: # ... and y1 == y2: doubling case + // dy <- ZP*ZQ * (3*XP^2 + ZP^2 * (2*A*XP + ZP^2)) + // dx <- 2*YP*ZP + // else: # ... but y1 != y2, thus P = -Q + // return inf + // XR <- dy**2 - dx**2 * (A*ZP^2*ZQ^2 + XP*ZQ^2 + XQ*ZP^2) + // YR <- dy * (XP*ZQ^2 * dx^2 - XR) - YP*ZQ^3 * dx^3 + // ZR <- dx * ZP * ZQ + + // Constant time processing: + // - The case for P == 0 or Q == 0 is handled at the end with conditional select + // - dy and dx are computed for both the normal and doubling cases, we switch when + // dx == dy == 0 for the normal case. + // - If we have that P = -Q then dx = 0 and so ZR will be zero, giving us the point + // at infinity for "free". + // + // These current formula are expensive and I'm probably missing some tricks... + // Thought I'd get the ball rolling. + // Cost 17M + 6S + 13a + fp2_t t0, t1, t2, t3, u1, u2, v1, dx, dy; + + /* If P is zero or Q is zero we will conditionally swap before returning. 
*/ + uint32_t ctl1 = fp2_is_zero(&P->z); + uint32_t ctl2 = fp2_is_zero(&Q->z); + + /* Precompute some values */ + fp2_sqr(&t0, &P->z); // t0 = z1^2 + fp2_sqr(&t1, &Q->z); // t1 = z2^2 + + /* Compute dy and dx for ordinary case */ + fp2_mul(&v1, &t1, &Q->z); // v1 = z2^3 + fp2_mul(&t2, &t0, &P->z); // t2 = z1^3 + fp2_mul(&v1, &v1, &P->y); // v1 = y1z2^3 + fp2_mul(&t2, &t2, &Q->y); // t2 = y2z1^3 + fp2_sub(&dy, &t2, &v1); // dy = y2z1^3 - y1z2^3 + fp2_mul(&u2, &t0, &Q->x); // u2 = x2z1^2 + fp2_mul(&u1, &t1, &P->x); // u1 = x1z2^2 + fp2_sub(&dx, &u2, &u1); // dx = x2z1^2 - x1z2^2 + + /* Compute dy and dx for doubling case */ + fp2_add(&t1, &P->y, &P->y); // dx_dbl = t1 = 2y1 + fp2_add(&t2, &AC->A, &AC->A); // t2 = 2A + fp2_mul(&t2, &t2, &P->x); // t2 = 2Ax1 + fp2_add(&t2, &t2, &t0); // t2 = 2Ax1 + z1^2 + fp2_mul(&t2, &t2, &t0); // t2 = z1^2 * (2Ax1 + z1^2) + fp2_sqr(&t0, &P->x); // t0 = x1^2 + fp2_add(&t2, &t2, &t0); // t2 = x1^2 + z1^2 * (2Ax1 + z1^2) + fp2_add(&t2, &t2, &t0); // t2 = 2*x1^2 + z1^2 * (2Ax1 + z1^2) + fp2_add(&t2, &t2, &t0); // t2 = 3*x1^2 + z1^2 * (2Ax1 + z1^2) + fp2_mul(&t2, &t2, &Q->z); // dy_dbl = t2 = z2 * (3*x1^2 + z1^2 * (2Ax1 + z1^2)) + + /* If dx is zero and dy is zero swap with double variables */ + uint32_t ctl = fp2_is_zero(&dx) & fp2_is_zero(&dy); + fp2_select(&dx, &dx, &t1, ctl); + fp2_select(&dy, &dy, &t2, ctl); + + /* Some more precomputations */ + fp2_mul(&t0, &P->z, &Q->z); // t0 = z1z2 + fp2_sqr(&t1, &t0); // t1 = z1z2^2 + fp2_sqr(&t2, &dx); // t2 = dx^2 + fp2_sqr(&t3, &dy); // t3 = dy^2 + + /* Compute x3 = dy**2 - dx**2 * (A*ZP^2*ZQ^2 + XP*ZQ^2 + XQ*ZP^2) */ + fp2_mul(&R->x, &AC->A, &t1); // x3 = A*(z1z2)^2 + fp2_add(&R->x, &R->x, &u1); // x3 = A*(z1z2)^2 + u1 + fp2_add(&R->x, &R->x, &u2); // x3 = A*(z1z2)^2 + u1 + u2 + fp2_mul(&R->x, &R->x, &t2); // x3 = dx^2 * (A*(z1z2)^2 + u1 + u2) + fp2_sub(&R->x, &t3, &R->x); // x3 = dy^2 - dx^2 * (A*(z1z2)^2 + u1 + u2) + + /* Compute y3 = dy * (XP*ZQ^2 * dx^2 - XR) - YP*ZQ^3 * dx^3*/ + 
fp2_mul(&R->y, &u1, &t2); // y3 = u1 * dx^2 + fp2_sub(&R->y, &R->y, &R->x); // y3 = u1 * dx^2 - x3 + fp2_mul(&R->y, &R->y, &dy); // y3 = dy * (u1 * dx^2 - x3) + fp2_mul(&t3, &t2, &dx); // t3 = dx^3 + fp2_mul(&t3, &t3, &v1); // t3 = v1 * dx^3 + fp2_sub(&R->y, &R->y, &t3); // y3 = dy * (u1 * dx^2 - x3) - v1 * dx^3 + + /* Compute z3 = dx * z1 * z2 */ + fp2_mul(&R->z, &dx, &t0); + + /* Finally, we need to set R = P is Q.Z = 0 and R = Q if P.Z = 0 */ + select_jac_point(R, R, Q, ctl1); + select_jac_point(R, R, P, ctl2); +} + +void +jac_to_xz_add_components(add_components_t *add_comp, const jac_point_t *P, const jac_point_t *Q, const ec_curve_t *AC) +{ + // Take P and Q in E distinct, two jac_point_t, return three components u,v and w in Fp2 such + // that the xz coordinates of P+Q are (u-v:w) and of P-Q are (u+v:w) + + fp2_t t0, t1, t2, t3, t4, t5, t6; + + fp2_sqr(&t0, &P->z); // t0 = z1^2 + fp2_sqr(&t1, &Q->z); // t1 = z2^2 + fp2_mul(&t2, &P->x, &t1); // t2 = x1z2^2 + fp2_mul(&t3, &t0, &Q->x); // t3 = z1^2x2 + fp2_mul(&t4, &P->y, &Q->z); // t4 = y1z2 + fp2_mul(&t4, &t4, &t1); // t4 = y1z2^3 + fp2_mul(&t5, &P->z, &Q->y); // t5 = z1y2 + fp2_mul(&t5, &t5, &t0); // t5 = z1^3y2 + fp2_mul(&t0, &t0, &t1); // t0 = (z1z2)^2 + fp2_mul(&t6, &t4, &t5); // t6 = (z1z_2)^3y1y2 + fp2_add(&add_comp->v, &t6, &t6); // v = 2(z1z_2)^3y1y2 + fp2_sqr(&t4, &t4); // t4 = y1^2z2^6 + fp2_sqr(&t5, &t5); // t5 = z1^6y_2^2 + fp2_add(&t4, &t4, &t5); // t4 = z1^6y_2^2 + y1^2z2^6 + fp2_add(&t5, &t2, &t3); // t5 = x1z2^2 +z_1^2x2 + fp2_add(&t6, &t3, &t3); // t6 = 2z_1^2x2 + fp2_sub(&t6, &t5, &t6); // t6 = lambda = x1z2^2 - z_1^2x2 + fp2_sqr(&t6, &t6); // t6 = lambda^2 = (x1z2^2 - z_1^2x2)^2 + fp2_mul(&t1, &AC->A, &t0); // t1 = A*(z1z2)^2 + fp2_add(&t1, &t5, &t1); // t1 = gamma =A*(z1z2)^2 + x1z2^2 +z_1^2x2 + fp2_mul(&t1, &t1, &t6); // t1 = gamma*lambda^2 + fp2_sub(&add_comp->u, &t4, &t1); // u = z1^6y_2^2 + y1^2z2^6 - gamma*lambda^2 + fp2_mul(&add_comp->w, &t6, &t0); // w = (z1z2)^2(lambda)^2 +} diff 
--git a/src/pqm4/sqisign_lvl1/ref/ec_params.c b/src/pqm4/sqisign_lvl1/ref/ec_params.c new file mode 100644 index 0000000..5011f10 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/ec_params.c @@ -0,0 +1,4 @@ +#include +// p+1 divided by the power of 2 +const digit_t p_cofactor_for_2f[1] = {5}; + diff --git a/src/pqm4/sqisign_lvl1/ref/ec_params.h b/src/pqm4/sqisign_lvl1/ref/ec_params.h new file mode 100644 index 0000000..e02ac1d --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/ec_params.h @@ -0,0 +1,12 @@ +#ifndef EC_PARAMS_H +#define EC_PARAMS_H + +#include + +#define TORSION_EVEN_POWER 248 + +// p+1 divided by the power of 2 +extern const digit_t p_cofactor_for_2f[1]; +#define P_COFACTOR_FOR_2F_BITLENGTH 3 + +#endif diff --git a/src/pqm4/sqisign_lvl1/ref/encode_verification.c b/src/pqm4/sqisign_lvl1/ref/encode_verification.c new file mode 100644 index 0000000..fecdb9c --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/encode_verification.c @@ -0,0 +1,220 @@ +#include +#include +#include +#include +#include +#include + +typedef unsigned char byte_t; + +// digits + +static void +encode_digits(byte_t *enc, const digit_t *x, size_t nbytes) +{ +#ifdef TARGET_BIG_ENDIAN + const size_t ndigits = nbytes / sizeof(digit_t); + const size_t rem = nbytes % sizeof(digit_t); + + for (size_t i = 0; i < ndigits; i++) + ((digit_t *)enc)[i] = BSWAP_DIGIT(x[i]); + if (rem) { + digit_t ld = BSWAP_DIGIT(x[ndigits]); + memcpy(enc + ndigits * sizeof(digit_t), (byte_t *)&ld, rem); + } +#else + memcpy(enc, (const byte_t *)x, nbytes); +#endif +} + +static void +decode_digits(digit_t *x, const byte_t *enc, size_t nbytes, size_t ndigits) +{ + assert(nbytes <= ndigits * sizeof(digit_t)); + memcpy((byte_t *)x, enc, nbytes); + memset((byte_t *)x + nbytes, 0, ndigits * sizeof(digit_t) - nbytes); + +#ifdef TARGET_BIG_ENDIAN + for (size_t i = 0; i < ndigits; i++) + x[i] = BSWAP_DIGIT(x[i]); +#endif +} + +// fp2_t + +static byte_t * +fp2_to_bytes(byte_t *enc, const fp2_t *x) +{ + fp2_encode(enc, x); + return 
enc + FP2_ENCODED_BYTES; +} + +static const byte_t * +fp2_from_bytes(fp2_t *x, const byte_t *enc) +{ + fp2_decode(x, enc); + return enc + FP2_ENCODED_BYTES; +} + +// curves and points + +static byte_t * +proj_to_bytes(byte_t *enc, const fp2_t *x, const fp2_t *z) +{ + assert(!fp2_is_zero(z)); + fp2_t tmp = *z; + fp2_inv(&tmp); +#ifndef NDEBUG + { + fp2_t chk; + fp2_mul(&chk, z, &tmp); + fp2_t one; + fp2_set_one(&one); + assert(fp2_is_equal(&chk, &one)); + } +#endif + fp2_mul(&tmp, x, &tmp); + enc = fp2_to_bytes(enc, &tmp); + return enc; +} + +static const byte_t * +proj_from_bytes(fp2_t *x, fp2_t *z, const byte_t *enc) +{ + enc = fp2_from_bytes(x, enc); + fp2_set_one(z); + return enc; +} + +static byte_t * +ec_curve_to_bytes(byte_t *enc, const ec_curve_t *curve) +{ + return proj_to_bytes(enc, &curve->A, &curve->C); +} + +static const byte_t * +ec_curve_from_bytes(ec_curve_t *curve, const byte_t *enc) +{ + memset(curve, 0, sizeof(*curve)); + return proj_from_bytes(&curve->A, &curve->C, enc); +} + +static byte_t * +ec_point_to_bytes(byte_t *enc, const ec_point_t *point) +{ + return proj_to_bytes(enc, &point->x, &point->z); +} + +static const byte_t * +ec_point_from_bytes(ec_point_t *point, const byte_t *enc) +{ + return proj_from_bytes(&point->x, &point->z, enc); +} + +static byte_t * +ec_basis_to_bytes(byte_t *enc, const ec_basis_t *basis) +{ + enc = ec_point_to_bytes(enc, &basis->P); + enc = ec_point_to_bytes(enc, &basis->Q); + enc = ec_point_to_bytes(enc, &basis->PmQ); + return enc; +} + +static const byte_t * +ec_basis_from_bytes(ec_basis_t *basis, const byte_t *enc) +{ + enc = ec_point_from_bytes(&basis->P, enc); + enc = ec_point_from_bytes(&basis->Q, enc); + enc = ec_point_from_bytes(&basis->PmQ, enc); + return enc; +} + +// public API + +byte_t * +public_key_to_bytes(byte_t *enc, const public_key_t *pk) +{ +#ifndef NDEBUG + const byte_t *const start = enc; +#endif + enc = ec_curve_to_bytes(enc, &pk->curve); + *enc++ = pk->hint_pk; + assert(enc - start == 
PUBLICKEY_BYTES); + return enc; +} + +const byte_t * +public_key_from_bytes(public_key_t *pk, const byte_t *enc) +{ +#ifndef NDEBUG + const byte_t *const start = enc; +#endif + enc = ec_curve_from_bytes(&pk->curve, enc); + pk->hint_pk = *enc++; + assert(enc - start == PUBLICKEY_BYTES); + return enc; +} + +void +signature_to_bytes(byte_t *enc, const signature_t *sig) +{ +#ifndef NDEBUG + byte_t *const start = enc; +#endif + + enc = fp2_to_bytes(enc, &sig->E_aux_A); + + *enc++ = sig->backtracking; + *enc++ = sig->two_resp_length; + + size_t nbytes = (SQIsign_response_length + 9) / 8; + encode_digits(enc, sig->mat_Bchall_can_to_B_chall[0][0], nbytes); + enc += nbytes; + encode_digits(enc, sig->mat_Bchall_can_to_B_chall[0][1], nbytes); + enc += nbytes; + encode_digits(enc, sig->mat_Bchall_can_to_B_chall[1][0], nbytes); + enc += nbytes; + encode_digits(enc, sig->mat_Bchall_can_to_B_chall[1][1], nbytes); + enc += nbytes; + + nbytes = SECURITY_BITS / 8; + encode_digits(enc, sig->chall_coeff, nbytes); + enc += nbytes; + + *enc++ = sig->hint_aux; + *enc++ = sig->hint_chall; + + assert(enc - start == SIGNATURE_BYTES); +} + +void +signature_from_bytes(signature_t *sig, const byte_t *enc) +{ +#ifndef NDEBUG + const byte_t *const start = enc; +#endif + + enc = fp2_from_bytes(&sig->E_aux_A, enc); + + sig->backtracking = *enc++; + sig->two_resp_length = *enc++; + + size_t nbytes = (SQIsign_response_length + 9) / 8; + decode_digits(sig->mat_Bchall_can_to_B_chall[0][0], enc, nbytes, NWORDS_ORDER); + enc += nbytes; + decode_digits(sig->mat_Bchall_can_to_B_chall[0][1], enc, nbytes, NWORDS_ORDER); + enc += nbytes; + decode_digits(sig->mat_Bchall_can_to_B_chall[1][0], enc, nbytes, NWORDS_ORDER); + enc += nbytes; + decode_digits(sig->mat_Bchall_can_to_B_chall[1][1], enc, nbytes, NWORDS_ORDER); + enc += nbytes; + + nbytes = SECURITY_BITS / 8; + decode_digits(sig->chall_coeff, enc, nbytes, NWORDS_ORDER); + enc += nbytes; + + sig->hint_aux = *enc++; + sig->hint_chall = *enc++; + + 
assert(enc - start == SIGNATURE_BYTES); +} diff --git a/src/pqm4/sqisign_lvl1/ref/encoded_sizes.h b/src/pqm4/sqisign_lvl1/ref/encoded_sizes.h new file mode 100644 index 0000000..02f8642 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/encoded_sizes.h @@ -0,0 +1,11 @@ +#define SECURITY_BITS 128 +#define SQIsign_response_length 126 +#define HASH_ITERATIONS 64 +#define FP_ENCODED_BYTES 32 +#define FP2_ENCODED_BYTES 64 +#define EC_CURVE_ENCODED_BYTES 64 +#define EC_POINT_ENCODED_BYTES 64 +#define EC_BASIS_ENCODED_BYTES 192 +#define PUBLICKEY_BYTES 65 +#define SECRETKEY_BYTES 353 +#define SIGNATURE_BYTES 148 diff --git a/src/pqm4/sqisign_lvl1/ref/fp.c b/src/pqm4/sqisign_lvl1/ref/fp.c new file mode 100644 index 0000000..48e2937 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/fp.c @@ -0,0 +1,15 @@ +#include + +/* + * If ctl == 0x00000000, then *d is set to a0 + * If ctl == 0xFFFFFFFF, then *d is set to a1 + * ctl MUST be either 0x00000000 or 0xFFFFFFFF. + */ +void +fp_select(fp_t *d, const fp_t *a0, const fp_t *a1, uint32_t ctl) +{ + digit_t cw = (int32_t)ctl; + for (unsigned int i = 0; i < NWORDS_FIELD; i++) { + (*d)[i] = (*a0)[i] ^ (cw & ((*a0)[i] ^ (*a1)[i])); + } +} diff --git a/src/pqm4/sqisign_lvl1/ref/fp.h b/src/pqm4/sqisign_lvl1/ref/fp.h new file mode 100644 index 0000000..1241d58 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/fp.h @@ -0,0 +1,48 @@ +#ifndef FP_H +#define FP_H + +//////////////////////////////////////////////// NOTE: this is placed here for now +#include +#include +#include +#include +#include +#include +#include +#include + +typedef digit_t fp_t[NWORDS_FIELD]; // Datatype for representing field elements + +extern const digit_t ONE[NWORDS_FIELD]; +extern const digit_t ZERO[NWORDS_FIELD]; +// extern const digit_t PM1O3[NWORDS_FIELD]; + +void fp_set_small(fp_t *x, const digit_t val); +void fp_mul_small(fp_t *x, const fp_t *a, const uint32_t val); +void fp_set_zero(fp_t *x); +void fp_set_one(fp_t *x); +uint32_t fp_is_equal(const fp_t *a, const fp_t *b); 
+uint32_t fp_is_zero(const fp_t *a); +void fp_copy(fp_t *out, const fp_t *a); + +void fp_encode(void *dst, const fp_t *a); +void fp_decode_reduce(fp_t *d, const void *src, size_t len); +uint32_t fp_decode(fp_t *d, const void *src); + +void fp_select(fp_t *d, const fp_t *a0, const fp_t *a1, uint32_t ctl); +void fp_cswap(fp_t *a, fp_t *b, uint32_t ctl); + +void fp_add(fp_t *out, const fp_t *a, const fp_t *b); +void fp_sub(fp_t *out, const fp_t *a, const fp_t *b); +void fp_neg(fp_t *out, const fp_t *a); +void fp_sqr(fp_t *out, const fp_t *a); +void fp_mul(fp_t *out, const fp_t *a, const fp_t *b); + +void fp_inv(fp_t *x); +uint32_t fp_is_square(const fp_t *a); +void fp_sqrt(fp_t *a); +void fp_half(fp_t *out, const fp_t *a); +void fp_exp3div4(fp_t *out, const fp_t *a); +void fp_div3(fp_t *out, const fp_t *a); + +#endif diff --git a/src/pqm4/sqisign_lvl1/ref/fp2.c b/src/pqm4/sqisign_lvl1/ref/fp2.c new file mode 100644 index 0000000..a258952 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/fp2.c @@ -0,0 +1,328 @@ +#include +#include +#include + +/* Arithmetic modulo X^2 + 1 */ + +void +fp2_set_small(fp2_t *x, const digit_t val) +{ + fp_set_small(&(x->re), val); + fp_set_zero(&(x->im)); +} + +void +fp2_mul_small(fp2_t *x, const fp2_t *y, uint32_t n) +{ + fp_mul_small(&x->re, &y->re, n); + fp_mul_small(&x->im, &y->im, n); +} + +void +fp2_set_one(fp2_t *x) +{ + fp_set_one(&(x->re)); + fp_set_zero(&(x->im)); +} + +void +fp2_set_zero(fp2_t *x) +{ + fp_set_zero(&(x->re)); + fp_set_zero(&(x->im)); +} + +// Is a GF(p^2) element zero? +// Returns 0xFF...FF (true) if a=0, 0 (false) otherwise +uint32_t +fp2_is_zero(const fp2_t *a) +{ + return fp_is_zero(&(a->re)) & fp_is_zero(&(a->im)); +} + +// Compare two GF(p^2) elements in constant time +// Returns 0xFF...FF (true) if a=b, 0 (false) otherwise +uint32_t +fp2_is_equal(const fp2_t *a, const fp2_t *b) +{ + return fp_is_equal(&(a->re), &(b->re)) & fp_is_equal(&(a->im), &(b->im)); +} + +// Is a GF(p^2) element one? 
+// Returns 0xFF...FF (true) if a=1, 0 (false) otherwise +uint32_t +fp2_is_one(const fp2_t *a) +{ + return fp_is_equal(&(a->re), &ONE) & fp_is_zero(&(a->im)); +} + +void +fp2_copy(fp2_t *x, const fp2_t *y) +{ + fp_copy(&(x->re), &(y->re)); + fp_copy(&(x->im), &(y->im)); +} + +void +fp2_add(fp2_t *x, const fp2_t *y, const fp2_t *z) +{ + fp_add(&(x->re), &(y->re), &(z->re)); + fp_add(&(x->im), &(y->im), &(z->im)); +} + +void +fp2_add_one(fp2_t *x, const fp2_t *y) +{ + fp_add(&x->re, &y->re, &ONE); + fp_copy(&x->im, &y->im); +} + +void +fp2_sub(fp2_t *x, const fp2_t *y, const fp2_t *z) +{ + fp_sub(&(x->re), &(y->re), &(z->re)); + fp_sub(&(x->im), &(y->im), &(z->im)); +} + +void +fp2_neg(fp2_t *x, const fp2_t *y) +{ + fp_neg(&(x->re), &(y->re)); + fp_neg(&(x->im), &(y->im)); +} + +void +fp2_mul(fp2_t *x, const fp2_t *y, const fp2_t *z) +{ + fp_t t0, t1; + + fp_add(&t0, &(y->re), &(y->im)); + fp_add(&t1, &(z->re), &(z->im)); + fp_mul(&t0, &t0, &t1); + fp_mul(&t1, &(y->im), &(z->im)); + fp_mul(&(x->re), &(y->re), &(z->re)); + fp_sub(&(x->im), &t0, &t1); + fp_sub(&(x->im), &(x->im), &(x->re)); + fp_sub(&(x->re), &(x->re), &t1); +} + +void +fp2_sqr(fp2_t *x, const fp2_t *y) +{ + fp_t sum, diff; + + fp_add(&sum, &(y->re), &(y->im)); + fp_sub(&diff, &(y->re), &(y->im)); + fp_mul(&(x->im), &(y->re), &(y->im)); + fp_add(&(x->im), &(x->im), &(x->im)); + fp_mul(&(x->re), &sum, &diff); +} + +void +fp2_inv(fp2_t *x) +{ + fp_t t0, t1; + + fp_sqr(&t0, &(x->re)); + fp_sqr(&t1, &(x->im)); + fp_add(&t0, &t0, &t1); + fp_inv(&t0); + fp_mul(&(x->re), &(x->re), &t0); + fp_mul(&(x->im), &(x->im), &t0); + fp_neg(&(x->im), &(x->im)); +} + +uint32_t +fp2_is_square(const fp2_t *x) +{ + fp_t t0, t1; + + fp_sqr(&t0, &(x->re)); + fp_sqr(&t1, &(x->im)); + fp_add(&t0, &t0, &t1); + + return fp_is_square(&t0); +} + +void +fp2_sqrt(fp2_t *a) +{ + fp_t x0, x1, t0, t1; + + /* From "Optimized One-Dimensional SQIsign Verification on Intel and + * Cortex-M4" by Aardal et al: 
https://eprint.iacr.org/2024/1563 */ + + // x0 = \delta = sqrt(a0^2 + a1^2). + fp_sqr(&x0, &(a->re)); + fp_sqr(&x1, &(a->im)); + fp_add(&x0, &x0, &x1); + fp_sqrt(&x0); + // If a1 = 0, there is a risk of \delta = -a0, which makes x0 = 0 below. + // In that case, we restore the value \delta = a0. + fp_select(&x0, &x0, &(a->re), fp_is_zero(&(a->im))); + // x0 = \delta + a0, t0 = 2 * x0. + fp_add(&x0, &x0, &(a->re)); + fp_add(&t0, &x0, &x0); + + // x1 = t0^(p-3)/4 + fp_exp3div4(&x1, &t0); + + // x0 = x0 * x1, x1 = x1 * a1, t1 = (2x0)^2. + fp_mul(&x0, &x0, &x1); + fp_mul(&x1, &x1, &(a->im)); + fp_add(&t1, &x0, &x0); + fp_sqr(&t1, &t1); + // If t1 = t0, return x0 + x1*i, otherwise x1 - x0*i. + fp_sub(&t0, &t0, &t1); + uint32_t f = fp_is_zero(&t0); + fp_neg(&t1, &x0); + fp_copy(&t0, &x1); + fp_select(&t0, &t0, &x0, f); + fp_select(&t1, &t1, &x1, f); + + // Check if t0 is zero + uint32_t t0_is_zero = fp_is_zero(&t0); + + // Check whether t0, t1 are odd + // Note: we encode to ensure canonical representation + uint8_t tmp_bytes[FP_ENCODED_BYTES]; + fp_encode(tmp_bytes, &t0); + uint32_t t0_is_odd = -((uint32_t)tmp_bytes[0] & 1); + fp_encode(tmp_bytes, &t1); + uint32_t t1_is_odd = -((uint32_t)tmp_bytes[0] & 1); + + // We negate the output if: + // t0 is odd, or + // t0 is zero and t1 is odd + uint32_t negate_output = t0_is_odd | (t0_is_zero & t1_is_odd); + fp_neg(&x0, &t0); + fp_select(&(a->re), &t0, &x0, negate_output); + fp_neg(&x0, &t1); + fp_select(&(a->im), &t1, &x0, negate_output); +} + +uint32_t +fp2_sqrt_verify(fp2_t *a) +{ + fp2_t t0, t1; + + fp2_copy(&t0, a); + fp2_sqrt(a); + fp2_sqr(&t1, a); + + return (fp2_is_equal(&t0, &t1)); +} + +void +fp2_half(fp2_t *x, const fp2_t *y) +{ + fp_half(&(x->re), &(y->re)); + fp_half(&(x->im), &(y->im)); +} + +void +fp2_batched_inv(fp2_t *x, int len) +{ + /* Inverts x[0..len-1] in place using a single fp2_inv on the running product. */ + /* Nothing to do for len <= 0; returning here also avoids declaring zero- or negative-length VLAs and reading t1[len - 1]. */ + if (len <= 0) { + return; + } + + fp2_t t1[len], t2[len]; + fp2_t inverse; + + // x = x0,...,xn + // t1 = x0, x0*x1, ... ,x0 * x1 * ...
* xn + fp2_copy(&t1[0], &x[0]); + for (int i = 1; i < len; i++) { + fp2_mul(&t1[i], &t1[i - 1], &x[i]); + } + + // inverse = 1/ (x0 * x1 * ... * xn) + fp2_copy(&inverse, &t1[len - 1]); + fp2_inv(&inverse); + + fp2_copy(&t2[0], &inverse); + // t2 = 1/ (x0 * x1 * ... * xn), 1/ (x0 * x1 * ... * x(n-1)) , ... , 1/xO + for (int i = 1; i < len; i++) { + fp2_mul(&t2[i], &t2[i - 1], &x[len - i]); + } + + fp2_copy(&x[0], &t2[len - 1]); + + for (int i = 1; i < len; i++) { + fp2_mul(&x[i], &t1[i - 1], &t2[len - i - 1]); + } +} + +// exponentiation using square and multiply +// Warning!! Not constant time! +void +fp2_pow_vartime(fp2_t *out, const fp2_t *x, const digit_t *exp, const int size) +{ + fp2_t acc; + digit_t bit; + + fp2_copy(&acc, x); + fp2_set_one(out); + + // Iterate over each word of exp + for (int j = 0; j < size; j++) { + // Iterate over each bit of the word + for (int i = 0; i < RADIX; i++) { + bit = (exp[j] >> i) & 1; + if (bit == 1) { + fp2_mul(out, out, &acc); + } + fp2_sqr(&acc, &acc); + } + } +} + +void +fp2_print(const char *name, const fp2_t *a) +{ + printf("%s0x", name); + + uint8_t buf[FP_ENCODED_BYTES]; + fp_encode(&buf, &a->re); // Encoding ensures canonical rep + for (int i = 0; i < FP_ENCODED_BYTES; i++) { + printf("%02x", buf[FP_ENCODED_BYTES - i - 1]); + } + + printf(" + i*0x"); + + fp_encode(&buf, &a->im); + for (int i = 0; i < FP_ENCODED_BYTES; i++) { + printf("%02x", buf[FP_ENCODED_BYTES - i - 1]); + } + printf("\n"); +} + +void +fp2_encode(void *dst, const fp2_t *a) +{ + uint8_t *buf = dst; + fp_encode(buf, &(a->re)); + fp_encode(buf + FP_ENCODED_BYTES, &(a->im)); +} + +uint32_t +fp2_decode(fp2_t *d, const void *src) +{ + const uint8_t *buf = src; + uint32_t re, im; + + re = fp_decode(&(d->re), buf); + im = fp_decode(&(d->im), buf + FP_ENCODED_BYTES); + return re & im; +} + +void +fp2_select(fp2_t *d, const fp2_t *a0, const fp2_t *a1, uint32_t ctl) +{ + fp_select(&(d->re), &(a0->re), &(a1->re), ctl); + fp_select(&(d->im), &(a0->im), 
&(a1->im), ctl); +} + +void +fp2_cswap(fp2_t *a, fp2_t *b, uint32_t ctl) +{ + fp_cswap(&(a->re), &(b->re), ctl); + fp_cswap(&(a->im), &(b->im), ctl); +} diff --git a/src/pqm4/sqisign_lvl1/ref/fp2.h b/src/pqm4/sqisign_lvl1/ref/fp2.h new file mode 100644 index 0000000..00e673b --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/fp2.h @@ -0,0 +1,41 @@ +#ifndef FP2_H +#define FP2_H + +#include +#include "fp.h" +#include + +// Structure for representing elements in GF(p^2) +typedef struct fp2_t +{ + fp_t re, im; +} fp2_t; + +void fp2_set_small(fp2_t *x, const digit_t val); +void fp2_mul_small(fp2_t *x, const fp2_t *y, uint32_t n); +void fp2_set_one(fp2_t *x); +void fp2_set_zero(fp2_t *x); +uint32_t fp2_is_zero(const fp2_t *a); +uint32_t fp2_is_equal(const fp2_t *a, const fp2_t *b); +uint32_t fp2_is_one(const fp2_t *a); +void fp2_copy(fp2_t *x, const fp2_t *y); +void fp2_add(fp2_t *x, const fp2_t *y, const fp2_t *z); +void fp2_add_one(fp2_t *x, const fp2_t *y); +void fp2_sub(fp2_t *x, const fp2_t *y, const fp2_t *z); +void fp2_neg(fp2_t *x, const fp2_t *y); +void fp2_mul(fp2_t *x, const fp2_t *y, const fp2_t *z); +void fp2_sqr(fp2_t *x, const fp2_t *y); +void fp2_inv(fp2_t *x); +uint32_t fp2_is_square(const fp2_t *x); +void fp2_sqrt(fp2_t *x); +uint32_t fp2_sqrt_verify(fp2_t *a); +void fp2_half(fp2_t *x, const fp2_t *y); +void fp2_batched_inv(fp2_t *x, int len); +void fp2_pow_vartime(fp2_t *out, const fp2_t *x, const digit_t *exp, const int size); +void fp2_print(const char *name, const fp2_t *a); +void fp2_encode(void *dst, const fp2_t *a); +uint32_t fp2_decode(fp2_t *d, const void *src); +void fp2_select(fp2_t *d, const fp2_t *a0, const fp2_t *a1, uint32_t ctl); +void fp2_cswap(fp2_t *a, fp2_t *b, uint32_t ctl); + +#endif diff --git a/src/pqm4/sqisign_lvl1/ref/fp_constants.h b/src/pqm4/sqisign_lvl1/ref/fp_constants.h new file mode 100644 index 0000000..c770b78 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/fp_constants.h @@ -0,0 +1,17 @@ +#if RADIX == 32 +#if 
defined(SQISIGN_GF_IMPL_BROADWELL) +#define NWORDS_FIELD 8 +#else +#define NWORDS_FIELD 9 +#endif +#define NWORDS_ORDER 8 +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +#define NWORDS_FIELD 4 +#else +#define NWORDS_FIELD 5 +#endif +#define NWORDS_ORDER 4 +#endif +#define BITS 256 +#define LOG2P 8 diff --git a/src/pqm4/sqisign_lvl1/ref/fp_p5248_32.c b/src/pqm4/sqisign_lvl1/ref/fp_p5248_32.c new file mode 100644 index 0000000..a52add3 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/fp_p5248_32.c @@ -0,0 +1,942 @@ +// clang-format off +// Command line : python monty.py 32 +// 0x4ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + +#include +#include + +#define sspint int32_t +#define spint uint32_t +#define udpint uint64_t +#define dpint uint64_t + +#define Wordlength 32 +#define Nlimbs 9 +#define Radix 29 +#define Nbits 251 +#define Nbytes 32 + +#define MONTGOMERY +// propagate carries +inline static spint prop(spint *n) { + int i; + spint mask = ((spint)1 << 29u) - (spint)1; + sspint carry = (sspint)n[0]; + carry >>= 29u; + n[0] &= mask; + for (i = 1; i < 8; i++) { + carry += (sspint)n[i]; + n[i] = (spint)carry & mask; + carry >>= 29u; + } + n[8] += (spint)carry; + return -((n[8] >> 1) >> 30u); +} + +// propagate carries and add p if negative, propagate carries again +inline static int flatten(spint *n) { + spint carry = prop(n); + n[0] -= (spint)1u & carry; + n[8] += ((spint)0x50000u) & carry; + (void)prop(n); + return (int)(carry & 1); +} + +// Montgomery final subtract +static int modfsb(spint *n) { + n[0] += (spint)1u; + n[8] -= (spint)0x50000u; + return flatten(n); +} + +// Modular addition - reduce less than 2p +static void modadd(const spint *a, const spint *b, spint *n) { + spint carry; + n[0] = a[0] + b[0]; + n[1] = a[1] + b[1]; + n[2] = a[2] + b[2]; + n[3] = a[3] + b[3]; + n[4] = a[4] + b[4]; + n[5] = a[5] + b[5]; + n[6] = a[6] + b[6]; + n[7] = a[7] + b[7]; + n[8] = a[8] + b[8]; + n[0] += (spint)2u; + n[8] -= (spint)0xa0000u; + 
carry = prop(n); + n[0] -= (spint)2u & carry; + n[8] += ((spint)0xa0000u) & carry; + (void)prop(n); +} + +// Modular subtraction - reduce less than 2p +static void modsub(const spint *a, const spint *b, spint *n) { + spint carry; + n[0] = a[0] - b[0]; + n[1] = a[1] - b[1]; + n[2] = a[2] - b[2]; + n[3] = a[3] - b[3]; + n[4] = a[4] - b[4]; + n[5] = a[5] - b[5]; + n[6] = a[6] - b[6]; + n[7] = a[7] - b[7]; + n[8] = a[8] - b[8]; + carry = prop(n); + n[0] -= (spint)2u & carry; + n[8] += ((spint)0xa0000u) & carry; + (void)prop(n); +} + +// Modular negation +static void modneg(const spint *b, spint *n) { + spint carry; + n[0] = (spint)0 - b[0]; + n[1] = (spint)0 - b[1]; + n[2] = (spint)0 - b[2]; + n[3] = (spint)0 - b[3]; + n[4] = (spint)0 - b[4]; + n[5] = (spint)0 - b[5]; + n[6] = (spint)0 - b[6]; + n[7] = (spint)0 - b[7]; + n[8] = (spint)0 - b[8]; + carry = prop(n); + n[0] -= (spint)2u & carry; + n[8] += ((spint)0xa0000u) & carry; + (void)prop(n); +} + +// Overflow limit = 18446744073709551616 +// maximum possible = 2594249331921584137 +// Modular multiplication, c=a*b mod 2p +static void modmul(const spint *a, const spint *b, spint *c) { + dpint t = 0; + spint p8 = 0x50000u; + spint q = ((spint)1 << 29u); // q is unsaturated radix + spint mask = (spint)(q - (spint)1); + t += (dpint)a[0] * b[0]; + spint v0 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[1]; + t += (dpint)a[1] * b[0]; + spint v1 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[2]; + t += (dpint)a[1] * b[1]; + t += (dpint)a[2] * b[0]; + spint v2 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[3]; + t += (dpint)a[1] * b[2]; + t += (dpint)a[2] * b[1]; + t += (dpint)a[3] * b[0]; + spint v3 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[4]; + t += (dpint)a[1] * b[3]; + t += (dpint)a[2] * b[2]; + t += (dpint)a[3] * b[1]; + t += (dpint)a[4] * b[0]; + spint v4 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[5]; + t += (dpint)a[1] * b[4]; + t += (dpint)a[2] * b[3]; + t += 
(dpint)a[3] * b[2]; + t += (dpint)a[4] * b[1]; + t += (dpint)a[5] * b[0]; + spint v5 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[6]; + t += (dpint)a[1] * b[5]; + t += (dpint)a[2] * b[4]; + t += (dpint)a[3] * b[3]; + t += (dpint)a[4] * b[2]; + t += (dpint)a[5] * b[1]; + t += (dpint)a[6] * b[0]; + spint v6 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[7]; + t += (dpint)a[1] * b[6]; + t += (dpint)a[2] * b[5]; + t += (dpint)a[3] * b[4]; + t += (dpint)a[4] * b[3]; + t += (dpint)a[5] * b[2]; + t += (dpint)a[6] * b[1]; + t += (dpint)a[7] * b[0]; + spint v7 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[8]; + t += (dpint)a[1] * b[7]; + t += (dpint)a[2] * b[6]; + t += (dpint)a[3] * b[5]; + t += (dpint)a[4] * b[4]; + t += (dpint)a[5] * b[3]; + t += (dpint)a[6] * b[2]; + t += (dpint)a[7] * b[1]; + t += (dpint)a[8] * b[0]; + t += (dpint)v0 * (dpint)p8; + spint v8 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[1] * b[8]; + t += (dpint)a[2] * b[7]; + t += (dpint)a[3] * b[6]; + t += (dpint)a[4] * b[5]; + t += (dpint)a[5] * b[4]; + t += (dpint)a[6] * b[3]; + t += (dpint)a[7] * b[2]; + t += (dpint)a[8] * b[1]; + t += (dpint)v1 * (dpint)p8; + c[0] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[2] * b[8]; + t += (dpint)a[3] * b[7]; + t += (dpint)a[4] * b[6]; + t += (dpint)a[5] * b[5]; + t += (dpint)a[6] * b[4]; + t += (dpint)a[7] * b[3]; + t += (dpint)a[8] * b[2]; + t += (dpint)v2 * (dpint)p8; + c[1] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[3] * b[8]; + t += (dpint)a[4] * b[7]; + t += (dpint)a[5] * b[6]; + t += (dpint)a[6] * b[5]; + t += (dpint)a[7] * b[4]; + t += (dpint)a[8] * b[3]; + t += (dpint)v3 * (dpint)p8; + c[2] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[4] * b[8]; + t += (dpint)a[5] * b[7]; + t += (dpint)a[6] * b[6]; + t += (dpint)a[7] * b[5]; + t += (dpint)a[8] * b[4]; + t += (dpint)v4 * (dpint)p8; + c[3] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[5] * b[8]; + t += (dpint)a[6] * b[7]; + t += (dpint)a[7] * b[6]; + t += 
(dpint)a[8] * b[5]; + t += (dpint)v5 * (dpint)p8; + c[4] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[6] * b[8]; + t += (dpint)a[7] * b[7]; + t += (dpint)a[8] * b[6]; + t += (dpint)v6 * (dpint)p8; + c[5] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[7] * b[8]; + t += (dpint)a[8] * b[7]; + t += (dpint)v7 * (dpint)p8; + c[6] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[8] * b[8]; + t += (dpint)v8 * (dpint)p8; + c[7] = ((spint)t & mask); + t >>= 29; + c[8] = (spint)t; +} + +// Modular squaring, c=a*a mod 2p +static void modsqr(const spint *a, spint *c) { + udpint tot; + udpint t = 0; + spint p8 = 0x50000u; + spint q = ((spint)1 << 29u); // q is unsaturated radix + spint mask = (spint)(q - (spint)1); + tot = (udpint)a[0] * a[0]; + t = tot; + spint v0 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[1]; + tot *= 2; + t += tot; + spint v1 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[2]; + tot *= 2; + tot += (udpint)a[1] * a[1]; + t += tot; + spint v2 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[3]; + tot += (udpint)a[1] * a[2]; + tot *= 2; + t += tot; + spint v3 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[4]; + tot += (udpint)a[1] * a[3]; + tot *= 2; + tot += (udpint)a[2] * a[2]; + t += tot; + spint v4 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[5]; + tot += (udpint)a[1] * a[4]; + tot += (udpint)a[2] * a[3]; + tot *= 2; + t += tot; + spint v5 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[6]; + tot += (udpint)a[1] * a[5]; + tot += (udpint)a[2] * a[4]; + tot *= 2; + tot += (udpint)a[3] * a[3]; + t += tot; + spint v6 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[7]; + tot += (udpint)a[1] * a[6]; + tot += (udpint)a[2] * a[5]; + tot += (udpint)a[3] * a[4]; + tot *= 2; + t += tot; + spint v7 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[8]; + tot += (udpint)a[1] * a[7]; + tot += (udpint)a[2] * a[6]; + tot += (udpint)a[3] * a[5]; + tot *= 2; + tot += (udpint)a[4] * 
a[4]; + t += tot; + t += (udpint)v0 * p8; + spint v8 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[1] * a[8]; + tot += (udpint)a[2] * a[7]; + tot += (udpint)a[3] * a[6]; + tot += (udpint)a[4] * a[5]; + tot *= 2; + t += tot; + t += (udpint)v1 * p8; + c[0] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[2] * a[8]; + tot += (udpint)a[3] * a[7]; + tot += (udpint)a[4] * a[6]; + tot *= 2; + tot += (udpint)a[5] * a[5]; + t += tot; + t += (udpint)v2 * p8; + c[1] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[3] * a[8]; + tot += (udpint)a[4] * a[7]; + tot += (udpint)a[5] * a[6]; + tot *= 2; + t += tot; + t += (udpint)v3 * p8; + c[2] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[4] * a[8]; + tot += (udpint)a[5] * a[7]; + tot *= 2; + tot += (udpint)a[6] * a[6]; + t += tot; + t += (udpint)v4 * p8; + c[3] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[5] * a[8]; + tot += (udpint)a[6] * a[7]; + tot *= 2; + t += tot; + t += (udpint)v5 * p8; + c[4] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[6] * a[8]; + tot *= 2; + tot += (udpint)a[7] * a[7]; + t += tot; + t += (udpint)v6 * p8; + c[5] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[7] * a[8]; + tot *= 2; + t += tot; + t += (udpint)v7 * p8; + c[6] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[8] * a[8]; + t += tot; + t += (udpint)v8 * p8; + c[7] = ((spint)t & mask); + t >>= 29; + c[8] = (spint)t; +} + +// copy +static void modcpy(const spint *a, spint *c) { + int i; + for (i = 0; i < 9; i++) { + c[i] = a[i]; + } +} + +// square n times +static void modnsqr(spint *a, int n) { + int i; + for (i = 0; i < n; i++) { + modsqr(a, a); + } +} + +// Calculate progenitor +static void modpro(const spint *w, spint *z) { + spint x[9]; + spint t0[9]; + spint t1[9]; + spint t2[9]; + spint t3[9]; + spint t4[9]; + modcpy(w, x); + modsqr(x, z); + modmul(x, z, t0); + modsqr(t0, z); + modmul(x, z, z); + modsqr(z, t1); + modsqr(t1, t3); + modsqr(t3, t2); + modcpy(t2, t4); + modnsqr(t4, 3); + modmul(t2, t4, t2); + 
modcpy(t2, t4); + modnsqr(t4, 6); + modmul(t2, t4, t2); + modcpy(t2, t4); + modnsqr(t4, 2); + modmul(t3, t4, t3); + modnsqr(t3, 13); + modmul(t2, t3, t2); + modcpy(t2, t3); + modnsqr(t3, 27); + modmul(t2, t3, t2); + modmul(z, t2, z); + modcpy(z, t2); + modnsqr(t2, 4); + modmul(t1, t2, t1); + modmul(t0, t1, t0); + modmul(t1, t0, t1); + modmul(t0, t1, t0); + modmul(t1, t0, t2); + modmul(t0, t2, t0); + modmul(t1, t0, t1); + modnsqr(t1, 63); + modmul(t0, t1, t1); + modnsqr(t1, 64); + modmul(t0, t1, t0); + modnsqr(t0, 57); + modmul(z, t0, z); +} + +// calculate inverse, provide progenitor h if available +static void modinv(const spint *x, const spint *h, spint *z) { + spint s[9]; + spint t[9]; + if (h == NULL) { + modpro(x, t); + } else { + modcpy(h, t); + } + modcpy(x, s); + modnsqr(t, 2); + modmul(s, t, z); +} + +// Convert m to n-residue form, n=nres(m) +static void nres(const spint *m, spint *n) { + const spint c[9] = {0xcf5c28fu, 0x6666666u, 0x13333333u, + 0x19999999u, 0xcccccccu, 0x6666666u, + 0x13333333u, 0x19999999u, 0x1ccccu}; + modmul(m, c, n); +} + +// Convert n back to normal form, m=redc(n) +static void redc(const spint *n, spint *m) { + int i; + spint c[9]; + c[0] = 1; + for (i = 1; i < 9; i++) { + c[i] = 0; + } + modmul(n, c, m); + (void)modfsb(m); +} + +// is unity? +static int modis1(const spint *a) { + int i; + spint c[9]; + spint c0; + spint d = 0; + redc(a, c); + for (i = 1; i < 9; i++) { + d |= c[i]; + } + c0 = (spint)c[0]; + return ((spint)1 & ((d - (spint)1) >> 29u) & + (((c0 ^ (spint)1) - (spint)1) >> 29u)); +} + +// is zero? 
+static int modis0(const spint *a) { + int i; + spint c[9]; + spint d = 0; + redc(a, c); + for (i = 0; i < 9; i++) { + d |= c[i]; + } + return ((spint)1 & ((d - (spint)1) >> 29u)); +} + +// set to zero +static void modzer(spint *a) { + int i; + for (i = 0; i < 9; i++) { + a[i] = 0; + } +} + +// set to one +static void modone(spint *a) { + int i; + a[0] = 1; + for (i = 1; i < 9; i++) { + a[i] = 0; + } + nres(a, a); +} + +// set to integer +static void modint(int x, spint *a) { + int i; + a[0] = (spint)x; + for (i = 1; i < 9; i++) { + a[i] = 0; + } + nres(a, a); +} + +// Modular multiplication by an integer, c=a*b mod 2p +static void modmli(const spint *a, int b, spint *c) { + spint t[9]; + modint(b, t); + modmul(a, t, c); +} + +// Test for quadratic residue +static int modqr(const spint *h, const spint *x) { + spint r[9]; + if (h == NULL) { + modpro(x, r); + modsqr(r, r); + } else { + modsqr(h, r); + } + modmul(r, x, r); + return modis1(r) | modis0(x); +} + +// conditional move g to f if b=1 +// strongly recommend inlining be disabled using compiler specific syntax +static void modcmv(int b, const spint *g, volatile spint *f) { + int i; + spint c0, c1, s, t; + spint r = 0x5aa5a55au; + c0 = (1 - b) + r; + c1 = b + r; + for (i = 0; i < 9; i++) { + s = g[i]; + t = f[i]; + f[i] = c0 * t + c1 * s; + f[i] -= r * (t + s); + } +} + +// conditional swap g and f if b=1 +// strongly recommend inlining be disabled using compiler specific syntax +static void modcsw(int b, volatile spint *g, volatile spint *f) { + int i; + spint c0, c1, s, t, w; + spint r = 0x5aa5a55au; + c0 = (1 - b) + r; + c1 = b + r; + for (i = 0; i < 9; i++) { + s = g[i]; + t = f[i]; + w = r * (t + s); + f[i] = c0 * t + c1 * s; + f[i] -= w; + g[i] = c0 * s + c1 * t; + g[i] -= w; + } +} + +// Modular square root, provide progenitor h if available, NULL if not +static void modsqrt(const spint *x, const spint *h, spint *r) { + spint s[9]; + spint y[9]; + if (h == NULL) { + modpro(x, y); + } else { + modcpy(h, 
y); + } + modmul(y, x, s); + modcpy(s, r); +} + +// shift left by less than a word +static void modshl(unsigned int n, spint *a) { + int i; + a[8] = ((a[8] << n)) | (a[7] >> (29u - n)); + for (i = 7; i > 0; i--) { + a[i] = ((a[i] << n) & (spint)0x1fffffff) | (a[i - 1] >> (29u - n)); + } + a[0] = (a[0] << n) & (spint)0x1fffffff; +} + +// shift right by less than a word. Return shifted out part +static int modshr(unsigned int n, spint *a) { + int i; + spint r = a[0] & (((spint)1 << n) - (spint)1); + for (i = 0; i < 8; i++) { + a[i] = (a[i] >> n) | ((a[i + 1] << (29u - n)) & (spint)0x1fffffff); + } + a[8] = a[8] >> n; + return r; +} + +// set a= 2^r +static void mod2r(unsigned int r, spint *a) { + unsigned int n = r / 29u; + unsigned int m = r % 29u; + modzer(a); + if (r >= 32 * 8) + return; + a[n] = 1; + a[n] <<= m; + nres(a, a); +} + +// export to byte array +static void modexp(const spint *a, char *b) { + int i; + spint c[9]; + redc(a, c); + for (i = 31; i >= 0; i--) { + b[i] = c[0] & (spint)0xff; + (void)modshr(8, c); + } +} + +// import from byte array +// returns 1 if in range, else 0 +static int modimp(const char *b, spint *a) { + int i, res; + for (i = 0; i < 9; i++) { + a[i] = 0; + } + for (i = 0; i < 32; i++) { + modshl(8, a); + a[0] += (spint)(unsigned char)b[i]; + } + res = modfsb(a); + nres(a, a); + return res; +} + +// determine sign +static int modsign(const spint *a) { + spint c[9]; + redc(a, c); + return c[0] % 2; +} + +// return true if equal +static int modcmp(const spint *a, const spint *b) { + spint c[9], d[9]; + int i, eq = 1; + redc(a, c); + redc(b, d); + for (i = 0; i < 9; i++) { + eq &= (((c[i] ^ d[i]) - 1) >> 29) & 1; + } + return eq; +} + +// clang-format on +/****************************************************************************** + API functions calling generated code above + ******************************************************************************/ + +#include + +const digit_t ZERO[NWORDS_FIELD] = { 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0 }; +const digit_t ONE[NWORDS_FIELD] = { 0x00000666, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00020000 }; +// Montgomery representation of 2^-1 +static const digit_t TWO_INV[NWORDS_FIELD] = { 0x00000333, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00010000 }; +// Montgomery representation of 3^-1 +static const digit_t THREE_INV[NWORDS_FIELD] = { + 0x15555777, 0x0aaaaaaa, 0x15555555, 0x0aaaaaaa, 0x15555555, 0x0aaaaaaa, 0x15555555, 0x0aaaaaaa, 0x00025555, +}; +// Montgomery representation of 2^256 +static const digit_t R2[NWORDS_FIELD] = { 0x0667ae14, 0x13333333, 0x19999999, 0x0ccccccc, 0x06666666, + 0x13333333, 0x19999999, 0x0ccccccc, 0x00026666 }; + +void +fp_set_small(fp_t *x, const digit_t val) +{ + modint((int)val, *x); +} + +void +fp_mul_small(fp_t *x, const fp_t *a, const uint32_t val) +{ + modmli(*a, (int)val, *x); +} + +void +fp_set_zero(fp_t *x) +{ + modzer(*x); +} + +void +fp_set_one(fp_t *x) +{ + modone(*x); +} + +uint32_t +fp_is_equal(const fp_t *a, const fp_t *b) +{ + return -(uint32_t)modcmp(*a, *b); +} + +uint32_t +fp_is_zero(const fp_t *a) +{ + return -(uint32_t)modis0(*a); +} + +void +fp_copy(fp_t *out, const fp_t *a) +{ + modcpy(*a, *out); +} + +void +fp_cswap(fp_t *a, fp_t *b, uint32_t ctl) +{ + modcsw((int)(ctl & 0x1), *a, *b); +} + +void +fp_add(fp_t *out, const fp_t *a, const fp_t *b) +{ + modadd(*a, *b, *out); +} + +void +fp_sub(fp_t *out, const fp_t *a, const fp_t *b) +{ + modsub(*a, *b, *out); +} + +void +fp_neg(fp_t *out, const fp_t *a) +{ + modneg(*a, *out); +} + +void +fp_sqr(fp_t *out, const fp_t *a) +{ + modsqr(*a, *out); +} + +void +fp_mul(fp_t *out, const fp_t *a, const fp_t *b) +{ + modmul(*a, *b, *out); +} + +void +fp_inv(fp_t *x) +{ + modinv(*x, NULL, *x); +} + +uint32_t +fp_is_square(const fp_t *a) +{ + return -(uint32_t)modqr(NULL, *a); +} + +void +fp_sqrt(fp_t *a) +{ + modsqrt(*a, NULL, *a); +} + +void +fp_half(fp_t *out, 
const fp_t *a) +{ + modmul(TWO_INV, *a, *out); +} + +void +fp_exp3div4(fp_t *out, const fp_t *a) +{ + modpro(*a, *out); +} + +void +fp_div3(fp_t *out, const fp_t *a) +{ + modmul(THREE_INV, *a, *out); +} + +void +fp_encode(void *dst, const fp_t *a) +{ + // Modified version of modexp() + int i; + spint c[9]; + redc(*a, c); + for (i = 0; i < 32; i++) { + ((char *)dst)[i] = c[0] & (spint)0xff; + (void)modshr(8, c); + } +} + +uint32_t +fp_decode(fp_t *d, const void *src) +{ + // Modified version of modimp() + int i; + spint res; + const unsigned char *b = src; + for (i = 0; i < 9; i++) { + (*d)[i] = 0; + } + for (i = 31; i >= 0; i--) { + modshl(8, *d); + (*d)[0] += (spint)b[i]; + } + res = (spint)-modfsb(*d); + nres(*d, *d); + // If the value was canonical then res = -1; otherwise, res = 0 + for (i = 0; i < 9; i++) { + (*d)[i] &= res; + } + return (uint32_t)res; +} + +static inline unsigned char +add_carry(unsigned char cc, spint a, spint b, spint *d) +{ + udpint t = (udpint)a + (udpint)b + cc; + *d = (spint)t; + return (unsigned char)(t >> Wordlength); +} + +static void +partial_reduce(spint *out, const spint *src) +{ + spint h, l, quo, rem; + unsigned char cc; + + // Split value in high (8 bits) and low (248 bits) parts. + h = src[7] >> 24; + l = src[7] & 0x00FFFFFF; + + // 5*2^248 = 1 mod q; hence, we add floor(h/5) + (h mod 5)*2^248 + // to the low part. + quo = (h * 0xCD) >> 10; + rem = h - (5 * quo); + cc = add_carry(0, src[0], quo, &out[0]); + cc = add_carry(cc, src[1], 0, &out[1]); + cc = add_carry(cc, src[2], 0, &out[2]); + cc = add_carry(cc, src[3], 0, &out[3]); + cc = add_carry(cc, src[4], 0, &out[4]); + cc = add_carry(cc, src[5], 0, &out[5]); + cc = add_carry(cc, src[6], 0, &out[6]); + (void)add_carry(cc, l, rem << 24, &out[7]); +} + +// Little-endian encoding of a 32-bit integer. 
+static inline void +enc32le(void *dst, uint32_t x) +{ + uint8_t *buf = dst; + buf[0] = (uint8_t)x; + buf[1] = (uint8_t)(x >> 8); + buf[2] = (uint8_t)(x >> 16); + buf[3] = (uint8_t)(x >> 24); +} + +// Little-endian decoding of a 32-bit integer. +static inline uint32_t +dec32le(const void *src) +{ + const uint8_t *buf = src; + return (spint)buf[0] | ((spint)buf[1] << 8) | ((spint)buf[2] << 16) | ((spint)buf[3] << 24); +} + +void +fp_decode_reduce(fp_t *d, const void *src, size_t len) +{ + uint32_t t[8]; // Stores Nbytes * 8 bits + uint8_t tmp[32]; // Nbytes + const uint8_t *b = src; + + fp_set_zero(d); + if (len == 0) { + return; + } + + size_t rem = len % 32; + if (rem != 0) { + // Input size is not a multiple of 32, we decode a partial + // block, which is already less than 2^248. + size_t k = len - rem; + memcpy(tmp, b + k, len - k); + memset(tmp + len - k, 0, (sizeof tmp) - (len - k)); + fp_decode(d, tmp); + len = k; + } + // Process all remaining blocks, in descending address order. 
+ while (len > 0) { + fp_mul(d, d, &R2); + len -= 32; + t[0] = dec32le(b + len); + t[1] = dec32le(b + len + 4); + t[2] = dec32le(b + len + 8); + t[3] = dec32le(b + len + 12); + t[4] = dec32le(b + len + 16); + t[5] = dec32le(b + len + 20); + t[6] = dec32le(b + len + 24); + t[7] = dec32le(b + len + 28); + partial_reduce(t, t); + enc32le(tmp, t[0]); + enc32le(tmp + 4, t[1]); + enc32le(tmp + 8, t[2]); + enc32le(tmp + 12, t[3]); + enc32le(tmp + 16, t[4]); + enc32le(tmp + 20, t[5]); + enc32le(tmp + 24, t[6]); + enc32le(tmp + 28, t[7]); + fp_t a; + fp_decode(&a, tmp); + fp_add(d, d, &a); + } +} diff --git a/src/pqm4/sqisign_lvl1/ref/hd.c b/src/pqm4/sqisign_lvl1/ref/hd.c new file mode 100644 index 0000000..0424108 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/hd.c @@ -0,0 +1,93 @@ +#include +#include + +void +double_couple_point(theta_couple_point_t *out, const theta_couple_point_t *in, const theta_couple_curve_t *E1E2) +{ + ec_dbl(&out->P1, &in->P1, &E1E2->E1); + ec_dbl(&out->P2, &in->P2, &E1E2->E2); +} + +void +double_couple_point_iter(theta_couple_point_t *out, + unsigned n, + const theta_couple_point_t *in, + const theta_couple_curve_t *E1E2) +{ + if (n == 0) { + memmove(out, in, sizeof(theta_couple_point_t)); + } else { + double_couple_point(out, in, E1E2); + for (unsigned i = 0; i < n - 1; i++) { + double_couple_point(out, out, E1E2); + } + } +} + +void +add_couple_jac_points(theta_couple_jac_point_t *out, + const theta_couple_jac_point_t *T1, + const theta_couple_jac_point_t *T2, + const theta_couple_curve_t *E1E2) +{ + ADD(&out->P1, &T1->P1, &T2->P1, &E1E2->E1); + ADD(&out->P2, &T1->P2, &T2->P2, &E1E2->E2); +} + +void +double_couple_jac_point(theta_couple_jac_point_t *out, + const theta_couple_jac_point_t *in, + const theta_couple_curve_t *E1E2) +{ + DBL(&out->P1, &in->P1, &E1E2->E1); + DBL(&out->P2, &in->P2, &E1E2->E2); +} + +void +double_couple_jac_point_iter(theta_couple_jac_point_t *out, + unsigned n, + const theta_couple_jac_point_t *in, + const 
theta_couple_curve_t *E1E2) +{ + if (n == 0) { + *out = *in; + } else if (n == 1) { + double_couple_jac_point(out, in, E1E2); + } else { + fp2_t a1, a2, t1, t2; + + jac_to_ws(&out->P1, &t1, &a1, &in->P1, &E1E2->E1); + jac_to_ws(&out->P2, &t2, &a2, &in->P2, &E1E2->E2); + + DBLW(&out->P1, &t1, &out->P1, &t1); + DBLW(&out->P2, &t2, &out->P2, &t2); + for (unsigned i = 0; i < n - 1; i++) { + DBLW(&out->P1, &t1, &out->P1, &t1); + DBLW(&out->P2, &t2, &out->P2, &t2); + } + + jac_from_ws(&out->P1, &out->P1, &a1, &E1E2->E1); + jac_from_ws(&out->P2, &out->P2, &a2, &E1E2->E2); + } +} + +void +couple_jac_to_xz(theta_couple_point_t *P, const theta_couple_jac_point_t *xyP) +{ + jac_to_xz(&P->P1, &xyP->P1); + jac_to_xz(&P->P2, &xyP->P2); +} + +void +copy_bases_to_kernel(theta_kernel_couple_points_t *ker, const ec_basis_t *B1, const ec_basis_t *B2) +{ + // Copy the basis on E1 to (P, _) on T1, T2 and T1 - T2 + copy_point(&ker->T1.P1, &B1->P); + copy_point(&ker->T2.P1, &B1->Q); + copy_point(&ker->T1m2.P1, &B1->PmQ); + + // Copy the basis on E2 to (_, P) on T1, T2 and T1 - T2 + copy_point(&ker->T1.P2, &B2->P); + copy_point(&ker->T2.P2, &B2->Q); + copy_point(&ker->T1m2.P2, &B2->PmQ); +} diff --git a/src/pqm4/sqisign_lvl1/ref/hd.h b/src/pqm4/sqisign_lvl1/ref/hd.h new file mode 100644 index 0000000..2b16e23 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/hd.h @@ -0,0 +1,435 @@ +/** @file + * + * @authors Antonin Leroux + * + * @brief The HD-isogenies algorithm required by the signature + * + */ + +#ifndef HD_H +#define HD_H + +#include +#include +#include + +/** @defgroup hd_module Abelian surfaces and their isogenies + * @{ + */ + +#define HD_extra_torsion 2 + +/** @defgroup hd_struct Data structures for dimension 2 + * @{ + */ + +/** @brief Type for couple point with XZ coordinates + * @typedef theta_couple_point_t + * + * @struct theta_couple_point + * + * Structure for the couple point on an elliptic product + * using XZ coordinates + */ +typedef struct theta_couple_point +{ + 
ec_point_t P1; + ec_point_t P2; +} theta_couple_point_t; + +/** @brief Type for three couple points T1, T2, T1-T2 with XZ coordinates + * @typedef theta_kernel_couple_points_t + * + * @struct theta_kernel_couple_points + * + * Structure for a triple of theta couple points T1, T2 and T1 - T2 + */ +typedef struct theta_kernel_couple_points +{ + theta_couple_point_t T1; + theta_couple_point_t T2; + theta_couple_point_t T1m2; +} theta_kernel_couple_points_t; + +/** @brief Type for couple point with XYZ coordinates + * @typedef theta_couple_jac_point_t + * + * @struct theta_couple_jac_point + * + * Structure for the couple point on an elliptic product + * using XYZ coordinates + */ +typedef struct theta_couple_jac_point +{ + jac_point_t P1; + jac_point_t P2; +} theta_couple_jac_point_t; + +/** @brief Type for couple curve * + * @typedef theta_couple_curve_t + * + * @struct theta_couple_curve + * + * the theta_couple_curve structure + */ +typedef struct theta_couple_curve +{ + ec_curve_t E1; + ec_curve_t E2; +} theta_couple_curve_t; + +/** @brief Type for a product E1 x E2 with corresponding bases + * @typedef theta_couple_curve_with_basis_t + * + * @struct theta_couple_curve_with_basis + * + * Type for a product E1 x E2 with corresponding bases Ei[2^n] + */ +typedef struct theta_couple_curve_with_basis +{ + ec_curve_t E1; + ec_curve_t E2; + ec_basis_t B1; + ec_basis_t B2; +} theta_couple_curve_with_basis_t; + +/** @brief Type for theta point * + * @typedef theta_point_t + * + * @struct theta_point + * + * the theta_point structure used + */ +typedef struct theta_point +{ + fp2_t x; + fp2_t y; + fp2_t z; + fp2_t t; +} theta_point_t; + +/** @brief Type for theta point with repeating components + * @typedef theta_point_compact_t + * + * @struct theta_point_compact + * + * the theta_point structure used for points with repeated components + */ +typedef struct theta_point_compact +{ + fp2_t x; + fp2_t y; +} theta_point_compact_t; + +/** @brief Type for theta structure * + * 
@typedef theta_structure_t + * + * @struct theta_structure + * + * the theta_structure structure used + */ +typedef struct theta_structure +{ + theta_point_t null_point; + bool precomputation; + + // Eight precomputed values used for doubling and + // (2,2)-isogenies. + fp2_t XYZ0; + fp2_t YZT0; + fp2_t XZT0; + fp2_t XYT0; + + fp2_t xyz0; + fp2_t yzt0; + fp2_t xzt0; + fp2_t xyt0; +} theta_structure_t; + +/** @brief A 2x2 matrix used for action by translation + * @typedef translation_matrix_t + * + * @struct translation_matrix + * + * Structure to hold 4 fp2_t elements representing a 2x2 matrix used when computing + * a compatible theta structure during gluing. + */ +typedef struct translation_matrix +{ + fp2_t g00; + fp2_t g01; + fp2_t g10; + fp2_t g11; +} translation_matrix_t; + +/** @brief A 4x4 matrix used for basis changes + * @typedef basis_change_matrix_t + * + * @struct basis_change_matrix + * + * Structure to hold 16 elements representing a 4x4 matrix used for changing + * the basis of a theta point. 
+ */ +typedef struct basis_change_matrix +{ + fp2_t m[4][4]; +} basis_change_matrix_t; + +/** @brief Type for gluing (2,2) theta isogeny * + * @typedef theta_gluing_t + * + * @struct theta_gluing + * + * the theta_gluing structure + */ +typedef struct theta_gluing +{ + + theta_couple_curve_t domain; + theta_couple_jac_point_t xyK1_8; + theta_point_compact_t imageK1_8; + basis_change_matrix_t M; + theta_point_t precomputation; + theta_point_t codomain; + +} theta_gluing_t; + +/** @brief Type for standard (2,2) theta isogeny * + * @typedef theta_isogeny_t + * + * @struct theta_isogeny + * + * the theta_isogeny structure + */ +typedef struct theta_isogeny +{ + theta_point_t T1_8; + theta_point_t T2_8; + bool hadamard_bool_1; + bool hadamard_bool_2; + theta_structure_t domain; + theta_point_t precomputation; + theta_structure_t codomain; +} theta_isogeny_t; + +/** @brief Type for splitting isomorphism * + * @typedef theta_splitting_t + * + * @struct theta_splitting + * + * the theta_splitting structure + */ +typedef struct theta_splitting +{ + basis_change_matrix_t M; + theta_structure_t B; + +} theta_splitting_t; + +// end of hd_struct +/** + * @} + */ + +/** @defgroup hd_functions Functions for dimension 2 + * @{ + */ + +/** + * @brief Compute the double of the theta couple point `in` on the elliptic product E1E2 + * + * @param out Output: the theta_couple_point + * @param in the theta couple point in the elliptic product + * @param E1E2 an elliptic product + * in = (P1,P2) + * out = [2] (P1,P2) + * + */ +void double_couple_point(theta_couple_point_t *out, const theta_couple_point_t *in, const theta_couple_curve_t *E1E2); + +/** + * @brief Compute the iterated double of the theta couple point `in` on the elliptic product E1E2 + * + * @param out Output: the theta_couple_point + * @param n : the number of iterations + * @param E1E2 an elliptic product + * @param in the theta couple point in the elliptic product + * in = (P1,P2) + * out = [2^n] (P1,P2) + * + */ +void 
double_couple_point_iter(theta_couple_point_t *out, + unsigned n, + const theta_couple_point_t *in, + const theta_couple_curve_t *E1E2); + +/** + * @brief Compute the addition of two points in (X : Y : Z) coordinates on the elliptic product E1E2 + * + * @param out Output: the theta_couple_jac_point + * @param T1 the theta couple jac point in the elliptic product + * @param T2 the theta couple jac point in the elliptic product + * @param E1E2 an elliptic product + * T1 = (P1, P2), T2 = (Q1, Q2) + * out = (P1 + Q1, P2 + Q2) + * + **/ +void add_couple_jac_points(theta_couple_jac_point_t *out, + const theta_couple_jac_point_t *T1, + const theta_couple_jac_point_t *T2, + const theta_couple_curve_t *E1E2); + +/** + * @brief Compute the double of the theta couple jac point `in` on the elliptic product E1E2 + * + * @param out Output: the theta_couple_jac_point + * @param in the theta couple jac point in the elliptic product + * @param E1E2 an elliptic product + * in = (P1,P2) + * out = [2] (P1,P2) + * + */ +void double_couple_jac_point(theta_couple_jac_point_t *out, + const theta_couple_jac_point_t *in, + const theta_couple_curve_t *E1E2); + +/** + * @brief Compute the iterated double of the theta couple jac point `in` on the elliptic product E1E2 + * + * @param out Output: the theta_couple_jac_point + * @param n : the number of iterations + * @param in the theta couple jac point in the elliptic product + * @param E1E2 an elliptic product + * in = (P1,P2) + * out = [2^n] (P1,P2) + * + */ +void double_couple_jac_point_iter(theta_couple_jac_point_t *out, + unsigned n, + const theta_couple_jac_point_t *in, + const theta_couple_curve_t *E1E2); + +/** + * @brief A forgetful function which returns (X : Z) points given a pair of (X : Y : Z) points + * + * @param P Output: the theta_couple_point + * @param xyP : the theta_couple_jac_point + **/ +void couple_jac_to_xz(theta_couple_point_t *P, const theta_couple_jac_point_t *xyP); + +/** + * @brief Compute a (2,2) isogeny chain in dimension 2 between 
elliptic + * products in the theta_model and evaluate at a list of points of the form + * (P1,0) or (0,P2). Returns 0 if the codomain fails to split (or there is + * an error during the computation) and 1 otherwise. + * + * @param n : the length of the isogeny chain + * @param E12 an elliptic curve product + * @param ker T1, T2 and T1-T2. couple points on E12[2^(n+2)] + * @param extra_torsion boolean indicating if we give the points in E12[2^n] or + * E12[2^(n+HD_extra_torsion)] + * @param E34 Output: the codomain curve + * @param P12 Input/Output: pointer to points to be pushed through the isogeny (in-place) + * @param numP: length of the list of points given in P12 (can be zero) + * @returns 1 on success 0 on failure + * + */ +int theta_chain_compute_and_eval(unsigned n, + /*const*/ theta_couple_curve_t *E12, + const theta_kernel_couple_points_t *ker, + bool extra_torsion, + theta_couple_curve_t *E34, + theta_couple_point_t *P12, + size_t numP); + +/** + * @brief Compute a (2,2) isogeny chain in dimension 2 between elliptic + * products in the theta_model and evaluate at a list of points of the form + * (P1,0) or (0,P2). Returns 0 if the codomain fails to split (or there is + * an error during the computation) and 1 otherwise. + * Compared to theta_chain_compute_and_eval, it does extra isotropy + * checks on the kernel. + * + * @param n : the length of the isogeny chain + * @param E12 an elliptic curve product + * @param ker T1, T2 and T1-T2. 
couple points on E12[2^(n+2)] + * @param extra_torsion boolean indicating if we give the points in E12[2^n] or + * E12[2^(n+HD_extra_torsion)] + * @param E34 Output: the codomain curve + * @param P12 Input/Output: pointer to points to be pushed through the isogeny (in-place) + * @param numP: length of the list of points given in P12 (can be zero) + * @returns 1 on success 0 on failure + * + */ +int theta_chain_compute_and_eval_verify(unsigned n, + /*const*/ theta_couple_curve_t *E12, + const theta_kernel_couple_points_t *ker, + bool extra_torsion, + theta_couple_curve_t *E34, + theta_couple_point_t *P12, + size_t numP); + +/** + * @brief Compute a (2,2) isogeny chain in dimension 2 between elliptic + * products in the theta_model and evaluate at a list of points of the form + * (P1,0) or (0,P2). Returns 0 if the codomain fails to split (or there is + * an error during the computation) and 1 otherwise. + * Compared to theta_chain_compute_and_eval, it selects a random Montgomery + * model of the codomain. + * + * @param n : the length of the isogeny chain + * @param E12 an elliptic curve product + * @param ker T1, T2 and T1-T2. 
couple points on E12[2^(n+2)] + * @param extra_torsion boolean indicating if we give the points in E12[2^n] or + * E12[2^(n+HD_extra_torsion)] + * @param E34 Output: the codomain curve + * @param P12 Input/Output: pointer to points to be pushed through the isogeny (in-place) + * @param numP: length of the list of points given in P12 (can be zero) + * @returns 1 on success, 0 on failure + * + */ +int theta_chain_compute_and_eval_randomized(unsigned n, + /*const*/ theta_couple_curve_t *E12, + const theta_kernel_couple_points_t *ker, + bool extra_torsion, + theta_couple_curve_t *E34, + theta_couple_point_t *P12, + size_t numP); + +/** + * @brief Given bases B1 on E1 and B2 on E2, copies them to create a kernel + * on E1 x E2 as couple points T1, T2 and T1 - T2 + * + * @param ker Output: a kernel for dim_two_isogenies (T1, T2, T1-T2) + * @param B1 Input basis on E1 + * @param B2 Input basis on E2 + **/ +void copy_bases_to_kernel(theta_kernel_couple_points_t *ker, const ec_basis_t *B1, const ec_basis_t *B2); + +/** + * @brief Given a couple of points (P1, P2) on a couple of curves (E1, E2) + * this function tests if both points are of order exactly 2^t + * + * @param T: couple point (P1, P2) + * @param E: a couple of curves (E1, E2) + * @param t: an integer + * @returns 0xFFFFFFFF on success, 0 on failure + */ +static int +test_couple_point_order_twof(const theta_couple_point_t *T, const theta_couple_curve_t *E, int t) +{ + int check_P1 = test_point_order_twof(&T->P1, &E->E1, t); + int check_P2 = test_point_order_twof(&T->P2, &E->E2, t); + + return check_P1 & check_P2; +} + +// end of hd_functions +/** + * @} + */ +// end of hd_module +/** + * @} + */ +#endif diff --git a/src/pqm4/sqisign_lvl1/ref/hd_splitting_transforms.c b/src/pqm4/sqisign_lvl1/ref/hd_splitting_transforms.c new file mode 100644 index 0000000..6332d21 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/hd_splitting_transforms.c @@ -0,0 +1,143 @@ +#include + +#define FP2_ZERO 0 +#define FP2_ONE 1 +#define 
FP2_I 2 +#define FP2_MINUS_ONE 3 +#define FP2_MINUS_I 4 + +const int EVEN_INDEX[10][2] = {{0, 0}, {0, 1}, {0, 2}, {0, 3}, {1, 0}, {1, 2}, {2, 0}, {2, 1}, {3, 0}, {3, 3}}; +const int CHI_EVAL[4][4] = {{1, 1, 1, 1}, {1, -1, 1, -1}, {1, 1, -1, -1}, {1, -1, -1, 1}}; +const fp2_t FP2_CONSTANTS[5] = {{ +#if 0 +#elif RADIX == 16 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 32 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x0, 0x0, 0x0, 0x0} +#else +{0x0, 0x0, 0x0, 0x0, 0x0} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 32 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x0, 0x0, 0x0, 0x0} +#else +{0x0, 0x0, 0x0, 0x0, 0x0} +#endif +#endif +}, { +#if 0 +#elif RADIX == 16 +{0x333, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2} +#elif RADIX == 32 +{0x666, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x20000} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x33, 0x0, 0x0, 0x100000000000000} +#else +{0x19, 0x0, 0x0, 0x0, 0x300000000000} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 32 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x0, 0x0, 0x0, 0x0} +#else +{0x0, 0x0, 0x0, 0x0, 0x0} +#endif +#endif +}, { +#if 0 +#elif RADIX == 16 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 32 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x0, 0x0, 0x0, 0x0} +#else +{0x0, 0x0, 0x0, 0x0, 0x0} +#endif +#endif +, +#if 0 +#elif RADIX 
== 16 +{0x333, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2} +#elif RADIX == 32 +{0x666, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x20000} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x33, 0x0, 0x0, 0x100000000000000} +#else +{0x19, 0x0, 0x0, 0x0, 0x300000000000} +#endif +#endif +}, { +#if 0 +#elif RADIX == 16 +{0x1ccc, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x7} +#elif RADIX == 32 +{0x1ffff999, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x2ffff} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0xffffffffffffffcc, 0xffffffffffffffff, 0xffffffffffffffff, 0x3ffffffffffffff} +#else +{0x7ffffffffffe6, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff, 0x1fffffffffff} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 32 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x0, 0x0, 0x0, 0x0} +#else +{0x0, 0x0, 0x0, 0x0, 0x0} +#endif +#endif +}, { +#if 0 +#elif RADIX == 16 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 32 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x0, 0x0, 0x0, 0x0} +#else +{0x0, 0x0, 0x0, 0x0, 0x0} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0x1ccc, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x7} +#elif RADIX == 32 +{0x1ffff999, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x2ffff} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0xffffffffffffffcc, 0xffffffffffffffff, 0xffffffffffffffff, 
0x3ffffffffffffff} +#else +{0x7ffffffffffe6, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff, 0x1fffffffffff} +#endif +#endif +}}; +const precomp_basis_change_matrix_t SPLITTING_TRANSFORMS[10] = {{{{FP2_ONE, FP2_I, FP2_ONE, FP2_I}, {FP2_ONE, FP2_MINUS_I, FP2_MINUS_ONE, FP2_I}, {FP2_ONE, FP2_I, FP2_MINUS_ONE, FP2_MINUS_I}, {FP2_MINUS_ONE, FP2_I, FP2_MINUS_ONE, FP2_I}}}, {{{FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_ONE}, {FP2_ZERO, FP2_ZERO, FP2_ONE, FP2_ZERO}, {FP2_ZERO, FP2_MINUS_ONE, FP2_ZERO, FP2_ZERO}}}, {{{FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ONE, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_ONE}, {FP2_ZERO, FP2_ZERO, FP2_MINUS_ONE, FP2_ZERO}}}, {{{FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ONE, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ONE, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_MINUS_ONE}}}, {{{FP2_ONE, FP2_ONE, FP2_ONE, FP2_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE}, {FP2_ONE, FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE}, {FP2_MINUS_ONE, FP2_ONE, FP2_MINUS_ONE, FP2_ONE}}}, {{{FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ONE, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_ONE}, {FP2_ZERO, FP2_ZERO, FP2_ONE, FP2_ZERO}}}, {{{FP2_ONE, FP2_ONE, FP2_ONE, FP2_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_ONE, FP2_MINUS_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE}, {FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE, FP2_ONE}}}, {{{FP2_ONE, FP2_ONE, FP2_ONE, FP2_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_ONE, FP2_MINUS_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE}, {FP2_ONE, FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE}}}, {{{FP2_ONE, FP2_ONE, FP2_ONE, FP2_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_ONE, FP2_MINUS_ONE}, {FP2_ONE, FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE}, {FP2_MINUS_ONE, FP2_ONE, FP2_ONE, FP2_MINUS_ONE}}}, {{{FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ONE, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ONE, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ZERO, 
FP2_ONE}}}}; +const precomp_basis_change_matrix_t NORMALIZATION_TRANSFORMS[6] = {{{{FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ONE, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ONE, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_ONE}}}, {{{FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_ONE}, {FP2_ZERO, FP2_ZERO, FP2_ONE, FP2_ZERO}, {FP2_ZERO, FP2_ONE, FP2_ZERO, FP2_ZERO}, {FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}}}, {{{FP2_ONE, FP2_ONE, FP2_ONE, FP2_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_ONE, FP2_MINUS_ONE}, {FP2_ONE, FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE}}}, {{{FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE}, {FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE, FP2_ONE}, {FP2_MINUS_ONE, FP2_ONE, FP2_MINUS_ONE, FP2_ONE}, {FP2_ONE, FP2_ONE, FP2_ONE, FP2_ONE}}}, {{{FP2_MINUS_ONE, FP2_I, FP2_I, FP2_ONE}, {FP2_I, FP2_MINUS_ONE, FP2_ONE, FP2_I}, {FP2_I, FP2_ONE, FP2_MINUS_ONE, FP2_I}, {FP2_ONE, FP2_I, FP2_I, FP2_MINUS_ONE}}}, {{{FP2_ONE, FP2_I, FP2_I, FP2_MINUS_ONE}, {FP2_I, FP2_ONE, FP2_MINUS_ONE, FP2_I}, {FP2_I, FP2_MINUS_ONE, FP2_ONE, FP2_I}, {FP2_MINUS_ONE, FP2_I, FP2_I, FP2_ONE}}}}; diff --git a/src/pqm4/sqisign_lvl1/ref/hd_splitting_transforms.h b/src/pqm4/sqisign_lvl1/ref/hd_splitting_transforms.h new file mode 100644 index 0000000..b3147a4 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/hd_splitting_transforms.h @@ -0,0 +1,18 @@ +#ifndef HD_SPLITTING_H +#define HD_SPLITTING_H + +#include +#include + +typedef struct precomp_basis_change_matrix { + uint8_t m[4][4]; +} precomp_basis_change_matrix_t; + +extern const int EVEN_INDEX[10][2]; +extern const int CHI_EVAL[4][4]; +extern const fp2_t FP2_CONSTANTS[5]; +extern const precomp_basis_change_matrix_t SPLITTING_TRANSFORMS[10]; +extern const precomp_basis_change_matrix_t NORMALIZATION_TRANSFORMS[6]; + +#endif + diff --git a/src/pqm4/sqisign_lvl1/ref/isog.h b/src/pqm4/sqisign_lvl1/ref/isog.h new file mode 100644 index 0000000..b251ca3 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/isog.h @@ 
-0,0 +1,28 @@ +#ifndef _ISOG_H_ +#define _ISOG_H_ +#include +#include + +/* KPS structure for isogenies of degree 2 or 4 */ +typedef struct +{ + ec_point_t K; +} ec_kps2_t; +typedef struct +{ + ec_point_t K[3]; +} ec_kps4_t; + +void xisog_2(ec_kps2_t *kps, ec_point_t *B, const ec_point_t P); // degree-2 isogeny construction +void xisog_2_singular(ec_kps2_t *kps, ec_point_t *B24, ec_point_t A24); + +void xisog_4(ec_kps4_t *kps, ec_point_t *B, const ec_point_t P); // degree-4 isogeny construction +void xisog_4_singular(ec_kps4_t *kps, ec_point_t *B24, const ec_point_t P, ec_point_t A24); + +void xeval_2(ec_point_t *R, ec_point_t *const Q, const int lenQ, const ec_kps2_t *kps); +void xeval_2_singular(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_kps2_t *kps); + +void xeval_4(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_kps4_t *kps); +void xeval_4_singular(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_point_t P, const ec_kps4_t *kps); + +#endif diff --git a/src/pqm4/sqisign_lvl1/ref/isog_chains.c b/src/pqm4/sqisign_lvl1/ref/isog_chains.c new file mode 100644 index 0000000..abc9808 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/isog_chains.c @@ -0,0 +1,241 @@ +#include "isog.h" +#include + +// since we use degree 4 isogeny steps, we need to handle the odd case with care +static uint32_t +ec_eval_even_strategy(ec_curve_t *curve, + ec_point_t *points, + unsigned len_points, + const ec_point_t *kernel, + const int isog_len) +{ + ec_curve_normalize_A24(curve); + ec_point_t A24; + copy_point(&A24, &curve->A24); + + int space = 1; + for (int i = 1; i < isog_len; i *= 2) + ++space; + + // Stack of remaining kernel points and their associated orders + ec_point_t splits[space]; + uint16_t todo[space]; + splits[0] = *kernel; + todo[0] = isog_len; + + int current = 0; // Pointer to current top of stack + + // Chain of 4-isogenies + for (int j = 0; j < isog_len / 2; ++j) { + assert(current >= 0); + assert(todo[current] >= 1); + // Get 
the next point of order 4 + while (todo[current] != 2) { + assert(todo[current] >= 3); + // A new split will be added + ++current; + assert(current < space); + // We set the seed of the new split to be computed and saved + copy_point(&splits[current], &splits[current - 1]); + // if we copied from the very first element, then we perform one additional doubling + unsigned num_dbls = todo[current - 1] / 4 * 2 + todo[current - 1] % 2; + todo[current] = todo[current - 1] - num_dbls; + while (num_dbls--) + xDBL_A24(&splits[current], &splits[current], &A24, false); + } + + if (j == 0) { + assert(fp2_is_one(&A24.z)); + if (!ec_is_four_torsion(&splits[current], curve)) + return -1; + + ec_point_t T; + xDBL_A24(&T, &splits[current], &A24, false); + if (fp2_is_zero(&T.x)) + return -1; // special isogenies not allowed + } else { + assert(todo[current] == 2); +#ifndef NDEBUG + if (fp2_is_zero(&splits[current].z)) + debug_print("splitting point z coordinate is unexpectedly zero"); + + ec_point_t test; + xDBL_A24(&test, &splits[current], &A24, false); + if (fp2_is_zero(&test.z)) + debug_print("z coordinate is unexpectedly zero before doubling"); + xDBL_A24(&test, &test, &A24, false); + if (!fp2_is_zero(&test.z)) + debug_print("z coordinate is unexpectedly not zero after doubling"); +#endif + } + + // Evaluate 4-isogeny + ec_kps4_t kps4; + xisog_4(&kps4, &A24, splits[current]); + xeval_4(splits, splits, current, &kps4); + for (int i = 0; i < current; ++i) + todo[i] -= 2; + xeval_4(points, points, len_points, &kps4); + + --current; + } + assert(isog_len % 2 ? 
!current : current == -1); + + // Final 2-isogeny + if (isog_len % 2) { +#ifndef NDEBUG + if (fp2_is_zero(&splits[0].z)) + debug_print("splitting point z coordinate is unexpectedly zero"); + ec_point_t test; + copy_point(&test, &splits[0]); + xDBL_A24(&test, &test, &A24, false); + if (!fp2_is_zero(&test.z)) + debug_print("z coordinate is unexpectedly not zero after doubling"); +#endif + + // We need to check the order of this point in case there were no 4-isogenies + if (isog_len == 1 && !ec_is_two_torsion(&splits[0], curve)) + return -1; + if (fp2_is_zero(&splits[0].x)) { + // special isogenies not allowed + // this case can only happen if isog_len == 1; otherwise the + // previous 4-isogenies we computed ensure that $T=(0:1)$ is put + // as the kernel of the dual isogeny + return -1; + } + + ec_kps2_t kps2; + xisog_2(&kps2, &A24, splits[0]); + xeval_2(points, points, len_points, &kps2); + } + + // Output curve in the form (A:C) + A24_to_AC(curve, &A24); + + curve->is_A24_computed_and_normalized = false; + + return 0; +} + +uint32_t +ec_eval_even(ec_curve_t *image, ec_isog_even_t *phi, ec_point_t *points, unsigned len_points) +{ + copy_curve(image, &phi->curve); + return ec_eval_even_strategy(image, points, len_points, &phi->kernel, phi->length); +} + +// naive implementation +uint32_t +ec_eval_small_chain(ec_curve_t *curve, + const ec_point_t *kernel, + int len, + ec_point_t *points, + unsigned len_points, + bool special) // do we allow special isogenies? 
+{ + + ec_point_t A24; + AC_to_A24(&A24, curve); + + ec_kps2_t kps; + ec_point_t small_K, big_K; + copy_point(&big_K, kernel); + + for (int i = 0; i < len; i++) { + copy_point(&small_K, &big_K); + // small_K = big_K; + for (int j = 0; j < len - i - 1; j++) { + xDBL_A24(&small_K, &small_K, &A24, false); + } + // Check the order of the point before the first isogeny step + if (i == 0 && !ec_is_two_torsion(&small_K, curve)) + return (uint32_t)-1; + // Perform isogeny step + if (fp2_is_zero(&small_K.x)) { + if (special) { + ec_point_t B24; + xisog_2_singular(&kps, &B24, A24); + xeval_2_singular(&big_K, &big_K, 1, &kps); + xeval_2_singular(points, points, len_points, &kps); + copy_point(&A24, &B24); + } else { + return (uint32_t)-1; + } + } else { + xisog_2(&kps, &A24, small_K); + xeval_2(&big_K, &big_K, 1, &kps); + xeval_2(points, points, len_points, &kps); + } + } + A24_to_AC(curve, &A24); + + curve->is_A24_computed_and_normalized = false; + return 0; +} + +uint32_t +ec_isomorphism(ec_isom_t *isom, const ec_curve_t *from, const ec_curve_t *to) +{ + fp2_t t0, t1, t2, t3, t4; + + fp2_mul(&t0, &from->A, &from->C); + fp2_mul(&t1, &to->A, &to->C); + + fp2_mul(&t2, &t1, &to->C); // toA*toC^2 + fp2_add(&t3, &t2, &t2); + fp2_add(&t3, &t3, &t3); + fp2_add(&t3, &t3, &t3); + fp2_add(&t2, &t2, &t3); // 9*toA*toC^2 + fp2_sqr(&t3, &to->A); + fp2_mul(&t3, &t3, &to->A); // toA^3 + fp2_add(&t3, &t3, &t3); + fp2_sub(&isom->Nx, &t3, &t2); // 2*toA^3-9*toA*toC^2 + fp2_mul(&t2, &t0, &from->A); // fromA^2*fromC + fp2_sqr(&t3, &from->C); + fp2_mul(&t3, &t3, &from->C); // fromC^3 + fp2_add(&t4, &t3, &t3); + fp2_add(&t3, &t4, &t3); // 3*fromC^3 + fp2_sub(&t3, &t3, &t2); // 3*fromC^3-fromA^2*fromC + fp2_mul(&isom->Nx, &isom->Nx, &t3); // lambda_x = (2*toA^3-9*toA*toC^2)*(3*fromC^3-fromA^2*fromC) + + fp2_mul(&t2, &t0, &from->C); // fromA*fromC^2 + fp2_add(&t3, &t2, &t2); + fp2_add(&t3, &t3, &t3); + fp2_add(&t3, &t3, &t3); + fp2_add(&t2, &t2, &t3); // 9*fromA*fromC^2 + fp2_sqr(&t3, &from->A); + 
fp2_mul(&t3, &t3, &from->A); // fromA^3 + fp2_add(&t3, &t3, &t3); + fp2_sub(&isom->D, &t3, &t2); // 2*fromA^3-9*fromA*fromC^2 + fp2_mul(&t2, &t1, &to->A); // toA^2*toC + fp2_sqr(&t3, &to->C); + fp2_mul(&t3, &t3, &to->C); // toC^3 + fp2_add(&t4, &t3, &t3); + fp2_add(&t3, &t4, &t3); // 3*toC^3 + fp2_sub(&t3, &t3, &t2); // 3*toC^3-toA^2*toC + fp2_mul(&isom->D, &isom->D, &t3); // lambda_z = (2*fromA^3-9*fromA*fromC^2)*(3*toC^3-toA^2*toC) + + // Mont -> SW -> SW -> Mont + fp2_mul(&t0, &to->C, &from->A); + fp2_mul(&t0, &t0, &isom->Nx); // lambda_x*toC*fromA + fp2_mul(&t1, &from->C, &to->A); + fp2_mul(&t1, &t1, &isom->D); // lambda_z*fromC*toA + fp2_sub(&isom->Nz, &t0, &t1); // lambda_x*toC*fromA - lambda_z*fromC*toA + fp2_mul(&t0, &from->C, &to->C); + fp2_add(&t1, &t0, &t0); + fp2_add(&t0, &t0, &t1); // 3*fromC*toC + fp2_mul(&isom->D, &isom->D, &t0); // 3*lambda_z*fromC*toC + fp2_mul(&isom->Nx, &isom->Nx, &t0); // 3*lambda_x*fromC*toC + + return (fp2_is_zero(&isom->Nx) | fp2_is_zero(&isom->D)); +} + +void +ec_iso_eval(ec_point_t *P, ec_isom_t *isom) +{ + fp2_t tmp; + fp2_mul(&P->x, &P->x, &isom->Nx); + fp2_mul(&tmp, &P->z, &isom->Nz); + fp2_add(&P->x, &P->x, &tmp); + fp2_mul(&P->z, &P->z, &isom->D); +} diff --git a/src/pqm4/sqisign_lvl1/ref/mp.c b/src/pqm4/sqisign_lvl1/ref/mp.c new file mode 100644 index 0000000..27f4a96 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/mp.c @@ -0,0 +1,357 @@ +#include +#include +#include +#include + +// double-wide multiplication +void +MUL(digit_t *out, const digit_t a, const digit_t b) +{ +#ifdef RADIX_32 + uint64_t r = (uint64_t)a * b; + out[0] = r & 0xFFFFFFFFUL; + out[1] = r >> 32; + +#elif defined(RADIX_64) && defined(_MSC_VER) + uint64_t umul_hi; + out[0] = _umul128(a, b, &umul_hi); + out[1] = umul_hi; + +#elif defined(RADIX_64) && defined(HAVE_UINT128) + unsigned __int128 umul_tmp; + umul_tmp = (unsigned __int128)(a) * (unsigned __int128)(b); + out[0] = (uint64_t)umul_tmp; + out[1] = (uint64_t)(umul_tmp >> 64); + +#else + register 
digit_t al, ah, bl, bh, temp; + digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry; + digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t) * 4), mask_high = (digit_t)(-1) << (sizeof(digit_t) * 4); + al = a & mask_low; // Low part + ah = a >> (sizeof(digit_t) * 4); // High part + bl = b & mask_low; + bh = b >> (sizeof(digit_t) * 4); + + albl = al * bl; + albh = al * bh; + ahbl = ah * bl; + ahbh = ah * bh; + out[0] = albl & mask_low; // out00 + + res1 = albl >> (sizeof(digit_t) * 4); + res2 = ahbl & mask_low; + res3 = albh & mask_low; + temp = res1 + res2 + res3; + carry = temp >> (sizeof(digit_t) * 4); + out[0] ^= temp << (sizeof(digit_t) * 4); // out01 + + res1 = ahbl >> (sizeof(digit_t) * 4); + res2 = albh >> (sizeof(digit_t) * 4); + res3 = ahbh & mask_low; + temp = res1 + res2 + res3 + carry; + out[1] = temp & mask_low; // out10 + carry = temp & mask_high; + out[1] ^= (ahbh & mask_high) + carry; // out11 + +#endif +} + +void +mp_add(digit_t *c, const digit_t *a, const digit_t *b, const unsigned int nwords) +{ // Multiprecision addition + unsigned int i, carry = 0; + + for (i = 0; i < nwords; i++) { + ADDC(c[i], carry, a[i], b[i], carry); + } +} + +digit_t +mp_shiftr(digit_t *x, const unsigned int shift, const unsigned int nwords) +{ // Multiprecision right shift by 1...RADIX-1 + digit_t bit_out = x[0] & 1; + + for (unsigned int i = 0; i < nwords - 1; i++) { + SHIFTR(x[i + 1], x[i], shift, x[i], RADIX); + } + x[nwords - 1] >>= shift; + return bit_out; +} + +void +mp_shiftl(digit_t *x, const unsigned int shift, const unsigned int nwords) +{ // Multiprecision left shift by 1...RADIX-1 + + for (int i = nwords - 1; i > 0; i--) { + SHIFTL(x[i], x[i - 1], shift, x[i], RADIX); + } + x[0] <<= shift; +} + +void +multiple_mp_shiftl(digit_t *x, const unsigned int shift, const unsigned int nwords) +{ + int t = shift; + while (t > RADIX - 1) { + mp_shiftl(x, RADIX - 1, nwords); + t = t - (RADIX - 1); + } + mp_shiftl(x, t, nwords); +} + +// The below functions were taken 
from the EC module
+
+void
+mp_sub(digit_t *c, const digit_t *a, const digit_t *b, const unsigned int nwords)
+{ // Multiprecision subtraction c = a - b over nwords digits, assuming a > b
+  // The borrow out of the last digit is discarded.
+    unsigned int i, borrow = 0;
+
+    for (i = 0; i < nwords; i++) {
+        SUBC(c[i], borrow, a[i], b[i], borrow);
+    }
+}
+
+void
+select_ct(digit_t *c, const digit_t *a, const digit_t *b, const digit_t mask, const int nwords)
+{ // Select c <- a if mask = 0, select c <- b if mask = 1...1
+  // Branch-free: each digit is ((a ^ b) & mask) ^ a.
+
+    for (int i = 0; i < nwords; i++) {
+        c[i] = ((a[i] ^ b[i]) & mask) ^ a[i];
+    }
+}
+
+void
+swap_ct(digit_t *a, digit_t *b, const digit_t option, const int nwords)
+{ // Swap entries
+  // If option = 0 then a and b are left unchanged, else if option = 0xFF...FF then a <- b and b <- a
+    digit_t temp;
+
+    for (int i = 0; i < nwords; i++) {
+        temp = option & (a[i] ^ b[i]);
+        a[i] = temp ^ a[i];
+        b[i] = temp ^ b[i];
+    }
+}
+
+int
+mp_compare(const digit_t *a, const digit_t *b, unsigned int nwords)
+{ // Multiprecision comparison, a=b? : (1) a>b, (0) a=b, (-1) a<b
+  // Digits are compared from index nwords-1 down to 0; the function returns at the
+  // first differing digit, so its running time depends on the operands.
+
+    // Unsigned countdown: avoids converting nwords to int and is a no-op for nwords == 0
+    for (unsigned int i = nwords; i-- > 0;) {
+        if (a[i] > b[i])
+            return 1;
+        else if (a[i] < b[i])
+            return -1;
+    }
+    return 0;
+}
+
+bool
+mp_is_zero(const digit_t *a, unsigned int nwords)
+{ // Is a multiprecision element zero?
+  // Returns 1 (true) if a=0, 0 (false) otherwise
+    digit_t r = 0;
+
+    for (unsigned int i = 0; i < nwords; i++)
+        r |= a[i] ^ 0;
+
+    return (bool)is_digit_zero_ct(r);
+}
+
+void
+mp_mul2(digit_t *c, const digit_t *a, const digit_t *b)
+{ // Multiprecision multiplication fixed to two-digit operands
+  // Writes four digits to c.
+  // NOTE(review): only a[0]*b[0], a[0]*b[1] and a[1]*b[1] are accumulated below; the
+  // a[1]*b[0] cross term never is. Presumably callers guarantee it vanishes -- confirm.
+    unsigned int carry = 0;
+    digit_t t0[2], t1[2], t2[2];
+
+    MUL(t0, a[0], b[0]);
+    MUL(t1, a[0], b[1]);
+    ADDC(t0[1], carry, t0[1], t1[0], carry);
+    ADDC(t1[1], carry, 0, t1[1], carry);
+    MUL(t2, a[1], b[1]);
+    ADDC(t2[0], carry, t2[0], t1[1], carry);
+    ADDC(t2[1], carry, 0, t2[1], carry);
+    c[0] = t0[0];
+    c[1] = t0[1];
+    c[2] = t2[0];
+    c[3] = t2[1];
+}
+
+void
+mp_print(const digit_t *a, size_t nwords)
+{ // Prints a as "0x" followed by its digits in hex, highest index first, no newline
+    printf("0x");
+    for (size_t i = 0; i < nwords; i++) {
+#ifdef RADIX_32
+        printf("%08" PRIx32, a[nwords - i - 1]); // Print each word with 8 hex digits
+#elif defined(RADIX_64)
+        printf("%016" PRIx64, a[nwords - i - 1]); // Print each word with 16 hex digits
+#endif
+    }
+}
+
+void
+mp_copy(digit_t *b, const digit_t *a, size_t nwords)
+{ // Copies nwords digits from a to b (note the destination-first argument order)
+    for (size_t i = 0; i < nwords; i++) {
+        b[i] = a[i];
+    }
+}
+
+void
+mp_mul(digit_t *c, const digit_t *a, const digit_t *b, size_t nwords)
+{
+    // Multiprecision multiplication, c = a*b, for nwords-digit inputs, with nwords-digit output
+    // explicitly does not use the higher half of c, as we do not need in our applications
+    // The product is accumulated in a local buffer and copied out at the end, so c may
+    // alias a or b.
+    if (nwords == 0) {
+        return; // nothing to compute; also avoids zero-length arrays below
+    }
+
+    digit_t carry, hi, UV[2], t[nwords], cc[nwords];
+
+    for (size_t i = 0; i < nwords; i++) {
+        cc[i] = 0;
+    }
+
+    for (size_t i = 0; i < nwords; i++) {
+        // t <- a[i] * b, truncated to nwords digits. The pending high digit is kept in
+        // a scalar so that nothing is ever written past t[nwords - 1] (the previous
+        // code stored it in t[1] even when nwords == 1).
+        MUL(UV, a[i], b[0]);
+        t[0] = UV[0];
+        hi = UV[1];
+
+        for (size_t j = 1; j < nwords; j++) {
+            MUL(UV, a[i], b[j]);
+            ADDC(t[j], carry, hi, UV[0], 0);
+            hi = UV[1] + carry; // no wrap: the high digit of a digit product is never all ones
+        }
+
+        // cc <- cc + t shifted up by i digits; digits beyond nwords are dropped
+        mp_add(&cc[i], &cc[i], t, nwords - i);
+    }
+
+    mp_copy(c, cc, nwords);
+}
+
+void
+mp_mod_2exp(digit_t *a, unsigned int e, unsigned int nwords)
+{ // Multiprecision modulo 2^e, with 0 <= a < 2^(e)
+    unsigned int i, q = e
>> LOG2RADIX, r = e & (RADIX - 1);
+
+    // q = index of the digit containing bit e, r = bit position inside that digit
+    if (q < nwords) {
+        a[q] &= ((digit_t)1 << r) - 1;
+
+        for (i = q + 1; i < nwords; i++) {
+            a[i] = 0;
+        }
+    }
+}
+
+void
+mp_neg(digit_t *a, unsigned int nwords)
+{ // negates a in place: two's complement over all nwords digits
+  // Every digit is inverted and 1 is added to the whole number. The carry of that +1
+  // must ripple upwards: the previous code only did a[0] += 1, which gave a wrong
+  // result whenever a[0] was 0, and touched a[0] even for nwords == 0.
+    digit_t carry = 1;
+
+    for (size_t i = 0; i < nwords; i++) {
+        a[i] = (digit_t)(~a[i]) + carry;
+        // the carry survives only while each digit so far has wrapped around to 0
+        carry &= (digit_t)is_digit_zero_ct(a[i]);
+    }
+}
+
+bool
+mp_is_one(const digit_t *x, unsigned int nwords)
+{ // returns true if x represents 1, and false otherwise
+    if (x[0] != 1) {
+        return false;
+    }
+
+    for (size_t i = 1; i < nwords; i++) {
+        if (x[i] != 0) {
+            return false;
+        }
+    }
+    return true;
+}
+
+void
+mp_inv_2e(digit_t *b, const digit_t *a, int e, unsigned int nwords)
+{ // Inversion modulo 2^e, using Newton's method and Hensel lifting
+  // we take the first power of 2 larger than e to use
+  // requires a to be odd, of course
+  // returns b such that a*b = 1 mod 2^e
+    assert((a[0] & 1) == 1);
+
+    digit_t x[nwords], y[nwords], aa[nwords], mp_one[nwords], tmp[nwords];
+    mp_copy(aa, a, nwords);
+
+    mp_one[0] = 1;
+    for (unsigned int i = 1; i < nwords; i++) {
+        mp_one[i] = 0;
+    }
+
+    int p = 1;
+    while ((1 << p) < e) {
+        p++;
+    }
+    p -= 2; // using k = 4 for initial inverse
+    int w = (1 << (p + 2));
+
+    mp_mod_2exp(aa, w, nwords);
+    mp_add(x, aa, aa, nwords);
+    mp_add(x, x, aa, nwords);  // should be 3a
+    x[0] ^= (1 << 1);          // so that x equals (3a) xor 2
+    mp_mod_2exp(x, w, nwords); // now x*a = 1 mod 2^4, which we lift
+
+    // y = 1 - a*x is the current error term
+    mp_mul(tmp, aa, x, nwords);
+    mp_neg(tmp, nwords);
+    mp_add(y, mp_one, tmp, nwords);
+
+    // Hensel lifting for p rounds
+    for (int i = 0; i < p; i++) {
+        mp_add(tmp, mp_one, y, nwords);
+        mp_mul(x, x, tmp, nwords);
+        mp_mul(y, y, y, nwords);
+    }
+
+    mp_mod_2exp(x, w, nwords);
+    mp_copy(b, x, nwords);
+
+    // verify results
+    mp_mul(x, x, aa, nwords);
+    mp_mod_2exp(x, w, nwords);
+    assert(mp_is_one(x, nwords));
+}
+
+void
+mp_invert_matrix(digit_t *r1, digit_t *r2, digit_t *s1, digit_t *s2, int e, unsigned int nwords)
+{
+    // given a matrix ( ( a, b ), (c, d) ) of values mod 2^e
+    // returns the
inverse matrix gamma ( (d, -b), (-c, a) ) + // where gamma is the inverse of the determinant a*d - b*c + // assumes the matrix is invertible, otherwises, inversion of determinant fails + + int p = 1; + while ((1 << p) < e) { + p++; + } + int w = (1 << (p)); + + digit_t det[nwords], tmp[nwords], resa[nwords], resb[nwords], resc[nwords], resd[nwords]; + mp_mul(tmp, r1, s2, nwords); + mp_mul(det, r2, s1, nwords); + mp_sub(det, tmp, det, nwords); + mp_inv_2e(det, det, e, nwords); + + mp_mul(resa, det, s2, nwords); + mp_mul(resb, det, r2, nwords); + mp_mul(resc, det, s1, nwords); + mp_mul(resd, det, r1, nwords); + + mp_neg(resb, nwords); + mp_neg(resc, nwords); + + mp_mod_2exp(resa, w, nwords); + mp_mod_2exp(resb, w, nwords); + mp_mod_2exp(resc, w, nwords); + mp_mod_2exp(resd, w, nwords); + + mp_copy(r1, resa, nwords); + mp_copy(r2, resb, nwords); + mp_copy(s1, resc, nwords); + mp_copy(s2, resd, nwords); +} diff --git a/src/pqm4/sqisign_lvl1/ref/mp.h b/src/pqm4/sqisign_lvl1/ref/mp.h new file mode 100644 index 0000000..b3733b5 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/mp.h @@ -0,0 +1,88 @@ +#ifndef MP_H +#define MP_H + +#include +#include +#include + +// Functions taken from the GF module + +void mp_add(digit_t *c, const digit_t *a, const digit_t *b, const unsigned int nwords); +digit_t mp_shiftr(digit_t *x, const unsigned int shift, const unsigned int nwords); +void multiple_mp_shiftl(digit_t *x, const unsigned int shift, const unsigned int nwords); +void mp_shiftl(digit_t *x, const unsigned int shift, const unsigned int nwords); +void MUL(digit_t *out, const digit_t a, const digit_t b); + +// Functions taken from the EC module + +void mp_sub(digit_t *c, const digit_t *a, const digit_t *b, const unsigned int nwords); +void select_ct(digit_t *c, const digit_t *a, const digit_t *b, const digit_t mask, const int nwords); +void swap_ct(digit_t *a, digit_t *b, const digit_t option, const int nwords); +int mp_compare(const digit_t *a, const digit_t *b, unsigned int 
nwords); +bool mp_is_zero(const digit_t *a, unsigned int nwords); +void mp_mul2(digit_t *c, const digit_t *a, const digit_t *b); + +// Further functions for multiprecision arithmetic +void mp_print(const digit_t *a, size_t nwords); +void mp_copy(digit_t *b, const digit_t *a, size_t nwords); +void mp_neg(digit_t *a, unsigned int nwords); +bool mp_is_one(const digit_t *x, unsigned int nwords); +void mp_mul(digit_t *c, const digit_t *a, const digit_t *b, size_t nwords); +void mp_mod_2exp(digit_t *a, unsigned int e, unsigned int nwords); +void mp_inv_2e(digit_t *b, const digit_t *a, int e, unsigned int nwords); +void mp_invert_matrix(digit_t *r1, digit_t *r2, digit_t *s1, digit_t *s2, int e, unsigned int nwords); + +#define mp_is_odd(x, nwords) (((nwords) != 0) & (int)(x)[0]) +#define mp_is_even(x, nwords) (!mp_is_odd(x, nwords)) + +/********************** Constant-time unsigned comparisons ***********************/ + +// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise +static inline unsigned int +is_digit_nonzero_ct(digit_t x) +{ // Is x != 0? + return (unsigned int)((x | (0 - x)) >> (RADIX - 1)); +} + +static inline unsigned int +is_digit_zero_ct(digit_t x) +{ // Is x = 0? + return (unsigned int)(1 ^ is_digit_nonzero_ct(x)); +} + +static inline unsigned int +is_digit_lessthan_ct(digit_t x, digit_t y) +{ // Is x < y? 
+ return (unsigned int)((x ^ ((x ^ y) | ((x - y) ^ y))) >> (RADIX - 1)); +} + +/********************** Platform-independent macros for digit-size operations + * **********************/ + +// Digit addition with carry +#define ADDC(sumOut, carryOut, addend1, addend2, carryIn) \ + { \ + digit_t tempReg = (addend1) + (digit_t)(carryIn); \ + (sumOut) = (addend2) + tempReg; \ + (carryOut) = (is_digit_lessthan_ct(tempReg, (digit_t)(carryIn)) | is_digit_lessthan_ct((sumOut), tempReg)); \ + } + +// Digit subtraction with borrow +#define SUBC(differenceOut, borrowOut, minuend, subtrahend, borrowIn) \ + { \ + digit_t tempReg = (minuend) - (subtrahend); \ + unsigned int borrowReg = \ + (is_digit_lessthan_ct((minuend), (subtrahend)) | ((borrowIn) & is_digit_zero_ct(tempReg))); \ + (differenceOut) = tempReg - (digit_t)(borrowIn); \ + (borrowOut) = borrowReg; \ + } + +// Shift right with flexible datatype +#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (DigitSize - (shift))); + +// Digit shift left +#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (RADIX - (shift))); + +#endif diff --git a/src/pqm4/sqisign_lvl1/ref/pqm4_api.c b/src/pqm4/sqisign_lvl1/ref/pqm4_api.c new file mode 100644 index 0000000..998fbd2 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/pqm4_api.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include + +typedef struct { + size_t mlen; + char msg[59]; + size_t smlen; + char sm[59 + CRYPTO_BYTES]; +} SQISign_KAT_t; + +const char kat_lvl1_pk[CRYPTO_PUBLICKEYBYTES] = { + 0x9F, 0x5F, 0x7F, 0xF0, 0x79, 0x3F, 0x17, 0x1C, 0x9B, 0x5D, 0x1B, 0x05, 0x99, 0xA8, 0x17, 0x68, 0x95, 0x14, 0x35, 0xFE, 0x8B, 0x18, 0x6D, 0xE0, 0xA1, 0x8B, 0xA0, 0xAB, 0x58, 0x39, 0x8C, 0x03, 0x7F, 0x40, 0xCC, 0x35, 0x7B, 0x0F, 0x4C, 0xAE, 0x9A, 0x93, 0x23, 0xCB, 0x31, 0xF2, 0x4C, 0x24, 0x47, 0xCA, 0x47, 0x17, 0x38, 0xD6, 0x00, 0x09, 0x34, 
0xC3, 0x16, 0x54, 0x10, 0x8B, 0x42, 0x01, 0x0B, +}; + +const SQISign_KAT_t kat_lvl1[2] = { + { + .mlen = 32, + .msg = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }, + .smlen = 32 + CRYPTO_BYTES, + .sm = { 0x7A, 0x34, 0xF2, 0xA7, 0xA8, 0x4F, 0xC0, 0x27, 0x0A, 0x4C, 0xEF, 0x98, 0x59, 0x0A, 0x18, 0x15, 0x6D, 0xBC, 0xC0, 0x22, 0xB5, 0x63, 0x1D, 0x20, 0xED, 0xB7, 0x37, 0x01, 0xC1, 0xF6, 0x02, 0x01, 0xF8, 0x51, 0x62, 0xA6, 0xA4, 0xF9, 0x6D, 0x92, 0xEA, 0x96, 0xE3, 0x11, 0x8B, 0x1A, 0x8C, 0xC9, 0x4A, 0x22, 0xF2, 0xD9, 0x36, 0x9A, 0xF6, 0xBD, 0x29, 0x84, 0x5A, 0xC8, 0x17, 0x2E, 0x73, 0x02, 0x00, 0x01, 0x36, 0x4C, 0x4B, 0x39, 0xFD, 0xF0, 0x1A, 0x6A, 0x89, 0xA4, 0xAB, 0x69, 0x67, 0x9D, 0xA0, 0x84, 0x5B, 0x2A, 0x9D, 0x1A, 0x89, 0x69, 0xAB, 0x7E, 0x6B, 0x44, 0xE5, 0xC9, 0x26, 0xEA, 0x3F, 0x16, 0x5A, 0x19, 0xFB, 0x24, 0x13, 0x4E, 0x69, 0x2B, 0x76, 0xBB, 0x41, 0x58, 0x90, 0x30, 0x2A, 0x37, 0x14, 0x84, 0xC9, 0x25, 0x92, 0x8D, 0xAB, 0x3C, 0x8E, 0x79, 0x08, 0x5C, 0xA6, 0x7F, 0x2F, 0x85, 0x10, 0x03, 0xB4, 0xE6, 0xCC, 0xB2, 0x09, 0xF2, 0xE2, 0x98, 0xC3, 0x8A, 0x47, 0xEF, 0x83, 0x00, 0x04, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }, + }, + { + .mlen = 59, + .msg = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }, + .smlen = 59 + CRYPTO_BYTES, + .sm = { 0xB5, 0xCC, 0x8D, 0xAA, 0xC9, 0xFD, 0x34, 0x72, 0xB7, 0xF5, 0xC8, 0x92, 0x9E, 0x4B, 0x3B, 0x6E, 0xF7, 0x32, 0x50, 0x9F, 0xC0, 
0xFD, 0xA4, 0xDD, 0x54, 0xFB, 0xFB, 0x28, 0x60, 0xAB, 0x40, 0x00, 0xD8, 0x60, 0xE0, 0xDD, 0x7E, 0xBD, 0xB1, 0x0F, 0xB6, 0x0A, 0xDC, 0x5E, 0xCC, 0x47, 0xC7, 0xDE, 0x50, 0x39, 0x87, 0x04, 0x4D, 0xF3, 0xC1, 0xBB, 0xDE, 0xAC, 0x9D, 0x55, 0x01, 0x61, 0x75, 0x03, 0x02, 0x01, 0xEE, 0xC8, 0x45, 0x75, 0xBD, 0xAC, 0x80, 0xC8, 0x06, 0x0F, 0xB0, 0x64, 0x34, 0x38, 0x8F, 0x39, 0x45, 0x75, 0xFF, 0x58, 0x0D, 0x78, 0xB2, 0xB5, 0x90, 0x17, 0x51, 0x39, 0x42, 0xAC, 0x21, 0x1E, 0x78, 0x90, 0xD3, 0xFA, 0x8D, 0xDC, 0x02, 0xC7, 0xB8, 0x31, 0x8B, 0x8E, 0x31, 0xD2, 0xF1, 0x25, 0xE9, 0xA3, 0xAC, 0x1E, 0x16, 0x9B, 0xD2, 0xA4, 0x6B, 0xC9, 0x27, 0xF1, 0xE0, 0x13, 0x50, 0x28, 0x7B, 0x23, 0x10, 0xCB, 0x69, 0x7D, 0x67, 0x8C, 0xB2, 0xB7, 0x07, 0x7F, 0xD4, 0xF5, 0x48, 0x01, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }, + }, +}; + +int crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { + memcpy(pk, kat_lvl1_pk, CRYPTO_PUBLICKEYBYTES); + // We don't need the secret key + memset(sk, 0, CRYPTO_SECRETKEYBYTES); +} + +int crypto_sign(unsigned char *sm, size_t *smlen, const unsigned char *m, + size_t mlen, const unsigned char *sk) { + for (size_t i = 0; i < sizeof(kat_lvl1) / sizeof(kat_lvl1[0]); i++) { + if (mlen == kat_lvl1[i].mlen) { + memcpy(sm, kat_lvl1[i].sm, kat_lvl1[i].smlen); + *smlen = kat_lvl1[i].smlen; + return 0; + } + } + + return 1; +} + +int crypto_sign_open(unsigned char *m, size_t *mlen, const unsigned char *sm, + size_t smlen, const unsigned char *pk) { + unsigned long long mlen_ull = *mlen; + int ret = sqisign_open(m, &mlen_ull, sm, smlen, pk); + if (mlen) { + *mlen = mlen_ull; + } + return ret; +} diff --git a/src/pqm4/sqisign_lvl1/ref/rng.h 
b/src/pqm4/sqisign_lvl1/ref/rng.h
new file mode 100644
index 0000000..3c24d07
--- /dev/null
+++ b/src/pqm4/sqisign_lvl1/ref/rng.h
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef rng_h
+#define rng_h
+
+#include "randombytes.h"
+
+#endif /* rng_h */
diff --git a/src/pqm4/sqisign_lvl1/ref/sig.h b/src/pqm4/sqisign_lvl1/ref/sig.h
new file mode 100644
index 0000000..4c33510
--- /dev/null
+++ b/src/pqm4/sqisign_lvl1/ref/sig.h
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef SQISIGN_H
+#define SQISIGN_H
+
+#include
+#include
+
+#if defined(ENABLE_SIGN)
+/**
+ * SQIsign keypair generation.
+ *
+ * The implementation corresponds to SQIsign.CompactKeyGen() in the SQIsign spec.
+ * The caller is responsible for allocating sufficient memory to hold pk and sk.
+ *
+ * @param[out] pk SQIsign public key
+ * @param[out] sk SQIsign secret key
+ * @return int status code: 0 on success, 1 on failure
+ */
+SQISIGN_API
+int sqisign_keypair(unsigned char *pk, unsigned char *sk);
+
+/**
+ * SQIsign signature generation.
+ *
+ * The implementation performs SQIsign.expandSK() + SQIsign.sign() in the SQIsign spec.
+ * The key provided is a compact secret key.
+ * The caller is responsible for allocating sufficient memory to hold sm.
+ *
+ * @param[out] sm Signature concatenated with message
+ * @param[out] smlen Pointer to the length of sm (set to 0 on failure)
+ * @param[in] m Message to be signed
+ * @param[in] mlen Message length
+ * @param[in] sk Compacted secret key
+ * @return int status code: 0 on success, 1 on failure
+ */
+SQISIGN_API
+int sqisign_sign(unsigned char *sm,
+                 unsigned long long *smlen,
+                 const unsigned char *m,
+                 unsigned long long mlen,
+                 const unsigned char *sk);
+#endif /* ENABLE_SIGN */
+
+/**
+ * SQIsign open signature.
+ *
+ * The implementation performs SQIsign.verify(). If the signature verification succeeded, the
+ * original message is stored in m. The key provided is a compact public key. The caller is
+ * responsible for allocating sufficient memory to hold m.
+ *
+ * @param[out] m Message stored if verification succeeds
+ * @param[out] mlen Pointer to the length of m (set to 0 on failure)
+ * @param[in] sm Signature concatenated with message
+ * @param[in] smlen Length of sm
+ * @param[in] pk Compacted public key
+ * @return int 0 if verification succeeded, 1 otherwise.
+ */
+SQISIGN_API
+int sqisign_open(unsigned char *m,
+                 unsigned long long *mlen,
+                 const unsigned char *sm,
+                 unsigned long long smlen,
+                 const unsigned char *pk);
+
+/**
+ * SQIsign verify signature.
+ *
+ * If the signature verification succeeded, returns 0, otherwise 1.
+ *
+ * @param[in] m Message whose signature is checked
+ * @param[in] mlen Length of m
+ * @param[in] sig Signature
+ * @param[in] siglen Length of sig
+ * @param[in] pk Compacted public key
+ * @return int 0 if verification succeeded, 1 otherwise.
+ */
+SQISIGN_API
+int sqisign_verify(const unsigned char *m,
+                   unsigned long long mlen,
+                   const unsigned char *sig,
+                   unsigned long long siglen,
+                   const unsigned char *pk);
+
+#endif /* SQISIGN_H */
diff --git a/src/pqm4/sqisign_lvl1/ref/sqisign.c b/src/pqm4/sqisign_lvl1/ref/sqisign.c
new file mode 100644
index 0000000..57fd75d
--- /dev/null
+++ b/src/pqm4/sqisign_lvl1/ref/sqisign.c
@@ -0,0 +1,124 @@
+#include
+#include
+#include
+#include
+#if defined(ENABLE_SIGN)
+#include
+#endif
+
+#if defined(ENABLE_SIGN)
+// Generate a key pair and serialize it into pk and sk. Returns !protocols_keygen(), i.e. 0 or 1.
+SQISIGN_API
+int
+sqisign_keypair(unsigned char *pk, unsigned char *sk)
+{
+    int ret = 0;
+    secret_key_t skt;
+    public_key_t pkt = { 0 };
+    secret_key_init(&skt);
+
+    ret = !protocols_keygen(&pkt, &skt);
+
+    secret_key_to_bytes(sk, &skt, &pkt);
+    public_key_to_bytes(pk, &pkt);
+    secret_key_finalize(&skt);
+    return ret;
+}
+
+// Sign m with the serialized secret key sk and write signature || message into sm.
+// Returns 0 on success; on failure returns 1 and sets *smlen to 0.
+SQISIGN_API
+int
+sqisign_sign(unsigned char *sm,
+             unsigned long long *smlen,
+             const unsigned char *m,
+             unsigned long long mlen,
+             const unsigned char *sk)
+{
+    int ret = 0;
+    secret_key_t skt;
+    public_key_t pkt = { 0 };
+    signature_t sigt;
+    secret_key_init(&skt);
+    secret_key_from_bytes(&skt, &pkt, sk);
+
+    // memmove rather than memcpy: it tolerates m overlapping the destination inside sm
+    memmove(sm + SIGNATURE_BYTES, m, mlen);
+
+    ret = !protocols_sign(&sigt, &pkt, &skt, sm + SIGNATURE_BYTES, mlen);
+    if (ret != 0) {
+        *smlen = 0;
+        goto err;
+    }
+
+    signature_to_bytes(sm, &sigt);
+    *smlen = SIGNATURE_BYTES + mlen;
+
+err:
+    secret_key_finalize(&skt);
+    return ret;
+}
+#endif
+
+// Verify the signature at the front of sm and, on success, copy the trailing message to m.
+// Returns 0 on success; on failure returns 1 and sets *mlen to 0.
+SQISIGN_API
+int
+sqisign_open(unsigned char *m,
+             unsigned long long *mlen,
+             const unsigned char *sm,
+             unsigned long long smlen,
+             const unsigned char *pk)
+{
+    int ret = 0;
+    public_key_t pkt = { 0 };
+    signature_t sigt;
+
+    // Reject input too short to contain a signature: smlen - SIGNATURE_BYTES below is unsigned
+    // and would wrap to a huge length, and sm would be parsed past its end.
+    if (smlen < SIGNATURE_BYTES) {
+        *mlen = 0;
+        return 1;
+    }
+
+    public_key_from_bytes(&pkt, pk);
+    signature_from_bytes(&sigt, sm);
+
+    ret = !protocols_verify(&sigt, &pkt, sm + SIGNATURE_BYTES, smlen - SIGNATURE_BYTES);
+
+    if (!ret) {
+        *mlen = smlen - SIGNATURE_BYTES;
+        memmove(m, sm + SIGNATURE_BYTES, *mlen);
+    } else {
+        *mlen = 0;
+        memset(m, 0, smlen - SIGNATURE_BYTES);
+    }
+
+    return ret;
+}
+
+// Verify a detached signature sig over the message m. Returns 0 if it verifies, 1 otherwise.
+SQISIGN_API
+int
+sqisign_verify(const unsigned char *m,
+               unsigned long long mlen,
+               const unsigned char *sig,
+               unsigned long long siglen,
+               const unsigned char *pk)
+{
+    int ret = 0;
+    public_key_t pkt = { 0 };
+    signature_t sigt;
+
+    // siglen used to be ignored; signature_from_bytes takes no length, so refuse short buffers here.
+    if (siglen < SIGNATURE_BYTES) {
+        return 1;
+    }
+
+    public_key_from_bytes(&pkt, pk);
+    signature_from_bytes(&sigt, sig);
+
+    ret = !protocols_verify(&sigt, &pkt, m, mlen);
+
+    return ret;
+}
diff --git a/src/pqm4/sqisign_lvl1/ref/sqisign_namespace.h b/src/pqm4/sqisign_lvl1/ref/sqisign_namespace.h
new file mode 100644
index 0000000..14fd51d
--- /dev/null
+++ b/src/pqm4/sqisign_lvl1/ref/sqisign_namespace.h
@@ -0,0 +1,1022 @@
+
+#ifndef SQISIGN_NAMESPACE_H
+#define SQISIGN_NAMESPACE_H
+
+//#define DISABLE_NAMESPACING
+
+#if defined(_WIN32)
+#define SQISIGN_API __declspec(dllexport)
+#else
+#define SQISIGN_API __attribute__((visibility("default")))
+#endif
+
+#define PARAM_JOIN3_(a, b, c) sqisign_##a##_##b##_##c
+#define PARAM_JOIN3(a, b, c) PARAM_JOIN3_(a, b, c)
+#define PARAM_NAME3(end, s) PARAM_JOIN3(SQISIGN_VARIANT, end, s)
+
+#define PARAM_JOIN2_(a, b) sqisign_##a##_##b
+#define PARAM_JOIN2(a, b) PARAM_JOIN2_(a, b)
+#define PARAM_NAME2(end, s) PARAM_JOIN2(end, s)
+
+#ifndef DISABLE_NAMESPACING +#define SQISIGN_NAMESPACE_GENERIC(s) PARAM_NAME2(gen, s) +#else +#define SQISIGN_NAMESPACE_GENERIC(s) s +#endif + +#if defined(SQISIGN_VARIANT) && !defined(DISABLE_NAMESPACING) +#if defined(SQISIGN_BUILD_TYPE_REF) +#define SQISIGN_NAMESPACE(s) PARAM_NAME3(ref, s) +#elif defined(SQISIGN_BUILD_TYPE_OPT) +#define SQISIGN_NAMESPACE(s) PARAM_NAME3(opt, s) +#elif defined(SQISIGN_BUILD_TYPE_BROADWELL) +#define SQISIGN_NAMESPACE(s) PARAM_NAME3(broadwell, s) +#elif defined(SQISIGN_BUILD_TYPE_ARM64CRYPTO) +#define SQISIGN_NAMESPACE(s) PARAM_NAME3(arm64crypto, s) +#else +#error "Build type not known" +#endif + +#else +#define SQISIGN_NAMESPACE(s) s +#endif + +// Namespacing symbols exported from algebra.c: +#undef quat_alg_add +#undef quat_alg_conj +#undef quat_alg_coord_mul +#undef quat_alg_elem_copy +#undef quat_alg_elem_copy_ibz +#undef quat_alg_elem_equal +#undef quat_alg_elem_is_zero +#undef quat_alg_elem_mul_by_scalar +#undef quat_alg_elem_set +#undef quat_alg_equal_denom +#undef quat_alg_init_set_ui +#undef quat_alg_make_primitive +#undef quat_alg_mul +#undef quat_alg_norm +#undef quat_alg_normalize +#undef quat_alg_scalar +#undef quat_alg_sub + +#define quat_alg_add SQISIGN_NAMESPACE_GENERIC(quat_alg_add) +#define quat_alg_conj SQISIGN_NAMESPACE_GENERIC(quat_alg_conj) +#define quat_alg_coord_mul SQISIGN_NAMESPACE_GENERIC(quat_alg_coord_mul) +#define quat_alg_elem_copy SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_copy) +#define quat_alg_elem_copy_ibz SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_copy_ibz) +#define quat_alg_elem_equal SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_equal) +#define quat_alg_elem_is_zero SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_is_zero) +#define quat_alg_elem_mul_by_scalar SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_mul_by_scalar) +#define quat_alg_elem_set SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_set) +#define quat_alg_equal_denom SQISIGN_NAMESPACE_GENERIC(quat_alg_equal_denom) +#define quat_alg_init_set_ui 
SQISIGN_NAMESPACE_GENERIC(quat_alg_init_set_ui) +#define quat_alg_make_primitive SQISIGN_NAMESPACE_GENERIC(quat_alg_make_primitive) +#define quat_alg_mul SQISIGN_NAMESPACE_GENERIC(quat_alg_mul) +#define quat_alg_norm SQISIGN_NAMESPACE_GENERIC(quat_alg_norm) +#define quat_alg_normalize SQISIGN_NAMESPACE_GENERIC(quat_alg_normalize) +#define quat_alg_scalar SQISIGN_NAMESPACE_GENERIC(quat_alg_scalar) +#define quat_alg_sub SQISIGN_NAMESPACE_GENERIC(quat_alg_sub) + +// Namespacing symbols exported from api.c: +#undef crypto_sign +#undef crypto_sign_keypair +#undef crypto_sign_open + +#define crypto_sign SQISIGN_NAMESPACE(crypto_sign) +#define crypto_sign_keypair SQISIGN_NAMESPACE(crypto_sign_keypair) +#define crypto_sign_open SQISIGN_NAMESPACE(crypto_sign_open) + +// Namespacing symbols exported from basis.c: +#undef ec_curve_to_basis_2f_from_hint +#undef ec_curve_to_basis_2f_to_hint +#undef ec_recover_y +#undef lift_basis +#undef lift_basis_normalized + +#define ec_curve_to_basis_2f_from_hint SQISIGN_NAMESPACE(ec_curve_to_basis_2f_from_hint) +#define ec_curve_to_basis_2f_to_hint SQISIGN_NAMESPACE(ec_curve_to_basis_2f_to_hint) +#define ec_recover_y SQISIGN_NAMESPACE(ec_recover_y) +#define lift_basis SQISIGN_NAMESPACE(lift_basis) +#define lift_basis_normalized SQISIGN_NAMESPACE(lift_basis_normalized) + +// Namespacing symbols exported from biextension.c: +#undef clear_cofac +#undef ec_dlog_2_tate +#undef ec_dlog_2_weil +#undef fp2_frob +#undef reduced_tate +#undef weil + +#define clear_cofac SQISIGN_NAMESPACE(clear_cofac) +#define ec_dlog_2_tate SQISIGN_NAMESPACE(ec_dlog_2_tate) +#define ec_dlog_2_weil SQISIGN_NAMESPACE(ec_dlog_2_weil) +#define fp2_frob SQISIGN_NAMESPACE(fp2_frob) +#define reduced_tate SQISIGN_NAMESPACE(reduced_tate) +#define weil SQISIGN_NAMESPACE(weil) + +// Namespacing symbols exported from common.c: +#undef hash_to_challenge +#undef public_key_finalize +#undef public_key_init + +#define hash_to_challenge SQISIGN_NAMESPACE(hash_to_challenge) +#define 
public_key_finalize SQISIGN_NAMESPACE(public_key_finalize) +#define public_key_init SQISIGN_NAMESPACE(public_key_init) + +// Namespacing symbols exported from dim2.c: +#undef ibz_2x2_mul_mod +#undef ibz_mat_2x2_add +#undef ibz_mat_2x2_copy +#undef ibz_mat_2x2_det_from_ibz +#undef ibz_mat_2x2_eval +#undef ibz_mat_2x2_inv_mod +#undef ibz_mat_2x2_set +#undef ibz_vec_2_set + +#define ibz_2x2_mul_mod SQISIGN_NAMESPACE_GENERIC(ibz_2x2_mul_mod) +#define ibz_mat_2x2_add SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_add) +#define ibz_mat_2x2_copy SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_copy) +#define ibz_mat_2x2_det_from_ibz SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_det_from_ibz) +#define ibz_mat_2x2_eval SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_eval) +#define ibz_mat_2x2_inv_mod SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_inv_mod) +#define ibz_mat_2x2_set SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_set) +#define ibz_vec_2_set SQISIGN_NAMESPACE_GENERIC(ibz_vec_2_set) + +// Namespacing symbols exported from dim2id2iso.c: +#undef dim2id2iso_arbitrary_isogeny_evaluation +#undef dim2id2iso_ideal_to_isogeny_clapotis +#undef find_uv +#undef fixed_degree_isogeny_and_eval + +#define dim2id2iso_arbitrary_isogeny_evaluation SQISIGN_NAMESPACE(dim2id2iso_arbitrary_isogeny_evaluation) +#define dim2id2iso_ideal_to_isogeny_clapotis SQISIGN_NAMESPACE(dim2id2iso_ideal_to_isogeny_clapotis) +#define find_uv SQISIGN_NAMESPACE(find_uv) +#define fixed_degree_isogeny_and_eval SQISIGN_NAMESPACE(fixed_degree_isogeny_and_eval) + +// Namespacing symbols exported from dim4.c: +#undef ibz_inv_dim4_make_coeff_mpm +#undef ibz_inv_dim4_make_coeff_pmp +#undef ibz_mat_4x4_copy +#undef ibz_mat_4x4_equal +#undef ibz_mat_4x4_eval +#undef ibz_mat_4x4_eval_t +#undef ibz_mat_4x4_gcd +#undef ibz_mat_4x4_identity +#undef ibz_mat_4x4_inv_with_det_as_denom +#undef ibz_mat_4x4_is_identity +#undef ibz_mat_4x4_mul +#undef ibz_mat_4x4_negate +#undef ibz_mat_4x4_scalar_div +#undef ibz_mat_4x4_scalar_mul +#undef ibz_mat_4x4_transpose +#undef 
ibz_mat_4x4_zero +#undef ibz_vec_4_add +#undef ibz_vec_4_content +#undef ibz_vec_4_copy +#undef ibz_vec_4_copy_ibz +#undef ibz_vec_4_is_zero +#undef ibz_vec_4_linear_combination +#undef ibz_vec_4_negate +#undef ibz_vec_4_scalar_div +#undef ibz_vec_4_scalar_mul +#undef ibz_vec_4_set +#undef ibz_vec_4_sub +#undef quat_qf_eval + +#define ibz_inv_dim4_make_coeff_mpm SQISIGN_NAMESPACE_GENERIC(ibz_inv_dim4_make_coeff_mpm) +#define ibz_inv_dim4_make_coeff_pmp SQISIGN_NAMESPACE_GENERIC(ibz_inv_dim4_make_coeff_pmp) +#define ibz_mat_4x4_copy SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_copy) +#define ibz_mat_4x4_equal SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_equal) +#define ibz_mat_4x4_eval SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_eval) +#define ibz_mat_4x4_eval_t SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_eval_t) +#define ibz_mat_4x4_gcd SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_gcd) +#define ibz_mat_4x4_identity SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_identity) +#define ibz_mat_4x4_inv_with_det_as_denom SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_inv_with_det_as_denom) +#define ibz_mat_4x4_is_identity SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_is_identity) +#define ibz_mat_4x4_mul SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_mul) +#define ibz_mat_4x4_negate SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_negate) +#define ibz_mat_4x4_scalar_div SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_scalar_div) +#define ibz_mat_4x4_scalar_mul SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_scalar_mul) +#define ibz_mat_4x4_transpose SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_transpose) +#define ibz_mat_4x4_zero SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_zero) +#define ibz_vec_4_add SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_add) +#define ibz_vec_4_content SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_content) +#define ibz_vec_4_copy SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_copy) +#define ibz_vec_4_copy_ibz SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_copy_ibz) +#define ibz_vec_4_is_zero SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_is_zero) +#define ibz_vec_4_linear_combination 
SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_linear_combination) +#define ibz_vec_4_negate SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_negate) +#define ibz_vec_4_scalar_div SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_scalar_div) +#define ibz_vec_4_scalar_mul SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_scalar_mul) +#define ibz_vec_4_set SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_set) +#define ibz_vec_4_sub SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_sub) +#define quat_qf_eval SQISIGN_NAMESPACE_GENERIC(quat_qf_eval) + +// Namespacing symbols exported from ec.c: +#undef cswap_points +#undef ec_biscalar_mul +#undef ec_curve_init +#undef ec_curve_init_from_A +#undef ec_curve_normalize_A24 +#undef ec_curve_verify_A +#undef ec_dbl +#undef ec_dbl_iter +#undef ec_dbl_iter_basis +#undef ec_has_zero_coordinate +#undef ec_is_basis_four_torsion +#undef ec_is_equal +#undef ec_is_four_torsion +#undef ec_is_two_torsion +#undef ec_is_zero +#undef ec_j_inv +#undef ec_ladder3pt +#undef ec_mul +#undef ec_normalize_curve +#undef ec_normalize_curve_and_A24 +#undef ec_normalize_point +#undef ec_point_init +#undef select_point +#undef xADD +#undef xDBL +#undef xDBLADD +#undef xDBLMUL +#undef xDBL_A24 +#undef xDBL_E0 +#undef xMUL + +#define cswap_points SQISIGN_NAMESPACE(cswap_points) +#define ec_biscalar_mul SQISIGN_NAMESPACE(ec_biscalar_mul) +#define ec_curve_init SQISIGN_NAMESPACE(ec_curve_init) +#define ec_curve_init_from_A SQISIGN_NAMESPACE(ec_curve_init_from_A) +#define ec_curve_normalize_A24 SQISIGN_NAMESPACE(ec_curve_normalize_A24) +#define ec_curve_verify_A SQISIGN_NAMESPACE(ec_curve_verify_A) +#define ec_dbl SQISIGN_NAMESPACE(ec_dbl) +#define ec_dbl_iter SQISIGN_NAMESPACE(ec_dbl_iter) +#define ec_dbl_iter_basis SQISIGN_NAMESPACE(ec_dbl_iter_basis) +#define ec_has_zero_coordinate SQISIGN_NAMESPACE(ec_has_zero_coordinate) +#define ec_is_basis_four_torsion SQISIGN_NAMESPACE(ec_is_basis_four_torsion) +#define ec_is_equal SQISIGN_NAMESPACE(ec_is_equal) +#define ec_is_four_torsion SQISIGN_NAMESPACE(ec_is_four_torsion) +#define 
ec_is_two_torsion SQISIGN_NAMESPACE(ec_is_two_torsion) +#define ec_is_zero SQISIGN_NAMESPACE(ec_is_zero) +#define ec_j_inv SQISIGN_NAMESPACE(ec_j_inv) +#define ec_ladder3pt SQISIGN_NAMESPACE(ec_ladder3pt) +#define ec_mul SQISIGN_NAMESPACE(ec_mul) +#define ec_normalize_curve SQISIGN_NAMESPACE(ec_normalize_curve) +#define ec_normalize_curve_and_A24 SQISIGN_NAMESPACE(ec_normalize_curve_and_A24) +#define ec_normalize_point SQISIGN_NAMESPACE(ec_normalize_point) +#define ec_point_init SQISIGN_NAMESPACE(ec_point_init) +#define select_point SQISIGN_NAMESPACE(select_point) +#define xADD SQISIGN_NAMESPACE(xADD) +#define xDBL SQISIGN_NAMESPACE(xDBL) +#define xDBLADD SQISIGN_NAMESPACE(xDBLADD) +#define xDBLMUL SQISIGN_NAMESPACE(xDBLMUL) +#define xDBL_A24 SQISIGN_NAMESPACE(xDBL_A24) +#define xDBL_E0 SQISIGN_NAMESPACE(xDBL_E0) +#define xMUL SQISIGN_NAMESPACE(xMUL) + +// Namespacing symbols exported from ec_jac.c: +#undef ADD +#undef DBL +#undef DBLW +#undef copy_jac_point +#undef jac_from_ws +#undef jac_init +#undef jac_is_equal +#undef jac_neg +#undef jac_to_ws +#undef jac_to_xz +#undef jac_to_xz_add_components +#undef select_jac_point + +#define ADD SQISIGN_NAMESPACE(ADD) +#define DBL SQISIGN_NAMESPACE(DBL) +#define DBLW SQISIGN_NAMESPACE(DBLW) +#define copy_jac_point SQISIGN_NAMESPACE(copy_jac_point) +#define jac_from_ws SQISIGN_NAMESPACE(jac_from_ws) +#define jac_init SQISIGN_NAMESPACE(jac_init) +#define jac_is_equal SQISIGN_NAMESPACE(jac_is_equal) +#define jac_neg SQISIGN_NAMESPACE(jac_neg) +#define jac_to_ws SQISIGN_NAMESPACE(jac_to_ws) +#define jac_to_xz SQISIGN_NAMESPACE(jac_to_xz) +#define jac_to_xz_add_components SQISIGN_NAMESPACE(jac_to_xz_add_components) +#define select_jac_point SQISIGN_NAMESPACE(select_jac_point) + +// Namespacing symbols exported from encode_signature.c: +#undef secret_key_from_bytes +#undef secret_key_to_bytes + +#define secret_key_from_bytes SQISIGN_NAMESPACE(secret_key_from_bytes) +#define secret_key_to_bytes 
SQISIGN_NAMESPACE(secret_key_to_bytes) + +// Namespacing symbols exported from encode_verification.c: +#undef public_key_from_bytes +#undef public_key_to_bytes +#undef signature_from_bytes +#undef signature_to_bytes + +#define public_key_from_bytes SQISIGN_NAMESPACE(public_key_from_bytes) +#define public_key_to_bytes SQISIGN_NAMESPACE(public_key_to_bytes) +#define signature_from_bytes SQISIGN_NAMESPACE(signature_from_bytes) +#define signature_to_bytes SQISIGN_NAMESPACE(signature_to_bytes) + +// Namespacing symbols exported from finit.c: +#undef ibz_mat_2x2_finalize +#undef ibz_mat_2x2_init +#undef ibz_mat_4x4_finalize +#undef ibz_mat_4x4_init +#undef ibz_vec_2_finalize +#undef ibz_vec_2_init +#undef ibz_vec_4_finalize +#undef ibz_vec_4_init +#undef quat_alg_elem_finalize +#undef quat_alg_elem_init +#undef quat_alg_finalize +#undef quat_alg_init_set +#undef quat_lattice_finalize +#undef quat_lattice_init +#undef quat_left_ideal_finalize +#undef quat_left_ideal_init + +#define ibz_mat_2x2_finalize SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_finalize) +#define ibz_mat_2x2_init SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_init) +#define ibz_mat_4x4_finalize SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_finalize) +#define ibz_mat_4x4_init SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_init) +#define ibz_vec_2_finalize SQISIGN_NAMESPACE_GENERIC(ibz_vec_2_finalize) +#define ibz_vec_2_init SQISIGN_NAMESPACE_GENERIC(ibz_vec_2_init) +#define ibz_vec_4_finalize SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_finalize) +#define ibz_vec_4_init SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_init) +#define quat_alg_elem_finalize SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_finalize) +#define quat_alg_elem_init SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_init) +#define quat_alg_finalize SQISIGN_NAMESPACE_GENERIC(quat_alg_finalize) +#define quat_alg_init_set SQISIGN_NAMESPACE_GENERIC(quat_alg_init_set) +#define quat_lattice_finalize SQISIGN_NAMESPACE_GENERIC(quat_lattice_finalize) +#define quat_lattice_init 
SQISIGN_NAMESPACE_GENERIC(quat_lattice_init) +#define quat_left_ideal_finalize SQISIGN_NAMESPACE_GENERIC(quat_left_ideal_finalize) +#define quat_left_ideal_init SQISIGN_NAMESPACE_GENERIC(quat_left_ideal_init) + +// Namespacing symbols exported from fp.c: +#undef fp_select + +#define fp_select SQISIGN_NAMESPACE(fp_select) + +// Namespacing symbols exported from fp.c, fp_p27500_64.c, fp_p5248_64.c, fp_p65376_64.c: +#undef fp_exp3div4 +#undef fp_inv +#undef fp_is_square +#undef fp_sqrt + +#define fp_exp3div4 SQISIGN_NAMESPACE(fp_exp3div4) +#define fp_inv SQISIGN_NAMESPACE(fp_inv) +#define fp_is_square SQISIGN_NAMESPACE(fp_is_square) +#define fp_sqrt SQISIGN_NAMESPACE(fp_sqrt) + +// Namespacing symbols exported from fp2.c: +#undef fp2_add +#undef fp2_add_one +#undef fp2_batched_inv +#undef fp2_copy +#undef fp2_cswap +#undef fp2_decode +#undef fp2_encode +#undef fp2_half +#undef fp2_inv +#undef fp2_is_equal +#undef fp2_is_one +#undef fp2_is_square +#undef fp2_is_zero +#undef fp2_mul +#undef fp2_mul_small +#undef fp2_neg +#undef fp2_pow_vartime +#undef fp2_print +#undef fp2_select +#undef fp2_set_one +#undef fp2_set_small +#undef fp2_set_zero +#undef fp2_sqr +#undef fp2_sqrt +#undef fp2_sqrt_verify +#undef fp2_sub + +#define fp2_add SQISIGN_NAMESPACE(fp2_add) +#define fp2_add_one SQISIGN_NAMESPACE(fp2_add_one) +#define fp2_batched_inv SQISIGN_NAMESPACE(fp2_batched_inv) +#define fp2_copy SQISIGN_NAMESPACE(fp2_copy) +#define fp2_cswap SQISIGN_NAMESPACE(fp2_cswap) +#define fp2_decode SQISIGN_NAMESPACE(fp2_decode) +#define fp2_encode SQISIGN_NAMESPACE(fp2_encode) +#define fp2_half SQISIGN_NAMESPACE(fp2_half) +#define fp2_inv SQISIGN_NAMESPACE(fp2_inv) +#define fp2_is_equal SQISIGN_NAMESPACE(fp2_is_equal) +#define fp2_is_one SQISIGN_NAMESPACE(fp2_is_one) +#define fp2_is_square SQISIGN_NAMESPACE(fp2_is_square) +#define fp2_is_zero SQISIGN_NAMESPACE(fp2_is_zero) +#define fp2_mul SQISIGN_NAMESPACE(fp2_mul) +#define fp2_mul_small SQISIGN_NAMESPACE(fp2_mul_small) +#define fp2_neg 
SQISIGN_NAMESPACE(fp2_neg) +#define fp2_pow_vartime SQISIGN_NAMESPACE(fp2_pow_vartime) +#define fp2_print SQISIGN_NAMESPACE(fp2_print) +#define fp2_select SQISIGN_NAMESPACE(fp2_select) +#define fp2_set_one SQISIGN_NAMESPACE(fp2_set_one) +#define fp2_set_small SQISIGN_NAMESPACE(fp2_set_small) +#define fp2_set_zero SQISIGN_NAMESPACE(fp2_set_zero) +#define fp2_sqr SQISIGN_NAMESPACE(fp2_sqr) +#define fp2_sqrt SQISIGN_NAMESPACE(fp2_sqrt) +#define fp2_sqrt_verify SQISIGN_NAMESPACE(fp2_sqrt_verify) +#define fp2_sub SQISIGN_NAMESPACE(fp2_sub) + +// Namespacing symbols exported from fp_p27500_64.c, fp_p5248_64.c, fp_p65376_64.c: +#undef fp_copy +#undef fp_cswap +#undef fp_decode +#undef fp_decode_reduce +#undef fp_div3 +#undef fp_encode +#undef fp_half +#undef fp_is_equal +#undef fp_is_zero +#undef fp_mul_small +#undef fp_neg +#undef fp_set_one +#undef fp_set_small +#undef fp_set_zero + +#define fp_copy SQISIGN_NAMESPACE(fp_copy) +#define fp_cswap SQISIGN_NAMESPACE(fp_cswap) +#define fp_decode SQISIGN_NAMESPACE(fp_decode) +#define fp_decode_reduce SQISIGN_NAMESPACE(fp_decode_reduce) +#define fp_div3 SQISIGN_NAMESPACE(fp_div3) +#define fp_encode SQISIGN_NAMESPACE(fp_encode) +#define fp_half SQISIGN_NAMESPACE(fp_half) +#define fp_is_equal SQISIGN_NAMESPACE(fp_is_equal) +#define fp_is_zero SQISIGN_NAMESPACE(fp_is_zero) +#define fp_mul_small SQISIGN_NAMESPACE(fp_mul_small) +#define fp_neg SQISIGN_NAMESPACE(fp_neg) +#define fp_set_one SQISIGN_NAMESPACE(fp_set_one) +#define fp_set_small SQISIGN_NAMESPACE(fp_set_small) +#define fp_set_zero SQISIGN_NAMESPACE(fp_set_zero) + +// Namespacing symbols exported from fp_p27500_64.c, fp_p5248_64.c, fp_p65376_64.c, gf27500.c, gf5248.c, gf65376.c: +#undef fp_add +#undef fp_mul +#undef fp_sqr +#undef fp_sub + +#define fp_add SQISIGN_NAMESPACE(fp_add) +#define fp_mul SQISIGN_NAMESPACE(fp_mul) +#define fp_sqr SQISIGN_NAMESPACE(fp_sqr) +#define fp_sub SQISIGN_NAMESPACE(fp_sub) + +// Namespacing symbols exported from gf27500.c: +#undef 
gf27500_decode +#undef gf27500_decode_reduce +#undef gf27500_div +#undef gf27500_div3 +#undef gf27500_encode +#undef gf27500_invert +#undef gf27500_legendre +#undef gf27500_sqrt + +#define gf27500_decode SQISIGN_NAMESPACE(gf27500_decode) +#define gf27500_decode_reduce SQISIGN_NAMESPACE(gf27500_decode_reduce) +#define gf27500_div SQISIGN_NAMESPACE(gf27500_div) +#define gf27500_div3 SQISIGN_NAMESPACE(gf27500_div3) +#define gf27500_encode SQISIGN_NAMESPACE(gf27500_encode) +#define gf27500_invert SQISIGN_NAMESPACE(gf27500_invert) +#define gf27500_legendre SQISIGN_NAMESPACE(gf27500_legendre) +#define gf27500_sqrt SQISIGN_NAMESPACE(gf27500_sqrt) + +// Namespacing symbols exported from gf27500.c, gf5248.c, gf65376.c: +#undef fp2_mul_c0 +#undef fp2_mul_c1 +#undef fp2_sq_c0 +#undef fp2_sq_c1 + +#define fp2_mul_c0 SQISIGN_NAMESPACE(fp2_mul_c0) +#define fp2_mul_c1 SQISIGN_NAMESPACE(fp2_mul_c1) +#define fp2_sq_c0 SQISIGN_NAMESPACE(fp2_sq_c0) +#define fp2_sq_c1 SQISIGN_NAMESPACE(fp2_sq_c1) + +// Namespacing symbols exported from gf5248.c: +#undef gf5248_decode +#undef gf5248_decode_reduce +#undef gf5248_div +#undef gf5248_div3 +#undef gf5248_encode +#undef gf5248_invert +#undef gf5248_legendre +#undef gf5248_sqrt + +#define gf5248_decode SQISIGN_NAMESPACE(gf5248_decode) +#define gf5248_decode_reduce SQISIGN_NAMESPACE(gf5248_decode_reduce) +#define gf5248_div SQISIGN_NAMESPACE(gf5248_div) +#define gf5248_div3 SQISIGN_NAMESPACE(gf5248_div3) +#define gf5248_encode SQISIGN_NAMESPACE(gf5248_encode) +#define gf5248_invert SQISIGN_NAMESPACE(gf5248_invert) +#define gf5248_legendre SQISIGN_NAMESPACE(gf5248_legendre) +#define gf5248_sqrt SQISIGN_NAMESPACE(gf5248_sqrt) + +// Namespacing symbols exported from gf65376.c: +#undef gf65376_decode +#undef gf65376_decode_reduce +#undef gf65376_div +#undef gf65376_div3 +#undef gf65376_encode +#undef gf65376_invert +#undef gf65376_legendre +#undef gf65376_sqrt + +#define gf65376_decode SQISIGN_NAMESPACE(gf65376_decode) +#define 
gf65376_decode_reduce SQISIGN_NAMESPACE(gf65376_decode_reduce) +#define gf65376_div SQISIGN_NAMESPACE(gf65376_div) +#define gf65376_div3 SQISIGN_NAMESPACE(gf65376_div3) +#define gf65376_encode SQISIGN_NAMESPACE(gf65376_encode) +#define gf65376_invert SQISIGN_NAMESPACE(gf65376_invert) +#define gf65376_legendre SQISIGN_NAMESPACE(gf65376_legendre) +#define gf65376_sqrt SQISIGN_NAMESPACE(gf65376_sqrt) + +// Namespacing symbols exported from hd.c: +#undef add_couple_jac_points +#undef copy_bases_to_kernel +#undef couple_jac_to_xz +#undef double_couple_jac_point +#undef double_couple_jac_point_iter +#undef double_couple_point +#undef double_couple_point_iter + +#define add_couple_jac_points SQISIGN_NAMESPACE(add_couple_jac_points) +#define copy_bases_to_kernel SQISIGN_NAMESPACE(copy_bases_to_kernel) +#define couple_jac_to_xz SQISIGN_NAMESPACE(couple_jac_to_xz) +#define double_couple_jac_point SQISIGN_NAMESPACE(double_couple_jac_point) +#define double_couple_jac_point_iter SQISIGN_NAMESPACE(double_couple_jac_point_iter) +#define double_couple_point SQISIGN_NAMESPACE(double_couple_point) +#define double_couple_point_iter SQISIGN_NAMESPACE(double_couple_point_iter) + +// Namespacing symbols exported from hnf.c: +#undef ibz_mat_4x4_is_hnf +#undef ibz_mat_4xn_hnf_mod_core +#undef ibz_vec_4_copy_mod +#undef ibz_vec_4_linear_combination_mod +#undef ibz_vec_4_scalar_mul_mod + +#define ibz_mat_4x4_is_hnf SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_is_hnf) +#define ibz_mat_4xn_hnf_mod_core SQISIGN_NAMESPACE_GENERIC(ibz_mat_4xn_hnf_mod_core) +#define ibz_vec_4_copy_mod SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_copy_mod) +#define ibz_vec_4_linear_combination_mod SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_linear_combination_mod) +#define ibz_vec_4_scalar_mul_mod SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_scalar_mul_mod) + +// Namespacing symbols exported from hnf_internal.c: +#undef ibz_centered_mod +#undef ibz_conditional_assign +#undef ibz_mod_not_zero +#undef ibz_xgcd_with_u_not_0 + +#define 
ibz_centered_mod SQISIGN_NAMESPACE_GENERIC(ibz_centered_mod) +#define ibz_conditional_assign SQISIGN_NAMESPACE_GENERIC(ibz_conditional_assign) +#define ibz_mod_not_zero SQISIGN_NAMESPACE_GENERIC(ibz_mod_not_zero) +#define ibz_xgcd_with_u_not_0 SQISIGN_NAMESPACE_GENERIC(ibz_xgcd_with_u_not_0) + +// Namespacing symbols exported from ibz_division.c: +#undef ibz_xgcd + +#define ibz_xgcd SQISIGN_NAMESPACE_GENERIC(ibz_xgcd) + +// Namespacing symbols exported from id2iso.c: +#undef change_of_basis_matrix_tate +#undef change_of_basis_matrix_tate_invert +#undef ec_biscalar_mul_ibz_vec +#undef endomorphism_application_even_basis +#undef id2iso_ideal_to_kernel_dlogs_even +#undef id2iso_kernel_dlogs_to_ideal_even +#undef matrix_application_even_basis + +#define change_of_basis_matrix_tate SQISIGN_NAMESPACE(change_of_basis_matrix_tate) +#define change_of_basis_matrix_tate_invert SQISIGN_NAMESPACE(change_of_basis_matrix_tate_invert) +#define ec_biscalar_mul_ibz_vec SQISIGN_NAMESPACE(ec_biscalar_mul_ibz_vec) +#define endomorphism_application_even_basis SQISIGN_NAMESPACE(endomorphism_application_even_basis) +#define id2iso_ideal_to_kernel_dlogs_even SQISIGN_NAMESPACE(id2iso_ideal_to_kernel_dlogs_even) +#define id2iso_kernel_dlogs_to_ideal_even SQISIGN_NAMESPACE(id2iso_kernel_dlogs_to_ideal_even) +#define matrix_application_even_basis SQISIGN_NAMESPACE(matrix_application_even_basis) + +// Namespacing symbols exported from ideal.c: +#undef quat_lideal_add +#undef quat_lideal_class_gram +#undef quat_lideal_conjugate_without_hnf +#undef quat_lideal_copy +#undef quat_lideal_create +#undef quat_lideal_create_principal +#undef quat_lideal_equals +#undef quat_lideal_generator +#undef quat_lideal_inter +#undef quat_lideal_inverse_lattice_without_hnf +#undef quat_lideal_mul +#undef quat_lideal_norm +#undef quat_lideal_right_order +#undef quat_lideal_right_transporter +#undef quat_order_discriminant +#undef quat_order_is_maximal + +#define quat_lideal_add 
SQISIGN_NAMESPACE_GENERIC(quat_lideal_add) +#define quat_lideal_class_gram SQISIGN_NAMESPACE_GENERIC(quat_lideal_class_gram) +#define quat_lideal_conjugate_without_hnf SQISIGN_NAMESPACE_GENERIC(quat_lideal_conjugate_without_hnf) +#define quat_lideal_copy SQISIGN_NAMESPACE_GENERIC(quat_lideal_copy) +#define quat_lideal_create SQISIGN_NAMESPACE_GENERIC(quat_lideal_create) +#define quat_lideal_create_principal SQISIGN_NAMESPACE_GENERIC(quat_lideal_create_principal) +#define quat_lideal_equals SQISIGN_NAMESPACE_GENERIC(quat_lideal_equals) +#define quat_lideal_generator SQISIGN_NAMESPACE_GENERIC(quat_lideal_generator) +#define quat_lideal_inter SQISIGN_NAMESPACE_GENERIC(quat_lideal_inter) +#define quat_lideal_inverse_lattice_without_hnf SQISIGN_NAMESPACE_GENERIC(quat_lideal_inverse_lattice_without_hnf) +#define quat_lideal_mul SQISIGN_NAMESPACE_GENERIC(quat_lideal_mul) +#define quat_lideal_norm SQISIGN_NAMESPACE_GENERIC(quat_lideal_norm) +#define quat_lideal_right_order SQISIGN_NAMESPACE_GENERIC(quat_lideal_right_order) +#define quat_lideal_right_transporter SQISIGN_NAMESPACE_GENERIC(quat_lideal_right_transporter) +#define quat_order_discriminant SQISIGN_NAMESPACE_GENERIC(quat_order_discriminant) +#define quat_order_is_maximal SQISIGN_NAMESPACE_GENERIC(quat_order_is_maximal) + +// Namespacing symbols exported from intbig.c: +#undef ibz_abs +#undef ibz_add +#undef ibz_bitsize +#undef ibz_cmp +#undef ibz_cmp_int32 +#undef ibz_convert_to_str +#undef ibz_copy +#undef ibz_copy_digits +#undef ibz_div +#undef ibz_div_2exp +#undef ibz_div_floor +#undef ibz_divides +#undef ibz_finalize +#undef ibz_gcd +#undef ibz_get +#undef ibz_init +#undef ibz_invmod +#undef ibz_is_even +#undef ibz_is_odd +#undef ibz_is_one +#undef ibz_is_zero +#undef ibz_legendre +#undef ibz_mod +#undef ibz_mod_ui +#undef ibz_mul +#undef ibz_neg +#undef ibz_pow +#undef ibz_pow_mod +#undef ibz_print +#undef ibz_probab_prime +#undef ibz_rand_interval +#undef ibz_rand_interval_bits +#undef ibz_rand_interval_i 
+#undef ibz_rand_interval_minm_m +#undef ibz_set +#undef ibz_set_from_str +#undef ibz_size_in_base +#undef ibz_sqrt +#undef ibz_sqrt_floor +#undef ibz_sqrt_mod_p +#undef ibz_sub +#undef ibz_swap +#undef ibz_to_digits +#undef ibz_two_adic + +#define ibz_abs SQISIGN_NAMESPACE_GENERIC(ibz_abs) +#define ibz_add SQISIGN_NAMESPACE_GENERIC(ibz_add) +#define ibz_bitsize SQISIGN_NAMESPACE_GENERIC(ibz_bitsize) +#define ibz_cmp SQISIGN_NAMESPACE_GENERIC(ibz_cmp) +#define ibz_cmp_int32 SQISIGN_NAMESPACE_GENERIC(ibz_cmp_int32) +#define ibz_convert_to_str SQISIGN_NAMESPACE_GENERIC(ibz_convert_to_str) +#define ibz_copy SQISIGN_NAMESPACE_GENERIC(ibz_copy) +#define ibz_copy_digits SQISIGN_NAMESPACE_GENERIC(ibz_copy_digits) +#define ibz_div SQISIGN_NAMESPACE_GENERIC(ibz_div) +#define ibz_div_2exp SQISIGN_NAMESPACE_GENERIC(ibz_div_2exp) +#define ibz_div_floor SQISIGN_NAMESPACE_GENERIC(ibz_div_floor) +#define ibz_divides SQISIGN_NAMESPACE_GENERIC(ibz_divides) +#define ibz_finalize SQISIGN_NAMESPACE_GENERIC(ibz_finalize) +#define ibz_gcd SQISIGN_NAMESPACE_GENERIC(ibz_gcd) +#define ibz_get SQISIGN_NAMESPACE_GENERIC(ibz_get) +#define ibz_init SQISIGN_NAMESPACE_GENERIC(ibz_init) +#define ibz_invmod SQISIGN_NAMESPACE_GENERIC(ibz_invmod) +#define ibz_is_even SQISIGN_NAMESPACE_GENERIC(ibz_is_even) +#define ibz_is_odd SQISIGN_NAMESPACE_GENERIC(ibz_is_odd) +#define ibz_is_one SQISIGN_NAMESPACE_GENERIC(ibz_is_one) +#define ibz_is_zero SQISIGN_NAMESPACE_GENERIC(ibz_is_zero) +#define ibz_legendre SQISIGN_NAMESPACE_GENERIC(ibz_legendre) +#define ibz_mod SQISIGN_NAMESPACE_GENERIC(ibz_mod) +#define ibz_mod_ui SQISIGN_NAMESPACE_GENERIC(ibz_mod_ui) +#define ibz_mul SQISIGN_NAMESPACE_GENERIC(ibz_mul) +#define ibz_neg SQISIGN_NAMESPACE_GENERIC(ibz_neg) +#define ibz_pow SQISIGN_NAMESPACE_GENERIC(ibz_pow) +#define ibz_pow_mod SQISIGN_NAMESPACE_GENERIC(ibz_pow_mod) +#define ibz_print SQISIGN_NAMESPACE_GENERIC(ibz_print) +#define ibz_probab_prime SQISIGN_NAMESPACE_GENERIC(ibz_probab_prime) +#define 
ibz_rand_interval SQISIGN_NAMESPACE_GENERIC(ibz_rand_interval) +#define ibz_rand_interval_bits SQISIGN_NAMESPACE_GENERIC(ibz_rand_interval_bits) +#define ibz_rand_interval_i SQISIGN_NAMESPACE_GENERIC(ibz_rand_interval_i) +#define ibz_rand_interval_minm_m SQISIGN_NAMESPACE_GENERIC(ibz_rand_interval_minm_m) +#define ibz_set SQISIGN_NAMESPACE_GENERIC(ibz_set) +#define ibz_set_from_str SQISIGN_NAMESPACE_GENERIC(ibz_set_from_str) +#define ibz_size_in_base SQISIGN_NAMESPACE_GENERIC(ibz_size_in_base) +#define ibz_sqrt SQISIGN_NAMESPACE_GENERIC(ibz_sqrt) +#define ibz_sqrt_floor SQISIGN_NAMESPACE_GENERIC(ibz_sqrt_floor) +#define ibz_sqrt_mod_p SQISIGN_NAMESPACE_GENERIC(ibz_sqrt_mod_p) +#define ibz_sub SQISIGN_NAMESPACE_GENERIC(ibz_sub) +#define ibz_swap SQISIGN_NAMESPACE_GENERIC(ibz_swap) +#define ibz_to_digits SQISIGN_NAMESPACE_GENERIC(ibz_to_digits) +#define ibz_two_adic SQISIGN_NAMESPACE_GENERIC(ibz_two_adic) + +// Namespacing symbols exported from integers.c: +#undef ibz_cornacchia_prime +#undef ibz_generate_random_prime + +#define ibz_cornacchia_prime SQISIGN_NAMESPACE_GENERIC(ibz_cornacchia_prime) +#define ibz_generate_random_prime SQISIGN_NAMESPACE_GENERIC(ibz_generate_random_prime) + +// Namespacing symbols exported from isog_chains.c: +#undef ec_eval_even +#undef ec_eval_small_chain +#undef ec_iso_eval +#undef ec_isomorphism + +#define ec_eval_even SQISIGN_NAMESPACE(ec_eval_even) +#define ec_eval_small_chain SQISIGN_NAMESPACE(ec_eval_small_chain) +#define ec_iso_eval SQISIGN_NAMESPACE(ec_iso_eval) +#define ec_isomorphism SQISIGN_NAMESPACE(ec_isomorphism) + +// Namespacing symbols exported from keygen.c: +#undef protocols_keygen +#undef secret_key_finalize +#undef secret_key_init + +#define protocols_keygen SQISIGN_NAMESPACE(protocols_keygen) +#define secret_key_finalize SQISIGN_NAMESPACE(secret_key_finalize) +#define secret_key_init SQISIGN_NAMESPACE(secret_key_init) + +// Namespacing symbols exported from l2.c: +#undef quat_lattice_lll +#undef quat_lll_core + 
+#define quat_lattice_lll SQISIGN_NAMESPACE_GENERIC(quat_lattice_lll) +#define quat_lll_core SQISIGN_NAMESPACE_GENERIC(quat_lll_core) + +// Namespacing symbols exported from lat_ball.c: +#undef quat_lattice_bound_parallelogram +#undef quat_lattice_sample_from_ball + +#define quat_lattice_bound_parallelogram SQISIGN_NAMESPACE_GENERIC(quat_lattice_bound_parallelogram) +#define quat_lattice_sample_from_ball SQISIGN_NAMESPACE_GENERIC(quat_lattice_sample_from_ball) + +// Namespacing symbols exported from lattice.c: +#undef quat_lattice_add +#undef quat_lattice_alg_elem_mul +#undef quat_lattice_conjugate_without_hnf +#undef quat_lattice_contains +#undef quat_lattice_dual_without_hnf +#undef quat_lattice_equal +#undef quat_lattice_gram +#undef quat_lattice_hnf +#undef quat_lattice_inclusion +#undef quat_lattice_index +#undef quat_lattice_intersect +#undef quat_lattice_mat_alg_coord_mul_without_hnf +#undef quat_lattice_mul +#undef quat_lattice_reduce_denom + +#define quat_lattice_add SQISIGN_NAMESPACE_GENERIC(quat_lattice_add) +#define quat_lattice_alg_elem_mul SQISIGN_NAMESPACE_GENERIC(quat_lattice_alg_elem_mul) +#define quat_lattice_conjugate_without_hnf SQISIGN_NAMESPACE_GENERIC(quat_lattice_conjugate_without_hnf) +#define quat_lattice_contains SQISIGN_NAMESPACE_GENERIC(quat_lattice_contains) +#define quat_lattice_dual_without_hnf SQISIGN_NAMESPACE_GENERIC(quat_lattice_dual_without_hnf) +#define quat_lattice_equal SQISIGN_NAMESPACE_GENERIC(quat_lattice_equal) +#define quat_lattice_gram SQISIGN_NAMESPACE_GENERIC(quat_lattice_gram) +#define quat_lattice_hnf SQISIGN_NAMESPACE_GENERIC(quat_lattice_hnf) +#define quat_lattice_inclusion SQISIGN_NAMESPACE_GENERIC(quat_lattice_inclusion) +#define quat_lattice_index SQISIGN_NAMESPACE_GENERIC(quat_lattice_index) +#define quat_lattice_intersect SQISIGN_NAMESPACE_GENERIC(quat_lattice_intersect) +#define quat_lattice_mat_alg_coord_mul_without_hnf SQISIGN_NAMESPACE_GENERIC(quat_lattice_mat_alg_coord_mul_without_hnf) +#define 
quat_lattice_mul SQISIGN_NAMESPACE_GENERIC(quat_lattice_mul) +#define quat_lattice_reduce_denom SQISIGN_NAMESPACE_GENERIC(quat_lattice_reduce_denom) + +// Namespacing symbols exported from lll_applications.c: +#undef quat_lideal_lideal_mul_reduced +#undef quat_lideal_prime_norm_reduced_equivalent +#undef quat_lideal_reduce_basis + +#define quat_lideal_lideal_mul_reduced SQISIGN_NAMESPACE_GENERIC(quat_lideal_lideal_mul_reduced) +#define quat_lideal_prime_norm_reduced_equivalent SQISIGN_NAMESPACE_GENERIC(quat_lideal_prime_norm_reduced_equivalent) +#define quat_lideal_reduce_basis SQISIGN_NAMESPACE_GENERIC(quat_lideal_reduce_basis) + +// Namespacing symbols exported from lll_verification.c: +#undef ibq_vec_4_copy_ibz +#undef quat_lll_bilinear +#undef quat_lll_gram_schmidt_transposed_with_ibq +#undef quat_lll_set_ibq_parameters +#undef quat_lll_verify + +#define ibq_vec_4_copy_ibz SQISIGN_NAMESPACE_GENERIC(ibq_vec_4_copy_ibz) +#define quat_lll_bilinear SQISIGN_NAMESPACE_GENERIC(quat_lll_bilinear) +#define quat_lll_gram_schmidt_transposed_with_ibq SQISIGN_NAMESPACE_GENERIC(quat_lll_gram_schmidt_transposed_with_ibq) +#define quat_lll_set_ibq_parameters SQISIGN_NAMESPACE_GENERIC(quat_lll_set_ibq_parameters) +#define quat_lll_verify SQISIGN_NAMESPACE_GENERIC(quat_lll_verify) + +// Namespacing symbols exported from mem.c: +#undef sqisign_secure_clear +#undef sqisign_secure_free + +#define sqisign_secure_clear SQISIGN_NAMESPACE_GENERIC(sqisign_secure_clear) +#define sqisign_secure_free SQISIGN_NAMESPACE_GENERIC(sqisign_secure_free) + +// Namespacing symbols exported from mp.c: +#undef MUL +#undef mp_add +#undef mp_compare +#undef mp_copy +#undef mp_inv_2e +#undef mp_invert_matrix +#undef mp_is_one +#undef mp_is_zero +#undef mp_mod_2exp +#undef mp_mul +#undef mp_mul2 +#undef mp_neg +#undef mp_print +#undef mp_shiftl +#undef mp_shiftr +#undef mp_sub +#undef multiple_mp_shiftl +#undef select_ct +#undef swap_ct + +#define MUL SQISIGN_NAMESPACE_GENERIC(MUL) +#define mp_add 
SQISIGN_NAMESPACE_GENERIC(mp_add) +#define mp_compare SQISIGN_NAMESPACE_GENERIC(mp_compare) +#define mp_copy SQISIGN_NAMESPACE_GENERIC(mp_copy) +#define mp_inv_2e SQISIGN_NAMESPACE_GENERIC(mp_inv_2e) +#define mp_invert_matrix SQISIGN_NAMESPACE_GENERIC(mp_invert_matrix) +#define mp_is_one SQISIGN_NAMESPACE_GENERIC(mp_is_one) +#define mp_is_zero SQISIGN_NAMESPACE_GENERIC(mp_is_zero) +#define mp_mod_2exp SQISIGN_NAMESPACE_GENERIC(mp_mod_2exp) +#define mp_mul SQISIGN_NAMESPACE_GENERIC(mp_mul) +#define mp_mul2 SQISIGN_NAMESPACE_GENERIC(mp_mul2) +#define mp_neg SQISIGN_NAMESPACE_GENERIC(mp_neg) +#define mp_print SQISIGN_NAMESPACE_GENERIC(mp_print) +#define mp_shiftl SQISIGN_NAMESPACE_GENERIC(mp_shiftl) +#define mp_shiftr SQISIGN_NAMESPACE_GENERIC(mp_shiftr) +#define mp_sub SQISIGN_NAMESPACE_GENERIC(mp_sub) +#define multiple_mp_shiftl SQISIGN_NAMESPACE_GENERIC(multiple_mp_shiftl) +#define select_ct SQISIGN_NAMESPACE_GENERIC(select_ct) +#define swap_ct SQISIGN_NAMESPACE_GENERIC(swap_ct) + +// Namespacing symbols exported from normeq.c: +#undef quat_change_to_O0_basis +#undef quat_lattice_O0_set +#undef quat_lattice_O0_set_extremal +#undef quat_order_elem_create +#undef quat_represent_integer +#undef quat_sampling_random_ideal_O0_given_norm + +#define quat_change_to_O0_basis SQISIGN_NAMESPACE_GENERIC(quat_change_to_O0_basis) +#define quat_lattice_O0_set SQISIGN_NAMESPACE_GENERIC(quat_lattice_O0_set) +#define quat_lattice_O0_set_extremal SQISIGN_NAMESPACE_GENERIC(quat_lattice_O0_set_extremal) +#define quat_order_elem_create SQISIGN_NAMESPACE_GENERIC(quat_order_elem_create) +#define quat_represent_integer SQISIGN_NAMESPACE_GENERIC(quat_represent_integer) +#define quat_sampling_random_ideal_O0_given_norm SQISIGN_NAMESPACE_GENERIC(quat_sampling_random_ideal_O0_given_norm) + +// Namespacing symbols exported from printer.c: +#undef ibz_mat_2x2_print +#undef ibz_mat_4x4_print +#undef ibz_vec_2_print +#undef ibz_vec_4_print +#undef quat_alg_elem_print +#undef quat_alg_print +#undef 
quat_lattice_print +#undef quat_left_ideal_print + +#define ibz_mat_2x2_print SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_print) +#define ibz_mat_4x4_print SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_print) +#define ibz_vec_2_print SQISIGN_NAMESPACE_GENERIC(ibz_vec_2_print) +#define ibz_vec_4_print SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_print) +#define quat_alg_elem_print SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_print) +#define quat_alg_print SQISIGN_NAMESPACE_GENERIC(quat_alg_print) +#define quat_lattice_print SQISIGN_NAMESPACE_GENERIC(quat_lattice_print) +#define quat_left_ideal_print SQISIGN_NAMESPACE_GENERIC(quat_left_ideal_print) + +// Namespacing symbols exported from random_input_generation.c: +#undef quat_test_input_random_ideal_generation +#undef quat_test_input_random_ideal_lattice_generation +#undef quat_test_input_random_lattice_generation + +#define quat_test_input_random_ideal_generation SQISIGN_NAMESPACE_GENERIC(quat_test_input_random_ideal_generation) +#define quat_test_input_random_ideal_lattice_generation SQISIGN_NAMESPACE_GENERIC(quat_test_input_random_ideal_lattice_generation) +#define quat_test_input_random_lattice_generation SQISIGN_NAMESPACE_GENERIC(quat_test_input_random_lattice_generation) + +// Namespacing symbols exported from rationals.c: +#undef ibq_abs +#undef ibq_add +#undef ibq_cmp +#undef ibq_copy +#undef ibq_finalize +#undef ibq_init +#undef ibq_inv +#undef ibq_is_ibz +#undef ibq_is_one +#undef ibq_is_zero +#undef ibq_mat_4x4_finalize +#undef ibq_mat_4x4_init +#undef ibq_mat_4x4_print +#undef ibq_mul +#undef ibq_neg +#undef ibq_reduce +#undef ibq_set +#undef ibq_sub +#undef ibq_to_ibz +#undef ibq_vec_4_finalize +#undef ibq_vec_4_init +#undef ibq_vec_4_print + +#define ibq_abs SQISIGN_NAMESPACE_GENERIC(ibq_abs) +#define ibq_add SQISIGN_NAMESPACE_GENERIC(ibq_add) +#define ibq_cmp SQISIGN_NAMESPACE_GENERIC(ibq_cmp) +#define ibq_copy SQISIGN_NAMESPACE_GENERIC(ibq_copy) +#define ibq_finalize SQISIGN_NAMESPACE_GENERIC(ibq_finalize) +#define ibq_init 
SQISIGN_NAMESPACE_GENERIC(ibq_init) +#define ibq_inv SQISIGN_NAMESPACE_GENERIC(ibq_inv) +#define ibq_is_ibz SQISIGN_NAMESPACE_GENERIC(ibq_is_ibz) +#define ibq_is_one SQISIGN_NAMESPACE_GENERIC(ibq_is_one) +#define ibq_is_zero SQISIGN_NAMESPACE_GENERIC(ibq_is_zero) +#define ibq_mat_4x4_finalize SQISIGN_NAMESPACE_GENERIC(ibq_mat_4x4_finalize) +#define ibq_mat_4x4_init SQISIGN_NAMESPACE_GENERIC(ibq_mat_4x4_init) +#define ibq_mat_4x4_print SQISIGN_NAMESPACE_GENERIC(ibq_mat_4x4_print) +#define ibq_mul SQISIGN_NAMESPACE_GENERIC(ibq_mul) +#define ibq_neg SQISIGN_NAMESPACE_GENERIC(ibq_neg) +#define ibq_reduce SQISIGN_NAMESPACE_GENERIC(ibq_reduce) +#define ibq_set SQISIGN_NAMESPACE_GENERIC(ibq_set) +#define ibq_sub SQISIGN_NAMESPACE_GENERIC(ibq_sub) +#define ibq_to_ibz SQISIGN_NAMESPACE_GENERIC(ibq_to_ibz) +#define ibq_vec_4_finalize SQISIGN_NAMESPACE_GENERIC(ibq_vec_4_finalize) +#define ibq_vec_4_init SQISIGN_NAMESPACE_GENERIC(ibq_vec_4_init) +#define ibq_vec_4_print SQISIGN_NAMESPACE_GENERIC(ibq_vec_4_print) + +// Namespacing symbols exported from sign.c: +#undef protocols_sign + +#define protocols_sign SQISIGN_NAMESPACE(protocols_sign) + +// Namespacing symbols exported from sqisign.c: +#undef sqisign_keypair +#undef sqisign_open +#undef sqisign_sign +#undef sqisign_verify + +#define sqisign_keypair SQISIGN_NAMESPACE(sqisign_keypair) +#define sqisign_open SQISIGN_NAMESPACE(sqisign_open) +#define sqisign_sign SQISIGN_NAMESPACE(sqisign_sign) +#define sqisign_verify SQISIGN_NAMESPACE(sqisign_verify) + +// Namespacing symbols exported from theta_isogenies.c: +#undef theta_chain_compute_and_eval +#undef theta_chain_compute_and_eval_randomized +#undef theta_chain_compute_and_eval_verify + +#define theta_chain_compute_and_eval SQISIGN_NAMESPACE(theta_chain_compute_and_eval) +#define theta_chain_compute_and_eval_randomized SQISIGN_NAMESPACE(theta_chain_compute_and_eval_randomized) +#define theta_chain_compute_and_eval_verify SQISIGN_NAMESPACE(theta_chain_compute_and_eval_verify) 
+ +// Namespacing symbols exported from theta_structure.c: +#undef double_iter +#undef double_point +#undef is_product_theta_point +#undef theta_precomputation + +#define double_iter SQISIGN_NAMESPACE(double_iter) +#define double_point SQISIGN_NAMESPACE(double_point) +#define is_product_theta_point SQISIGN_NAMESPACE(is_product_theta_point) +#define theta_precomputation SQISIGN_NAMESPACE(theta_precomputation) + +// Namespacing symbols exported from verify.c: +#undef protocols_verify + +#define protocols_verify SQISIGN_NAMESPACE(protocols_verify) + +// Namespacing symbols exported from xeval.c: +#undef xeval_2 +#undef xeval_2_singular +#undef xeval_4 + +#define xeval_2 SQISIGN_NAMESPACE(xeval_2) +#define xeval_2_singular SQISIGN_NAMESPACE(xeval_2_singular) +#define xeval_4 SQISIGN_NAMESPACE(xeval_4) + +// Namespacing symbols exported from xisog.c: +#undef xisog_2 +#undef xisog_2_singular +#undef xisog_4 + +#define xisog_2 SQISIGN_NAMESPACE(xisog_2) +#define xisog_2_singular SQISIGN_NAMESPACE(xisog_2_singular) +#define xisog_4 SQISIGN_NAMESPACE(xisog_4) + + +#endif + diff --git a/src/pqm4/sqisign_lvl1/ref/theta_isogenies.c b/src/pqm4/sqisign_lvl1/ref/theta_isogenies.c new file mode 100644 index 0000000..478a9ab --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/theta_isogenies.c @@ -0,0 +1,1283 @@ +#include "theta_isogenies.h" +#include +#include +#include +#include +#include + +// Select a base change matrix in constant time, with M1 a regular +// base change matrix and M2 a precomputed base change matrix +// If option = 0 then M <- M1, else if option = 0xFF...FF then M <- M2 +static inline void +select_base_change_matrix(basis_change_matrix_t *M, + const basis_change_matrix_t *M1, + const precomp_basis_change_matrix_t *M2, + const uint32_t option) +{ + for (int i = 0; i < 4; i++) + for (int j = 0; j < 4; j++) + fp2_select(&M->m[i][j], &M1->m[i][j], &FP2_CONSTANTS[M2->m[i][j]], option); +} + +// Set a regular base change matrix from a precomputed one +static inline void 
+set_base_change_matrix_from_precomp(basis_change_matrix_t *res, const precomp_basis_change_matrix_t *M) +{ + for (int i = 0; i < 4; i++) + for (int j = 0; j < 4; j++) + res->m[i][j] = FP2_CONSTANTS[M->m[i][j]]; +} + +static inline void +choose_index_theta_point(fp2_t *res, int ind, const theta_point_t *T) +{ + const fp2_t *src = NULL; + switch (ind % 4) { + case 0: + src = &T->x; + break; + case 1: + src = &T->y; + break; + case 2: + src = &T->z; + break; + case 3: + src = &T->t; + break; + default: + assert(0); + } + fp2_copy(res, src); +} + +// same as apply_isomorphism method but more efficient when the t component of P is zero. +static void +apply_isomorphism_general(theta_point_t *res, + const basis_change_matrix_t *M, + const theta_point_t *P, + const bool Pt_not_zero) +{ + fp2_t x1; + theta_point_t temp; + + fp2_mul(&temp.x, &P->x, &M->m[0][0]); + fp2_mul(&x1, &P->y, &M->m[0][1]); + fp2_add(&temp.x, &temp.x, &x1); + fp2_mul(&x1, &P->z, &M->m[0][2]); + fp2_add(&temp.x, &temp.x, &x1); + + fp2_mul(&temp.y, &P->x, &M->m[1][0]); + fp2_mul(&x1, &P->y, &M->m[1][1]); + fp2_add(&temp.y, &temp.y, &x1); + fp2_mul(&x1, &P->z, &M->m[1][2]); + fp2_add(&temp.y, &temp.y, &x1); + + fp2_mul(&temp.z, &P->x, &M->m[2][0]); + fp2_mul(&x1, &P->y, &M->m[2][1]); + fp2_add(&temp.z, &temp.z, &x1); + fp2_mul(&x1, &P->z, &M->m[2][2]); + fp2_add(&temp.z, &temp.z, &x1); + + fp2_mul(&temp.t, &P->x, &M->m[3][0]); + fp2_mul(&x1, &P->y, &M->m[3][1]); + fp2_add(&temp.t, &temp.t, &x1); + fp2_mul(&x1, &P->z, &M->m[3][2]); + fp2_add(&temp.t, &temp.t, &x1); + + if (Pt_not_zero) { + fp2_mul(&x1, &P->t, &M->m[0][3]); + fp2_add(&temp.x, &temp.x, &x1); + + fp2_mul(&x1, &P->t, &M->m[1][3]); + fp2_add(&temp.y, &temp.y, &x1); + + fp2_mul(&x1, &P->t, &M->m[2][3]); + fp2_add(&temp.z, &temp.z, &x1); + + fp2_mul(&x1, &P->t, &M->m[3][3]); + fp2_add(&temp.t, &temp.t, &x1); + } + + fp2_copy(&res->x, &temp.x); + fp2_copy(&res->y, &temp.y); + fp2_copy(&res->z, &temp.z); + fp2_copy(&res->t, &temp.t); +} + +static 
void +apply_isomorphism(theta_point_t *res, const basis_change_matrix_t *M, const theta_point_t *P) +{ + apply_isomorphism_general(res, M, P, true); +} + +// set res = M1 * M2 with matrix multiplication +static void +base_change_matrix_multiplication(basis_change_matrix_t *res, + const basis_change_matrix_t *M1, + const basis_change_matrix_t *M2) +{ + basis_change_matrix_t tmp; + fp2_t sum, m_ik, m_kj; + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + fp2_set_zero(&sum); + for (int k = 0; k < 4; k++) { + m_ik = M1->m[i][k]; + m_kj = M2->m[k][j]; + fp2_mul(&m_ik, &m_ik, &m_kj); + fp2_add(&sum, &sum, &m_ik); + } + tmp.m[i][j] = sum; + } + } + *res = tmp; +} + +// compute the theta_point corresponding to the couple of point T on an elliptic product +static void +base_change(theta_point_t *out, const theta_gluing_t *phi, const theta_couple_point_t *T) +{ + theta_point_t null_point; + + // null_point = (a : b : c : d) + // a = P1.x P2.x, b = P1.x P2.z, c = P1.z P2.x, d = P1.z P2.z + fp2_mul(&null_point.x, &T->P1.x, &T->P2.x); + fp2_mul(&null_point.y, &T->P1.x, &T->P2.z); + fp2_mul(&null_point.z, &T->P2.x, &T->P1.z); + fp2_mul(&null_point.t, &T->P1.z, &T->P2.z); + + // Apply the basis change + apply_isomorphism(out, &phi->M, &null_point); +} + +static void +action_by_translation_z_and_det(fp2_t *z_inv, fp2_t *det_inv, const ec_point_t *P4, const ec_point_t *P2) +{ + // Store the Z-coordinate to invert + fp2_copy(z_inv, &P4->z); + + // Then collect detij = xij wij - uij zij + fp2_t tmp; + fp2_mul(det_inv, &P4->x, &P2->z); + fp2_mul(&tmp, &P4->z, &P2->x); + fp2_sub(det_inv, det_inv, &tmp); +} + +static void +action_by_translation_compute_matrix(translation_matrix_t *G, + const ec_point_t *P4, + const ec_point_t *P2, + const fp2_t *z_inv, + const fp2_t *det_inv) +{ + fp2_t tmp; + + // Gi.g10 = uij xij /detij - xij/zij + fp2_mul(&tmp, &P4->x, z_inv); + fp2_mul(&G->g10, &P4->x, &P2->x); + fp2_mul(&G->g10, &G->g10, det_inv); + fp2_sub(&G->g10, &G->g10, &tmp); + 
+ // Gi.g11 = uij zij / detij (det_inv holds the already inverted determinant) + fp2_mul(&G->g11, &P2->x, det_inv); + fp2_mul(&G->g11, &G->g11, &P4->z); + + // Gi.g00 = -Gi.g11 + fp2_neg(&G->g00, &G->g11); + + // Gi.g01 = - wij zij / detij + fp2_mul(&G->g01, &P2->z, det_inv); + fp2_mul(&G->g01, &G->g01, &P4->z); + fp2_neg(&G->g01, &G->g01); +} + +// Returns 1 if the basis is as expected and 0 otherwise +// We only expect this to fail for malformed signatures, so +// do not require this to run in constant time. +static int +verify_two_torsion(const theta_couple_point_t *K1_2, const theta_couple_point_t *K2_2, const theta_couple_curve_t *E12) +{ + // First check if any point in K1_2 or K2_2 is zero, if they are then the points did not have + // order 8 when we started gluing + if (ec_is_zero(&K1_2->P1) | ec_is_zero(&K1_2->P2) | ec_is_zero(&K2_2->P1) | ec_is_zero(&K2_2->P2)) { + return 0; + } + + // Now ensure that P1, Q1 and P2, Q2 are independent. For points of order two this means + // that they're not the same + if (ec_is_equal(&K1_2->P1, &K2_2->P1) | ec_is_equal(&K1_2->P2, &K2_2->P2)) { + return 0; + } + + // Finally, double points to ensure all points have order exactly 2 + theta_couple_point_t O1, O2; + double_couple_point(&O1, K1_2, E12); + double_couple_point(&O2, K2_2, E12); + // If this check fails then the points had order 2*f for some f, and the kernel is malformed. + if (!(ec_is_zero(&O1.P1) & ec_is_zero(&O1.P2) & ec_is_zero(&O2.P1) & ec_is_zero(&O2.P2))) { + return 0; + } + + return 1; +} + +// Computes the action by translation for four points +// (P1, P2) and (Q1, Q2) on E1 x E2 simultaneously to +// save on inversions. 
+// Returns 0 if any of Pi or Qi does not have order 2 +// and 1 otherwise +static int +action_by_translation(translation_matrix_t *Gi, + const theta_couple_point_t *K1_4, + const theta_couple_point_t *K2_4, + const theta_couple_curve_t *E12) +{ + // Compute points of order 2 from Ki_4 + theta_couple_point_t K1_2, K2_2; + double_couple_point(&K1_2, K1_4, E12); + double_couple_point(&K2_2, K2_4, E12); + + if (!verify_two_torsion(&K1_2, &K2_2, E12)) { + return 0; + } + + // We need to invert four Z coordinates and + // four determinants which we do with batched + // inversion + fp2_t inverses[8]; + action_by_translation_z_and_det(&inverses[0], &inverses[4], &K1_4->P1, &K1_2.P1); + action_by_translation_z_and_det(&inverses[1], &inverses[5], &K1_4->P2, &K1_2.P2); + action_by_translation_z_and_det(&inverses[2], &inverses[6], &K2_4->P1, &K2_2.P1); + action_by_translation_z_and_det(&inverses[3], &inverses[7], &K2_4->P2, &K2_2.P2); + + fp2_batched_inv(inverses, 8); + if (fp2_is_zero(&inverses[0])) + return 0; // something was wrong with our input (which somehow was not caught by + // verify_two_torsion) + + action_by_translation_compute_matrix(&Gi[0], &K1_4->P1, &K1_2.P1, &inverses[0], &inverses[4]); + action_by_translation_compute_matrix(&Gi[1], &K1_4->P2, &K1_2.P2, &inverses[1], &inverses[5]); + action_by_translation_compute_matrix(&Gi[2], &K2_4->P1, &K2_2.P1, &inverses[2], &inverses[6]); + action_by_translation_compute_matrix(&Gi[3], &K2_4->P2, &K2_2.P2, &inverses[3], &inverses[7]); + + return 1; +} + +// Given the appropriate four torsion, computes the +// change of basis to compute the correct theta null +// point. 
+// Returns 0 if the order of K1_4 or K2_4 is not 4 +static int +gluing_change_of_basis(basis_change_matrix_t *M, + const theta_couple_point_t *K1_4, + const theta_couple_point_t *K2_4, + const theta_couple_curve_t *E12) +{ + // Compute the four 2x2 matrices for the action by translation + // on the four points: + translation_matrix_t Gi[4]; + if (!action_by_translation(Gi, K1_4, K2_4, E12)) + return 0; + + // Computation of the 4x4 matrix from Mij + // t001, t101 (resp t002, t102) first column of M11 * M21 (resp M12 * M22) + fp2_t t001, t101, t002, t102, tmp; + + fp2_mul(&t001, &Gi[0].g00, &Gi[2].g00); + fp2_mul(&tmp, &Gi[0].g01, &Gi[2].g10); + fp2_add(&t001, &t001, &tmp); + + fp2_mul(&t101, &Gi[0].g10, &Gi[2].g00); + fp2_mul(&tmp, &Gi[0].g11, &Gi[2].g10); + fp2_add(&t101, &t101, &tmp); + + fp2_mul(&t002, &Gi[1].g00, &Gi[3].g00); + fp2_mul(&tmp, &Gi[1].g01, &Gi[3].g10); + fp2_add(&t002, &t002, &tmp); + + fp2_mul(&t102, &Gi[1].g10, &Gi[3].g00); + fp2_mul(&tmp, &Gi[1].g11, &Gi[3].g10); + fp2_add(&t102, &t102, &tmp); + + // trace for the first row + fp2_set_one(&M->m[0][0]); + fp2_mul(&tmp, &t001, &t002); + fp2_add(&M->m[0][0], &M->m[0][0], &tmp); + fp2_mul(&tmp, &Gi[2].g00, &Gi[3].g00); + fp2_add(&M->m[0][0], &M->m[0][0], &tmp); + fp2_mul(&tmp, &Gi[0].g00, &Gi[1].g00); + fp2_add(&M->m[0][0], &M->m[0][0], &tmp); + + fp2_mul(&M->m[0][1], &t001, &t102); + fp2_mul(&tmp, &Gi[2].g00, &Gi[3].g10); + fp2_add(&M->m[0][1], &M->m[0][1], &tmp); + fp2_mul(&tmp, &Gi[0].g00, &Gi[1].g10); + fp2_add(&M->m[0][1], &M->m[0][1], &tmp); + + fp2_mul(&M->m[0][2], &t101, &t002); + fp2_mul(&tmp, &Gi[2].g10, &Gi[3].g00); + fp2_add(&M->m[0][2], &M->m[0][2], &tmp); + fp2_mul(&tmp, &Gi[0].g10, &Gi[1].g00); + fp2_add(&M->m[0][2], &M->m[0][2], &tmp); + + fp2_mul(&M->m[0][3], &t101, &t102); + fp2_mul(&tmp, &Gi[2].g10, &Gi[3].g10); + fp2_add(&M->m[0][3], &M->m[0][3], &tmp); + fp2_mul(&tmp, &Gi[0].g10, &Gi[1].g10); + fp2_add(&M->m[0][3], &M->m[0][3], &tmp); + + // Compute the action of 
(0,out.K2_4.P2) for the second row + fp2_mul(&tmp, &Gi[3].g01, &M->m[0][1]); + fp2_mul(&M->m[1][0], &Gi[3].g00, &M->m[0][0]); + fp2_add(&M->m[1][0], &M->m[1][0], &tmp); + + fp2_mul(&tmp, &Gi[3].g11, &M->m[0][1]); + fp2_mul(&M->m[1][1], &Gi[3].g10, &M->m[0][0]); + fp2_add(&M->m[1][1], &M->m[1][1], &tmp); + + fp2_mul(&tmp, &Gi[3].g01, &M->m[0][3]); + fp2_mul(&M->m[1][2], &Gi[3].g00, &M->m[0][2]); + fp2_add(&M->m[1][2], &M->m[1][2], &tmp); + + fp2_mul(&tmp, &Gi[3].g11, &M->m[0][3]); + fp2_mul(&M->m[1][3], &Gi[3].g10, &M->m[0][2]); + fp2_add(&M->m[1][3], &M->m[1][3], &tmp); + + // compute the action of (K1_4.P1,0) for the third row + fp2_mul(&tmp, &Gi[0].g01, &M->m[0][2]); + fp2_mul(&M->m[2][0], &Gi[0].g00, &M->m[0][0]); + fp2_add(&M->m[2][0], &M->m[2][0], &tmp); + + fp2_mul(&tmp, &Gi[0].g01, &M->m[0][3]); + fp2_mul(&M->m[2][1], &Gi[0].g00, &M->m[0][1]); + fp2_add(&M->m[2][1], &M->m[2][1], &tmp); + + fp2_mul(&tmp, &Gi[0].g11, &M->m[0][2]); + fp2_mul(&M->m[2][2], &Gi[0].g10, &M->m[0][0]); + fp2_add(&M->m[2][2], &M->m[2][2], &tmp); + + fp2_mul(&tmp, &Gi[0].g11, &M->m[0][3]); + fp2_mul(&M->m[2][3], &Gi[0].g10, &M->m[0][1]); + fp2_add(&M->m[2][3], &M->m[2][3], &tmp); + + // compute the action of (K1_4.P1,K2_4.P2) for the final row + fp2_mul(&tmp, &Gi[0].g01, &M->m[1][2]); + fp2_mul(&M->m[3][0], &Gi[0].g00, &M->m[1][0]); + fp2_add(&M->m[3][0], &M->m[3][0], &tmp); + + fp2_mul(&tmp, &Gi[0].g01, &M->m[1][3]); + fp2_mul(&M->m[3][1], &Gi[0].g00, &M->m[1][1]); + fp2_add(&M->m[3][1], &M->m[3][1], &tmp); + + fp2_mul(&tmp, &Gi[0].g11, &M->m[1][2]); + fp2_mul(&M->m[3][2], &Gi[0].g10, &M->m[1][0]); + fp2_add(&M->m[3][2], &M->m[3][2], &tmp); + + fp2_mul(&tmp, &Gi[0].g11, &M->m[1][3]); + fp2_mul(&M->m[3][3], &Gi[0].g10, &M->m[1][1]); + fp2_add(&M->m[3][3], &M->m[3][3], &tmp); + + return 1; +} + +/** + * @brief Compute the gluing isogeny from an elliptic product + * + * @param out Output: the theta_gluing + * @param K1_8 a couple point + * @param E12 an elliptic curve product + * @param 
xyK2_8 a couple point of E1 x E2 in (X:Y:Z) coordinates, expected to have order 8 + * + * out : E1xE2 -> A of kernel [4](K1_8,K2_8) + * if the kernel supplied has the incorrect order, or gluing seems malformed, + * returns 0, otherwise returns 1. + */ +static int +gluing_compute(theta_gluing_t *out, + const theta_couple_curve_t *E12, + const theta_couple_jac_point_t *xyK1_8, + const theta_couple_jac_point_t *xyK2_8, + bool verify) +{ + // Ensure that we have been given the eight torsion (debug builds only, failures are just logged) +#ifndef NDEBUG + { + int check = test_jac_order_twof(&xyK1_8->P1, &E12->E1, 3); + if (!check) + debug_print("xyK1_8->P1 does not have order 8"); + check = test_jac_order_twof(&xyK2_8->P1, &E12->E1, 3); + if (!check) + debug_print("xyK2_8->P1 does not have order 8"); + check = test_jac_order_twof(&xyK1_8->P2, &E12->E2, 3); + if (!check) + debug_print("xyK1_8->P2 does not have order 8"); + check = test_jac_order_twof(&xyK2_8->P2, &E12->E2, 3); + if (!check) + debug_print("xyK2_8->P2 does not have order 8"); + } +#endif + + out->xyK1_8 = *xyK1_8; + out->domain = *E12; + + // Given points in E[8] x E[8] we need the four torsion below + theta_couple_jac_point_t xyK1_4, xyK2_4; + + double_couple_jac_point(&xyK1_4, xyK1_8, E12); + double_couple_jac_point(&xyK2_4, xyK2_8, E12); + + // Convert from (X:Y:Z) coordinates to (X:Z) + theta_couple_point_t K1_8, K2_8; + theta_couple_point_t K1_4, K2_4; + + couple_jac_to_xz(&K1_8, xyK1_8); + couple_jac_to_xz(&K2_8, xyK2_8); + couple_jac_to_xz(&K1_4, &xyK1_4); + couple_jac_to_xz(&K2_4, &xyK2_4); + + // Set the basis change matrix, if we have not been given a valid K[8] for this computation + // gluing_change_of_basis will detect this and return 0 + if (!gluing_change_of_basis(&out->M, &K1_4, &K2_4, E12)) { + debug_print("gluing failed as kernel does not have correct order"); + return 0; + } + + // apply the base change to the kernel + theta_point_t TT1, TT2; + + base_change(&TT1, out, &K1_8); + base_change(&TT2, out, &K2_8); + + // compute the codomain + to_squared_theta(&TT1, &TT1); + to_squared_theta(&TT2, 
&TT2); + + // If the kernel is well formed then TT1.t and TT2.t are zero + // if they are not, we exit early as the signature we are validating + // is probably malformed + if (!(fp2_is_zero(&TT1.t) & fp2_is_zero(&TT2.t))) { + debug_print("gluing failed TT1.t or TT2.t is not zero"); + return 0; + } + // Test our projective factors are non zero + if (fp2_is_zero(&TT1.x) | fp2_is_zero(&TT2.x) | fp2_is_zero(&TT1.y) | fp2_is_zero(&TT2.z) | fp2_is_zero(&TT1.z)) + return 0; // invalid input + + // Projective factor: Ax + fp2_mul(&out->codomain.x, &TT1.x, &TT2.x); + fp2_mul(&out->codomain.y, &TT1.y, &TT2.x); + fp2_mul(&out->codomain.z, &TT1.x, &TT2.z); + fp2_set_zero(&out->codomain.t); + // Projective factor: ABCxz + fp2_mul(&out->precomputation.x, &TT1.y, &TT2.z); + fp2_copy(&out->precomputation.y, &out->codomain.z); + fp2_copy(&out->precomputation.z, &out->codomain.y); + fp2_set_zero(&out->precomputation.t); + + // Compute the two components of phi(K1_8) = (x:x:y:y). + fp2_mul(&out->imageK1_8.x, &TT1.x, &out->precomputation.x); + fp2_mul(&out->imageK1_8.y, &TT1.z, &out->precomputation.z); + + // If K1_8 and K2_8 are our 8-torsion points, this ensures that the + // 4-torsion points [2]K1_8 and [2]K2_8 are isotropic. 
+ if (verify) { + fp2_t t1, t2; + fp2_mul(&t1, &TT1.y, &out->precomputation.y); + if (!fp2_is_equal(&out->imageK1_8.x, &t1)) + return 0; + fp2_mul(&t1, &TT2.x, &out->precomputation.x); + fp2_mul(&t2, &TT2.z, &out->precomputation.z); + if (!fp2_is_equal(&t2, &t1)) + return 0; + } + + // compute the final codomain + hadamard(&out->codomain, &out->codomain); + return 1; +} + +// subroutine of the gluing eval +static void +gluing_eval_point(theta_point_t *image, const theta_couple_jac_point_t *P, const theta_gluing_t *phi) +{ + theta_point_t T1, T2; + add_components_t add_comp1, add_comp2; + + // Compute the cross addition components of P1+Q1 and P2+Q2 + jac_to_xz_add_components(&add_comp1, &P->P1, &phi->xyK1_8.P1, &phi->domain.E1); + jac_to_xz_add_components(&add_comp2, &P->P2, &phi->xyK1_8.P2, &phi->domain.E2); + + // Compute T1 and T2 derived from the cross addition components. + fp2_mul(&T1.x, &add_comp1.u, &add_comp2.u); // T1x = u1u2 + fp2_mul(&T2.t, &add_comp1.v, &add_comp2.v); // T2t = v1v2 + fp2_add(&T1.x, &T1.x, &T2.t); // T1x = u1u2 + v1v2 + fp2_mul(&T1.y, &add_comp1.u, &add_comp2.w); // T1y = u1w2 + fp2_mul(&T1.z, &add_comp1.w, &add_comp2.u); // T1z = w1u2 + fp2_mul(&T1.t, &add_comp1.w, &add_comp2.w); // T1t = w1w2 + fp2_add(&T2.x, &add_comp1.u, &add_comp1.v); // T2x = (u1+v1) + fp2_add(&T2.y, &add_comp2.u, &add_comp2.v); // T2y = (u2+v2) + fp2_mul(&T2.x, &T2.x, &T2.y); // T2x = (u1+v1)(u2+v2) + fp2_sub(&T2.x, &T2.x, &T1.x); // T2x = v1u2 + u1v2 + fp2_mul(&T2.y, &add_comp1.v, &add_comp2.w); // T2y = v1w2 + fp2_mul(&T2.z, &add_comp1.w, &add_comp2.v); // T2z = w1v2 + fp2_set_zero(&T2.t); // T2t = 0 + + // Apply the basis change and compute their respective square + // theta(P+Q) = M.T1 - M.T2 and theta(P-Q) = M.T1 + M.T2 + apply_isomorphism_general(&T1, &phi->M, &T1, true); + apply_isomorphism_general(&T2, &phi->M, &T2, false); + pointwise_square(&T1, &T1); + pointwise_square(&T2, &T2); + + // the difference between the two is therefore theta(P+Q)theta(P-Q) 
+ // whose hadamard transform is then the product of the dual + // theta_points of phi(P) and phi(Q). + fp2_sub(&T1.x, &T1.x, &T2.x); + fp2_sub(&T1.y, &T1.y, &T2.y); + fp2_sub(&T1.z, &T1.z, &T2.z); + fp2_sub(&T1.t, &T1.t, &T2.t); + hadamard(&T1, &T1); + + // Compute (x, y, z, t) + // As imageK1_8 = (x:x:y:y), its inverse is (y:y:x:x). + fp2_mul(&image->x, &T1.x, &phi->imageK1_8.y); + fp2_mul(&image->y, &T1.y, &phi->imageK1_8.y); + fp2_mul(&image->z, &T1.z, &phi->imageK1_8.x); + fp2_mul(&image->t, &T1.t, &phi->imageK1_8.x); + + hadamard(image, image); +} + +// Same as gluing_eval_point but in the very special case where we already know that the point will +// have a zero coordinate at the place where the zero coordinate of the dual_theta_nullpoint would +// have made the computation difficult +static int +gluing_eval_point_special_case(theta_point_t *image, const theta_couple_point_t *P, const theta_gluing_t *phi) +{ + theta_point_t T; + + // Apply the basis change + base_change(&T, phi, P); + + // Apply the to_squared_theta transform + to_squared_theta(&T, &T); + + // This coordinate should always be 0 in a gluing because D=0. 
+ // If this is not the case, something went very wrong, so reject + if (!fp2_is_zero(&T.t)) + return 0; + + // Compute (x, y, z, t) + fp2_mul(&image->x, &T.x, &phi->precomputation.x); + fp2_mul(&image->y, &T.y, &phi->precomputation.y); + fp2_mul(&image->z, &T.z, &phi->precomputation.z); + fp2_set_zero(&image->t); + + hadamard(image, image); + return 1; +} + +/** + * @brief Evaluate a gluing isogeny from an elliptic product on a basis + * + * @param image1 Output: the theta_point of the image of the first couple of points + * @param image2 Output : the theta point of the image of the second couple of points + * @param xyT1: A pair of points (X : Y : Z) on E1E2 to glue using phi + * @param xyT2: A pair of points (X : Y : Z) on E1E2 to glue using phi + * @param phi : a gluing isogeny E1 x E2 -> A + * + **/ +static void +gluing_eval_basis(theta_point_t *image1, + theta_point_t *image2, + const theta_couple_jac_point_t *xyT1, + const theta_couple_jac_point_t *xyT2, + const theta_gluing_t *phi) +{ + gluing_eval_point(image1, xyT1, phi); + gluing_eval_point(image2, xyT2, phi); +} + +/** + * @brief Compute a (2,2) isogeny in dimension 2 in the theta_model + * + * @param out Output: the theta_isogeny + * @param A a theta null point for the domain + * @param T1_8 a point in A[8] + * @param T2_8 a point in A[8] + * @param hadamard_bool_1 a boolean used for the last two steps of the chain + * @param hadamard_bool_2 a boolean used for the last two steps of the chain + * + * out : A -> B of kernel [4](T1_8,T2_8) + * hadamard_bool_1 controls if the domain is in standard or dual coordinates + * hadamard_bool_2 controls if the codomain is in standard or dual coordinates + * verify: add extra sanity check to ensure our 8-torsion points are coherent with the isogeny + * + */ +static int +theta_isogeny_compute(theta_isogeny_t *out, + const theta_structure_t *A, + const theta_point_t *T1_8, + const theta_point_t *T2_8, + bool hadamard_bool_1, + bool hadamard_bool_2, + bool verify) +{ 
+ out->hadamard_bool_1 = hadamard_bool_1; + out->hadamard_bool_2 = hadamard_bool_2; + out->domain = *A; + out->T1_8 = *T1_8; + out->T2_8 = *T2_8; + out->codomain.precomputation = false; + + theta_point_t TT1, TT2; + + if (hadamard_bool_1) { + hadamard(&TT1, T1_8); + to_squared_theta(&TT1, &TT1); + hadamard(&TT2, T2_8); + to_squared_theta(&TT2, &TT2); + } else { + to_squared_theta(&TT1, T1_8); + to_squared_theta(&TT2, T2_8); + } + + fp2_t t1, t2; + + // Test that our projective factor ABCDxzw is non zero, where + // TT1=(Ax, Bx, Cy, Dy), TT2=(Az, Bw, Cz, Dw) + // But ABCDxzw=0 can only happen if we had an unexpected splitting in + // the isogeny chain. + // In either case reject + // (this is not strictly necessary, we could just return (0:0:0:0)) + if (fp2_is_zero(&TT2.x) | fp2_is_zero(&TT2.y) | fp2_is_zero(&TT2.z) | fp2_is_zero(&TT2.t) | fp2_is_zero(&TT1.x) | + fp2_is_zero(&TT1.y)) + return 0; + + fp2_mul(&t1, &TT1.x, &TT2.y); + fp2_mul(&t2, &TT1.y, &TT2.x); + fp2_mul(&out->codomain.null_point.x, &TT2.x, &t1); + fp2_mul(&out->codomain.null_point.y, &TT2.y, &t2); + fp2_mul(&out->codomain.null_point.z, &TT2.z, &t1); + fp2_mul(&out->codomain.null_point.t, &TT2.t, &t2); + fp2_t t3; + fp2_mul(&t3, &TT2.z, &TT2.t); + fp2_mul(&out->precomputation.x, &t3, &TT1.y); + fp2_mul(&out->precomputation.y, &t3, &TT1.x); + fp2_copy(&out->precomputation.z, &out->codomain.null_point.t); + fp2_copy(&out->precomputation.t, &out->codomain.null_point.z); + + // If T1_8 and T2_8 are our 8-torsion points, this ensures that the + // 4-torsion points 2T1_8 and 2T2_8 are isotropic. 
+ if (verify) { + fp2_mul(&t1, &TT1.x, &out->precomputation.x); + fp2_mul(&t2, &TT1.y, &out->precomputation.y); + if (!fp2_is_equal(&t1, &t2)) + return 0; + fp2_mul(&t1, &TT1.z, &out->precomputation.z); + fp2_mul(&t2, &TT1.t, &out->precomputation.t); + if (!fp2_is_equal(&t1, &t2)) + return 0; + fp2_mul(&t1, &TT2.x, &out->precomputation.x); + fp2_mul(&t2, &TT2.z, &out->precomputation.z); + if (!fp2_is_equal(&t1, &t2)) + return 0; + fp2_mul(&t1, &TT2.y, &out->precomputation.y); + fp2_mul(&t2, &TT2.t, &out->precomputation.t); + if (!fp2_is_equal(&t1, &t2)) + return 0; + } + + if (hadamard_bool_2) { + hadamard(&out->codomain.null_point, &out->codomain.null_point); + } + return 1; +} + +/** + * @brief Compute a (2,2) isogeny when only the 4 torsion above the kernel is known and not the 8 + * torsion + * + * @param out Output: the theta_isogeny + * @param A a theta null point for the domain + * @param T1_4 a point in A[4] + * @param T2_4 a point in A[4] + * @param hadamard_bool_1 a boolean + * @param hadamard_bool_2 a boolean + * + * out : A -> B of kernel [2](T1_4,T2_4) + * hadamard_bool_1 controls if the domain is in standard or dual coordinates + * hadamard_bool_2 controls if the codomain is in standard or dual coordinates + * + */ +static void +theta_isogeny_compute_4(theta_isogeny_t *out, + const theta_structure_t *A, + const theta_point_t *T1_4, + const theta_point_t *T2_4, + bool hadamard_bool_1, + bool hadamard_bool_2) +{ + out->hadamard_bool_1 = hadamard_bool_1; + out->hadamard_bool_2 = hadamard_bool_2; + out->domain = *A; + out->T1_8 = *T1_4; + out->T2_8 = *T2_4; + out->codomain.precomputation = false; + + theta_point_t TT1, TT2; + // we will compute: + // TT1 = (xAB, _ , xCD, _) + // TT2 = (AA,BB,CC,DD) + + // fp2_t xA_inv,zA_inv,tB_inv; + + if (hadamard_bool_1) { + hadamard(&TT1, T1_4); + to_squared_theta(&TT1, &TT1); + + hadamard(&TT2, &A->null_point); + to_squared_theta(&TT2, &TT2); + } else { + to_squared_theta(&TT1, T1_4); + to_squared_theta(&TT2, 
&A->null_point); + } + + fp2_t sqaabb, sqaacc; + fp2_mul(&sqaabb, &TT2.x, &TT2.y); + fp2_mul(&sqaacc, &TT2.x, &TT2.z); + // No need to check the square roots, only used for signing. + // sqaabb = sqrt(AA*BB) + fp2_sqrt(&sqaabb); + // sqaacc = sqrt(AA*CC) + fp2_sqrt(&sqaacc); + + // we compute out->codomain.null_point = (xAB * sqaacc * AA, xAB *sqaabb *sqaacc, xCD*sqaabb * + // AA) out->precomputation = (xAB * BB * CC *DD , sqaabb * CC * DD * xAB , sqaacc * BB* DD * xAB + // , xCD * sqaabb *sqaacc * BB) + + fp2_mul(&out->codomain.null_point.y, &sqaabb, &sqaacc); + fp2_mul(&out->precomputation.t, &out->codomain.null_point.y, &TT1.z); + fp2_mul(&out->codomain.null_point.y, &out->codomain.null_point.y, + &TT1.x); // done for out->codomain.null_point.y + + fp2_mul(&out->codomain.null_point.t, &TT1.z, &sqaabb); + fp2_mul(&out->codomain.null_point.t, &out->codomain.null_point.t, + &TT2.x); // done for out->codomain.null_point.t + + fp2_mul(&out->codomain.null_point.x, &TT1.x, &TT2.x); + fp2_mul(&out->codomain.null_point.z, &out->codomain.null_point.x, + &TT2.z); // done for out->codomain.null_point.z + fp2_mul(&out->codomain.null_point.x, &out->codomain.null_point.x, + &sqaacc); // done for out->codomain.null_point.x + + fp2_mul(&out->precomputation.x, &TT1.x, &TT2.t); + fp2_mul(&out->precomputation.z, &out->precomputation.x, &TT2.y); + fp2_mul(&out->precomputation.x, &out->precomputation.x, &TT2.z); + fp2_mul(&out->precomputation.y, &out->precomputation.x, &sqaabb); // done for out->precomputation.y + fp2_mul(&out->precomputation.x, &out->precomputation.x, &TT2.y); // done for out->precomputation.x + fp2_mul(&out->precomputation.z, &out->precomputation.z, &sqaacc); // done for out->precomputation.z + fp2_mul(&out->precomputation.t, &out->precomputation.t, &TT2.y); // done for out->precomputation.t + + if (hadamard_bool_2) { + hadamard(&out->codomain.null_point, &out->codomain.null_point); + } +} + +/** + * @brief Compute a (2,2) isogeny when only the kernel is known and 
not the 8 or 4 torsion above + * + * @param out Output: the theta_isogeny + * @param A a theta null point for the domain + * @param T1_2 a point in A[2] + * @param T2_2 a point in A[2] + * @param hadamard_bool_1 a boolean + * @param hadamard_bool_2 a boolean + * + * out : A -> B of kernel (T1_2,T2_2) + * hadamard_bool_1 controls if the domain is in standard or dual coordinates + * hadamard_bool_2 controls if the codomain is in standard or dual coordinates + * + */ +static void +theta_isogeny_compute_2(theta_isogeny_t *out, + const theta_structure_t *A, + const theta_point_t *T1_2, + const theta_point_t *T2_2, + bool hadamard_bool_1, + bool hadamard_bool_2) +{ + out->hadamard_bool_1 = hadamard_bool_1; + out->hadamard_bool_2 = hadamard_bool_2; + out->domain = *A; + out->T1_8 = *T1_2; + out->T2_8 = *T2_2; + out->codomain.precomputation = false; + + theta_point_t TT2; + // we will compute: + // TT2 = (AA,BB,CC,DD) + + if (hadamard_bool_1) { + hadamard(&TT2, &A->null_point); + to_squared_theta(&TT2, &TT2); + } else { + to_squared_theta(&TT2, &A->null_point); + } + + // we compute out->codomain.null_point = (AA,sqaabb, sqaacc, sqaadd) + // out->precomputation = ( BB * CC *DD , sqaabb * CC * DD , sqaacc * BB* DD , sqaadd * BB * CC) + fp2_copy(&out->codomain.null_point.x, &TT2.x); + fp2_mul(&out->codomain.null_point.y, &TT2.x, &TT2.y); + fp2_mul(&out->codomain.null_point.z, &TT2.x, &TT2.z); + fp2_mul(&out->codomain.null_point.t, &TT2.x, &TT2.t); + // No need to check the square roots, only used for signing. 
+ fp2_sqrt(&out->codomain.null_point.y); + fp2_sqrt(&out->codomain.null_point.z); + fp2_sqrt(&out->codomain.null_point.t); + + fp2_mul(&out->precomputation.x, &TT2.z, &TT2.t); + fp2_mul(&out->precomputation.y, + &out->precomputation.x, + &out->codomain.null_point.y); // done for out->precomputation.y + fp2_mul(&out->precomputation.x, &out->precomputation.x, &TT2.y); // done for out->precomputation.x + fp2_mul(&out->precomputation.z, &TT2.t, &out->codomain.null_point.z); + fp2_mul(&out->precomputation.z, &out->precomputation.z, &TT2.y); // done for out->precomputation.z + fp2_mul(&out->precomputation.t, &TT2.z, &out->codomain.null_point.t); + fp2_mul(&out->precomputation.t, &out->precomputation.t, &TT2.y); // done for out->precomputation.t + + if (hadamard_bool_2) { + hadamard(&out->codomain.null_point, &out->codomain.null_point); + } +} + +static void +theta_isogeny_eval(theta_point_t *out, const theta_isogeny_t *phi, const theta_point_t *P) +{ + if (phi->hadamard_bool_1) { + hadamard(out, P); + to_squared_theta(out, out); + } else { + to_squared_theta(out, P); + } + fp2_mul(&out->x, &out->x, &phi->precomputation.x); + fp2_mul(&out->y, &out->y, &phi->precomputation.y); + fp2_mul(&out->z, &out->z, &phi->precomputation.z); + fp2_mul(&out->t, &out->t, &phi->precomputation.t); + + if (phi->hadamard_bool_2) { + hadamard(out, out); + } +} + +#if defined(ENABLE_SIGN) +// Sample a random secret index in [0, 5] to select one of the 6 normalisation +// matrices for the normalisation of the output of the (2,2)-chain during +// splitting +static unsigned char +sample_random_index(void) +{ + // To avoid bias in reduction we should only consider integers smaller + // than 2^32 which are a multiple of 6, so we only reduce bytes with a + // value in [0, 4294967292-1]. + // We have 4294967292/2^32 = ~99.9999999% chance that the first try is "good". 
+ unsigned char seed_arr[4]; + uint32_t seed; + + do { + randombytes(seed_arr, 4); + seed = ((uint32_t)seed_arr[0] | ((uint32_t)seed_arr[1] << 8) | ((uint32_t)seed_arr[2] << 16) | ((uint32_t)seed_arr[3] << 24)); // widen each byte first: shifting the promoted int into the sign bit is undefined + } while (seed >= 4294967292U); + + uint32_t secret_index = seed - (((uint64_t)seed * 2863311531U) >> 34) * 6; + assert(secret_index == seed % 6); // ensure the constant time trick above works + return (unsigned char)secret_index; +} +#endif + +static bool +splitting_compute(theta_splitting_t *out, const theta_structure_t *A, int zero_index, bool randomize) + +{ + // init + uint32_t ctl; + uint32_t count = 0; + fp2_t U_cst, t1, t2; + + memset(&out->M, 0, sizeof(basis_change_matrix_t)); + + // enumerate through all indices + for (int i = 0; i < 10; i++) { + fp2_set_zero(&U_cst); + for (int t = 0; t < 4; t++) { + // Iterate through the null point + choose_index_theta_point(&t2, t, &A->null_point); + choose_index_theta_point(&t1, t ^ EVEN_INDEX[i][1], &A->null_point); + + // Compute t1 * t2 + fp2_mul(&t1, &t1, &t2); + // If CHI_EVAL(i,t) is +1 we want ctl to be 0 and + // If CHI_EVAL(i,t) is -1 we want ctl to be 0xFF..FF + ctl = (uint32_t)(CHI_EVAL[EVEN_INDEX[i][0]][t] >> 1); + assert(ctl == 0 || ctl == 0xffffffff); + + fp2_neg(&t2, &t1); + fp2_select(&t1, &t1, &t2, ctl); + + // Then we compute U_cst ± (t1 * t2) + fp2_add(&U_cst, &U_cst, &t1); + } + + // If U_cst is 0 then update the splitting matrix + ctl = fp2_is_zero(&U_cst); + count -= ctl; + select_base_change_matrix(&out->M, &out->M, &SPLITTING_TRANSFORMS[i], ctl); + if (zero_index != -1 && i == zero_index && + !ctl) { // extra checks if we know exactly where the 0 index should be + return 0; + } + } + +#if defined(ENABLE_SIGN) + // Pick a random normalization matrix + if (randomize) { + unsigned char secret_index = sample_random_index(); + basis_change_matrix_t Mrandom; + + set_base_change_matrix_from_precomp(&Mrandom, &NORMALIZATION_TRANSFORMS[0]); + + // Use a constant time selection to pick the index we want + for (unsigned char i = 1; i 
< 6; i++) { + // When i == secret_index, mask == 0 and 0xFF..FF otherwise + int32_t mask = i - secret_index; + mask = (mask | -mask) >> 31; + select_base_change_matrix(&Mrandom, &Mrandom, &NORMALIZATION_TRANSFORMS[i], ~mask); + } + base_change_matrix_multiplication(&out->M, &Mrandom, &out->M); + } +#else + assert(!randomize); +#endif + + // apply the isomorphism to ensure the null point is compatible with splitting + apply_isomorphism(&out->B.null_point, &out->M, &A->null_point); + + // splitting was successful only if exactly one zero was identified + return count == 1; +} + +static int +theta_product_structure_to_elliptic_product(theta_couple_curve_t *E12, theta_structure_t *A) +{ + fp2_t xx, yy; + + // This should be true from our computations in splitting_compute + // but still check this for sanity + if (!is_product_theta_point(&A->null_point)) + return 0; + + ec_curve_init(&(E12->E1)); + ec_curve_init(&(E12->E2)); + + // A valid elliptic theta null point has no zero coordinate + if (fp2_is_zero(&A->null_point.x) | fp2_is_zero(&A->null_point.y) | fp2_is_zero(&A->null_point.z)) + return 0; + + // xx = x², yy = y² + fp2_sqr(&xx, &A->null_point.x); + fp2_sqr(&yy, &A->null_point.y); + // xx = x^4, yy = y^4 + fp2_sqr(&xx, &xx); + fp2_sqr(&yy, &yy); + + // A2 = -2(x^4+y^4)/(x^4-y^4) + fp2_add(&E12->E2.A, &xx, &yy); + fp2_sub(&E12->E2.C, &xx, &yy); + fp2_add(&E12->E2.A, &E12->E2.A, &E12->E2.A); + fp2_neg(&E12->E2.A, &E12->E2.A); + + // same with x,z + fp2_sqr(&xx, &A->null_point.x); + fp2_sqr(&yy, &A->null_point.z); + fp2_sqr(&xx, &xx); + fp2_sqr(&yy, &yy); + + // A1 = -2(x^4+z^4)/(x^4-z^4) + fp2_add(&E12->E1.A, &xx, &yy); + fp2_sub(&E12->E1.C, &xx, &yy); + fp2_add(&E12->E1.A, &E12->E1.A, &E12->E1.A); + fp2_neg(&E12->E1.A, &E12->E1.A); + + if (fp2_is_zero(&E12->E1.C) | fp2_is_zero(&E12->E2.C)) + return 0; + + return 1; +} + +static int +theta_point_to_montgomery_point(theta_couple_point_t *P12, const theta_point_t *P, const theta_structure_t *A) +{ + fp2_t temp; + 
const fp2_t *x, *z; + + if (!is_product_theta_point(P)) + return 0; + + x = &P->x; + z = &P->y; + if (fp2_is_zero(x) & fp2_is_zero(z)) { + x = &P->z; + z = &P->t; + } + if (fp2_is_zero(x) & fp2_is_zero(z)) { + return 0; // at this point P=(0:0:0:0) so is invalid + } + // P2.X = A.null_point.y * P.x + A.null_point.x * P.y + // P2.Z = - A.null_point.y * P.x + A.null_point.x * P.y + fp2_mul(&P12->P2.x, &A->null_point.y, x); + fp2_mul(&temp, &A->null_point.x, z); + fp2_sub(&P12->P2.z, &temp, &P12->P2.x); + fp2_add(&P12->P2.x, &P12->P2.x, &temp); + + x = &P->x; + z = &P->z; + if (fp2_is_zero(x) & fp2_is_zero(z)) { + x = &P->y; + z = &P->t; + } + // P1.X = A.null_point.z * P.x + A.null_point.x * P.z + // P1.Z = -A.null_point.z * P.x + A.null_point.x * P.z + fp2_mul(&P12->P1.x, &A->null_point.z, x); + fp2_mul(&temp, &A->null_point.x, z); + fp2_sub(&P12->P1.z, &temp, &P12->P1.x); + fp2_add(&P12->P1.x, &P12->P1.x, &temp); + return 1; +} + +static int +_theta_chain_compute_impl(unsigned n, + theta_couple_curve_t *E12, + const theta_kernel_couple_points_t *ker, + bool extra_torsion, + theta_couple_curve_t *E34, + theta_couple_point_t *P12, + size_t numP, + bool verify, + bool randomize) +{ + theta_structure_t theta; + + // lift the basis + theta_couple_jac_point_t xyT1, xyT2; + + ec_basis_t bas1 = { .P = ker->T1.P1, .Q = ker->T2.P1, .PmQ = ker->T1m2.P1 }; + ec_basis_t bas2 = { .P = ker->T1.P2, .Q = ker->T2.P2, .PmQ = ker->T1m2.P2 }; + if (!lift_basis(&xyT1.P1, &xyT2.P1, &bas1, &E12->E1)) + return 0; + if (!lift_basis(&xyT1.P2, &xyT2.P2, &bas2, &E12->E2)) + return 0; + + const unsigned extra = HD_extra_torsion * extra_torsion; + +#ifndef NDEBUG + assert(extra == 0 || extra == 2); // only cases implemented + if (!test_point_order_twof(&bas2.P, &E12->E2, n + extra)) + debug_print("bas2.P does not have correct order"); + + if (!test_jac_order_twof(&xyT2.P2, &E12->E2, n + extra)) + debug_print("xyT2.P2 does not have correct order"); +#endif + + theta_point_t pts[numP ? 
numP : 1]; + + int space = 1; + for (unsigned i = 1; i < n; i *= 2) + ++space; + + uint16_t todo[space]; + todo[0] = n - 2 + extra; + + int current = 0; + + // kernel points for the gluing isogeny + theta_couple_jac_point_t jacQ1[space], jacQ2[space]; + jacQ1[0] = xyT1; + jacQ2[0] = xyT2; + while (todo[current] != 1) { + assert(todo[current] >= 2); + ++current; + assert(current < space); + // the gluing isogeny is quite a bit more expensive than the others, + // so we adjust the usual splitting rule here a little bit: towards + // the end of the doubling chain it will be cheaper to recompute the + // doublings after evaluation than to push the intermediate points. + const unsigned num_dbls = todo[current - 1] >= 16 ? todo[current - 1] / 2 : todo[current - 1] - 1; + assert(num_dbls && num_dbls < todo[current - 1]); + double_couple_jac_point_iter(&jacQ1[current], num_dbls, &jacQ1[current - 1], E12); + double_couple_jac_point_iter(&jacQ2[current], num_dbls, &jacQ2[current - 1], E12); + todo[current] = todo[current - 1] - num_dbls; + } + + // kernel points for the remaining isogeny steps + theta_point_t thetaQ1[space], thetaQ2[space]; + + // the gluing step + theta_gluing_t first_step; + { + assert(todo[current] == 1); + + // compute the gluing isogeny + if (!gluing_compute(&first_step, E12, &jacQ1[current], &jacQ2[current], verify)) + return 0; + + // evaluate + for (unsigned j = 0; j < numP; ++j) { + assert(ec_is_zero(&P12[j].P1) || ec_is_zero(&P12[j].P2)); + if (!gluing_eval_point_special_case(&pts[j], &P12[j], &first_step)) + return 0; + } + + // push kernel points through gluing isogeny + for (int j = 0; j < current; ++j) { + gluing_eval_basis(&thetaQ1[j], &thetaQ2[j], &jacQ1[j], &jacQ2[j], &first_step); + --todo[j]; + } + + --current; + } + + // set-up the theta_structure for the first codomain + theta.null_point = first_step.codomain; + theta.precomputation = 0; + theta_precomputation(&theta); + + theta_isogeny_t step; + + // and now we do the remaining steps + 
for (unsigned i = 1; current >= 0 && todo[current]; ++i) { + assert(current < space); + while (todo[current] != 1) { + assert(todo[current] >= 2); + ++current; + assert(current < space); + const unsigned num_dbls = todo[current - 1] / 2; + assert(num_dbls && num_dbls < todo[current - 1]); + double_iter(&thetaQ1[current], &theta, &thetaQ1[current - 1], num_dbls); + double_iter(&thetaQ2[current], &theta, &thetaQ2[current - 1], num_dbls); + todo[current] = todo[current - 1] - num_dbls; + } + + // computing the next step + int ret; + if (i == n - 2) // penultimate step + ret = theta_isogeny_compute(&step, &theta, &thetaQ1[current], &thetaQ2[current], 0, 0, verify); + else if (i == n - 1) // ultimate step + ret = theta_isogeny_compute(&step, &theta, &thetaQ1[current], &thetaQ2[current], 1, 0, false); + else + ret = theta_isogeny_compute(&step, &theta, &thetaQ1[current], &thetaQ2[current], 0, 1, verify); + if (!ret) + return 0; + + for (unsigned j = 0; j < numP; ++j) + theta_isogeny_eval(&pts[j], &step, &pts[j]); + + // updating the codomain + theta = step.codomain; + + // pushing the kernel + assert(todo[current] == 1); + for (int j = 0; j < current; ++j) { + theta_isogeny_eval(&thetaQ1[j], &step, &thetaQ1[j]); + theta_isogeny_eval(&thetaQ2[j], &step, &thetaQ2[j]); + assert(todo[j]); + --todo[j]; + } + + --current; + } + + assert(current == -1); + + if (!extra_torsion) { + if (n >= 3) { + // in the last step we've skipped pushing the kernel since current was == 0, let's do it now + theta_isogeny_eval(&thetaQ1[0], &step, &thetaQ1[0]); + theta_isogeny_eval(&thetaQ2[0], &step, &thetaQ2[0]); + } + + // penultimate step + theta_isogeny_compute_4(&step, &theta, &thetaQ1[0], &thetaQ2[0], 0, 0); + for (unsigned j = 0; j < numP; ++j) + theta_isogeny_eval(&pts[j], &step, &pts[j]); + theta = step.codomain; + theta_isogeny_eval(&thetaQ1[0], &step, &thetaQ1[0]); + theta_isogeny_eval(&thetaQ2[0], &step, &thetaQ2[0]); + + // ultimate step + theta_isogeny_compute_2(&step, &theta, 
&thetaQ1[0], &thetaQ2[0], 1, 0); + for (unsigned j = 0; j < numP; ++j) + theta_isogeny_eval(&pts[j], &step, &pts[j]); + theta = step.codomain; + } + + // final splitting step + theta_splitting_t last_step; + + bool is_split = splitting_compute(&last_step, &theta, extra_torsion ? 8 : -1, randomize); + + if (!is_split) { + debug_print("kernel did not generate an isogeny between elliptic products"); + return 0; + } + + if (!theta_product_structure_to_elliptic_product(E34, &last_step.B)) + return 0; + + // evaluate + for (size_t j = 0; j < numP; ++j) { + apply_isomorphism(&pts[j], &last_step.M, &pts[j]); + if (!theta_point_to_montgomery_point(&P12[j], &pts[j], &last_step.B)) + return 0; + } + + return 1; +} + +int +theta_chain_compute_and_eval(unsigned n, + /*const*/ theta_couple_curve_t *E12, + const theta_kernel_couple_points_t *ker, + bool extra_torsion, + theta_couple_curve_t *E34, + theta_couple_point_t *P12, + size_t numP) +{ + return _theta_chain_compute_impl(n, E12, ker, extra_torsion, E34, P12, numP, false, false); +} + +// Like theta_chain_compute_and_eval, adding extra verification checks; +// used in the signature verification +int +theta_chain_compute_and_eval_verify(unsigned n, + /*const*/ theta_couple_curve_t *E12, + const theta_kernel_couple_points_t *ker, + bool extra_torsion, + theta_couple_curve_t *E34, + theta_couple_point_t *P12, + size_t numP) +{ + return _theta_chain_compute_impl(n, E12, ker, extra_torsion, E34, P12, numP, true, false); +} + +int +theta_chain_compute_and_eval_randomized(unsigned n, + /*const*/ theta_couple_curve_t *E12, + const theta_kernel_couple_points_t *ker, + bool extra_torsion, + theta_couple_curve_t *E34, + theta_couple_point_t *P12, + size_t numP) +{ + return _theta_chain_compute_impl(n, E12, ker, extra_torsion, E34, P12, numP, false, true); +} diff --git a/src/pqm4/sqisign_lvl1/ref/theta_isogenies.h b/src/pqm4/sqisign_lvl1/ref/theta_isogenies.h new file mode 100644 index 0000000..d151811 --- /dev/null +++ 
b/src/pqm4/sqisign_lvl1/ref/theta_isogenies.h @@ -0,0 +1,18 @@ +/** @file + * + * @authors Antonin Leroux + * + * @brief the theta isogeny header + */ + +#ifndef THETA_ISOGENY_H +#define THETA_ISOGENY_H + +#include +#include +#include +#include "theta_structure.h" +#include +#include + +#endif diff --git a/src/pqm4/sqisign_lvl1/ref/theta_structure.c b/src/pqm4/sqisign_lvl1/ref/theta_structure.c new file mode 100644 index 0000000..ce97ac6 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/theta_structure.c @@ -0,0 +1,78 @@ +#include "theta_structure.h" +#include + +void +theta_precomputation(theta_structure_t *A) +{ + + if (A->precomputation) { + return; + } + + theta_point_t A_dual; + to_squared_theta(&A_dual, &A->null_point); + + fp2_t t1, t2; + fp2_mul(&t1, &A_dual.x, &A_dual.y); + fp2_mul(&t2, &A_dual.z, &A_dual.t); + fp2_mul(&A->XYZ0, &t1, &A_dual.z); + fp2_mul(&A->XYT0, &t1, &A_dual.t); + fp2_mul(&A->YZT0, &t2, &A_dual.y); + fp2_mul(&A->XZT0, &t2, &A_dual.x); + + fp2_mul(&t1, &A->null_point.x, &A->null_point.y); + fp2_mul(&t2, &A->null_point.z, &A->null_point.t); + fp2_mul(&A->xyz0, &t1, &A->null_point.z); + fp2_mul(&A->xyt0, &t1, &A->null_point.t); + fp2_mul(&A->yzt0, &t2, &A->null_point.y); + fp2_mul(&A->xzt0, &t2, &A->null_point.x); + + A->precomputation = true; +} + +void +double_point(theta_point_t *out, theta_structure_t *A, const theta_point_t *in) +{ + to_squared_theta(out, in); + fp2_sqr(&out->x, &out->x); + fp2_sqr(&out->y, &out->y); + fp2_sqr(&out->z, &out->z); + fp2_sqr(&out->t, &out->t); + + if (!A->precomputation) { + theta_precomputation(A); + } + fp2_mul(&out->x, &out->x, &A->YZT0); + fp2_mul(&out->y, &out->y, &A->XZT0); + fp2_mul(&out->z, &out->z, &A->XYT0); + fp2_mul(&out->t, &out->t, &A->XYZ0); + + hadamard(out, out); + + fp2_mul(&out->x, &out->x, &A->yzt0); + fp2_mul(&out->y, &out->y, &A->xzt0); + fp2_mul(&out->z, &out->z, &A->xyt0); + fp2_mul(&out->t, &out->t, &A->xyz0); +} + +void +double_iter(theta_point_t *out, theta_structure_t *A, const 
theta_point_t *in, int exp) +{ + if (exp == 0) { + *out = *in; + } else { + double_point(out, A, in); + for (int i = 1; i < exp; i++) { + double_point(out, A, out); + } + } +} + +uint32_t +is_product_theta_point(const theta_point_t *P) +{ + fp2_t t1, t2; + fp2_mul(&t1, &P->x, &P->t); + fp2_mul(&t2, &P->y, &P->z); + return fp2_is_equal(&t1, &t2); +} diff --git a/src/pqm4/sqisign_lvl1/ref/theta_structure.h b/src/pqm4/sqisign_lvl1/ref/theta_structure.h new file mode 100644 index 0000000..fc630b7 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/theta_structure.h @@ -0,0 +1,135 @@ +/** @file + * + * @authors Antonin Leroux + * + * @brief the theta structure header + */ + +#ifndef THETA_STRUCTURE_H +#define THETA_STRUCTURE_H + +#include +#include +#include + +/** @internal + * @ingroup hd_module + * @defgroup hd_theta Functions for theta structures + * @{ + */ + +/** + * @brief Perform the hadamard transform on a theta point + * + * @param out Output: the theta_point + * @param in a theta point* + * in = (x,y,z,t) + * out = (x+y+z+t, x-y+z-t, x+y-z-t, x-y-z+t) + * + */ +static inline void +hadamard(theta_point_t *out, const theta_point_t *in) +{ + fp2_t t1, t2, t3, t4; + + // t1 = x + y + fp2_add(&t1, &in->x, &in->y); + // t2 = x - y + fp2_sub(&t2, &in->x, &in->y); + // t3 = z + t + fp2_add(&t3, &in->z, &in->t); + // t4 = z - t + fp2_sub(&t4, &in->z, &in->t); + + fp2_add(&out->x, &t1, &t3); + fp2_add(&out->y, &t2, &t4); + fp2_sub(&out->z, &t1, &t3); + fp2_sub(&out->t, &t2, &t4); +} + +/** + * @brief Square the coordinates of a theta point + * @param out Output: the theta_point + * @param in a theta point* + * in = (x,y,z,t) + * out = (x^2, y^2, z^2, t^2) + * + */ +static inline void +pointwise_square(theta_point_t *out, const theta_point_t *in) +{ + fp2_sqr(&out->x, &in->x); + fp2_sqr(&out->y, &in->y); + fp2_sqr(&out->z, &in->z); + fp2_sqr(&out->t, &in->t); +} + +/** + * @brief Square the coordinates and then perform the hadamard transform + * + * @param out Output: the 
theta_point + * @param in a theta point* + * in = (x,y,z,t) + * out = (x^2+y^2+z^2+t^2, x^2-y^2+z^2-t^2, x^2+y^2-z^2-t^2, x^2-y^2-z^2+t^2) + * + */ +static inline void +to_squared_theta(theta_point_t *out, const theta_point_t *in) +{ + pointwise_square(out, in); + hadamard(out, out); +} + +/** + * @brief Perform the theta structure precomputation + * + * @param A Output: the theta_structure + * + * if A.null_point = (x,y,z,t) + * if (xx,yy,zz,tt) = to_squared_theta(A.null_point) + * Computes the triple products XYZ0, XYT0, YZT0, XZT0 of (xx,yy,zz,tt) and xyz0, xyt0, yzt0, xzt0 of (x,y,z,t), then marks A as precomputed + * + */ +void theta_precomputation(theta_structure_t *A); + +/** + * @brief Compute the double of the theta point in on the theta structure A + * + * @param out Output: the theta_point + * @param A a theta structure + * @param in a theta point in the theta structure A + * in = (x,y,z,t) + * out = [2] (x,y,z,t) + * /!\ assumes that no coordinate is zero; the precomputation of A is performed on demand if it has not been done + * + */ +void double_point(theta_point_t *out, theta_structure_t *A, const theta_point_t *in); + +/** + * @brief Compute the iterated double of the theta point in on the theta structure A + * + * @param out Output: the theta_point + * @param A a theta structure + * @param in a theta point in the theta structure A + * @param exp the exponent + * in = (x,y,z,t) + * out = [2^exp] (x,y,z,t) + * /!\ assumes that no coordinate is zero; the precomputation of A is performed on demand by double_point + * + */ +void double_iter(theta_point_t *out, theta_structure_t *A, const theta_point_t *in, int exp); + +/** + * @brief Check if a theta point is a product theta point + * + * @param P a theta point + * @return 0xFFFFFFFF if true, zero otherwise + */ +uint32_t is_product_theta_point(const theta_point_t *P); + +// end hd_theta +/** + * @} + */ + +#endif diff --git a/src/pqm4/sqisign_lvl1/ref/tools.h b/src/pqm4/sqisign_lvl1/ref/tools.h new file mode 100644 index 0000000..5a6a505 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/tools.h @@ -0,0 +1,49 @@ + +#ifndef TOOLS_H 
+#define TOOLS_H + +#include + +// Debug printing: +// https://stackoverflow.com/questions/1644868/define-macro-for-debug-printing-in-c +#ifndef NDEBUG +#define DEBUG_PRINT 1 +#else +#define DEBUG_PRINT 0 +#endif + +#ifndef __FILE_NAME__ +#define __FILE_NAME__ "NA" +#endif + +#ifndef __LINE__ +#define __LINE__ 0 +#endif + +#ifndef __func__ +#define __func__ "NA" +#endif + +#define debug_print(fmt) \ + do { \ + if (DEBUG_PRINT) \ + printf("warning: %s, file %s, line %d, function %s().\n", \ + fmt, \ + __FILE_NAME__, \ + __LINE__, \ + __func__); \ + } while (0) + + +clock_t tic(void); +float tac(void); /* time in ms since last tic */ +float TAC(const char *str); /* same, but prints it with label 'str' */ +float toc(const clock_t t); /* time in ms since t */ +float TOC(const clock_t t, const char *str); /* same, but prints it with label 'str' */ +float TOC_clock(const clock_t t, const char *str); + +clock_t dclock(const clock_t t); // return the clock cycle diff between now and t +float clock_to_time(const clock_t t, + const char *str); // convert the number of clock cycles t to time +float clock_print(const clock_t t, const char *str); +#endif diff --git a/src/pqm4/sqisign_lvl1/ref/tutil.h b/src/pqm4/sqisign_lvl1/ref/tutil.h new file mode 100644 index 0000000..59f1620 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/tutil.h @@ -0,0 +1,36 @@ +#ifndef TUTIL_H +#define TUTIL_H + +#include +#include + +#if defined(__GNUC__) || defined(__clang__) +#define BSWAP16(i) __builtin_bswap16((i)) +#define BSWAP32(i) __builtin_bswap32((i)) +#define BSWAP64(i) __builtin_bswap64((i)) +#define UNUSED __attribute__((unused)) +#else +#define BSWAP16(i) ((((i) >> 8) & 0xff) | (((i) & 0xff00) << 8)) +#define BSWAP32(i) \ + ((((i) >> 24) & 0xff) | (((i) >> 8) & 0xff00) | (((i) & 0xff00) << 8) | ((i) << 24)) +#define BSWAP64(i) ((BSWAP32((uint64_t)(i) >> 32) & 0xffffffff) | ((uint64_t)BSWAP32((uint32_t)(i)) << 32)) /* swap each 32-bit half, widening before the shift by 32 */ +#define UNUSED +#endif + +#if defined(RADIX_64) +#define digit_t uint64_t +#define sdigit_t int64_t +#define 
RADIX 64 +#define LOG2RADIX 6 +#define BSWAP_DIGIT(i) BSWAP64(i) +#elif defined(RADIX_32) +#define digit_t uint32_t +#define sdigit_t int32_t +#define RADIX 32 +#define LOG2RADIX 5 +#define BSWAP_DIGIT(i) BSWAP32(i) +#else +#error "Radix must be 32bit or 64 bit" +#endif + +#endif diff --git a/src/pqm4/sqisign_lvl1/ref/verification.h b/src/pqm4/sqisign_lvl1/ref/verification.h new file mode 100644 index 0000000..af67469 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/verification.h @@ -0,0 +1,123 @@ +/** @file + * + * @brief The verification protocol + */ + +#ifndef VERIFICATION_H +#define VERIFICATION_H + +#include +#include + +/** @defgroup verification SQIsignHD verification protocol + * @{ + */ + +/** @defgroup verification_t Types for SQIsignHD verification protocol + * @{ + */ + +typedef digit_t scalar_t[NWORDS_ORDER]; +typedef scalar_t scalar_mtx_2x2_t[2][2]; + +/** @brief Type for the signature + * + * @typedef signature_t + * + * @struct signature + * + */ +typedef struct signature +{ + fp2_t E_aux_A; // the Montgomery A-coefficient for the auxiliary curve + uint8_t backtracking; + uint8_t two_resp_length; + scalar_mtx_2x2_t mat_Bchall_can_to_B_chall; // the matrix of the desired basis + scalar_t chall_coeff; + uint8_t hint_aux; + uint8_t hint_chall; +} signature_t; + +/** @brief Type for the public keys + * + * @typedef public_key_t + * + * @struct public_key + * + */ +typedef struct public_key +{ + ec_curve_t curve; // the normalized A-coefficient of the Montgomery curve + uint8_t hint_pk; +} public_key_t; + +/** @} + */ + +/*************************** Functions *****************************/ + +void public_key_init(public_key_t *pk); +void public_key_finalize(public_key_t *pk); + +void hash_to_challenge(scalar_t *scalar, + const public_key_t *pk, + const ec_curve_t *com_curve, + const unsigned char *message, + size_t length); + +/** + * @brief Verification + * + * @param sig signature + * @param pk public key + * @param m message + * @param l size + * 
@returns 1 if the signature verifies, 0 otherwise
+ */
+int protocols_verify(signature_t *sig, const public_key_t *pk, const unsigned char *m, size_t l);
+
+/*************************** Encoding *****************************/
+
+/** @defgroup encoding Encoding and decoding functions
+ * @{
+ */
+
+/**
+ * @brief Encodes a signature as a byte array
+ *
+ * @param enc : Byte array to encode the signature in
+ * @param sig : Signature to encode
+ */
+void signature_to_bytes(unsigned char *enc, const signature_t *sig);
+
+/**
+ * @brief Decodes a signature from a byte array
+ *
+ * @param sig : Structure to decode the signature in
+ * @param enc : Byte array to decode
+ */
+void signature_from_bytes(signature_t *sig, const unsigned char *enc);
+
+/**
+ * @brief Encodes a public key as a byte array
+ *
+ * @param enc : Byte array to encode the public key in
+ * @param pk : Public key to encode
+ */
+unsigned char *public_key_to_bytes(unsigned char *enc, const public_key_t *pk);
+
+/**
+ * @brief Decodes a public key from a byte array
+ *
+ * @param pk : Structure to decode the public key in
+ * @param enc : Byte array to decode
+ */
+const unsigned char *public_key_from_bytes(public_key_t *pk, const unsigned char *enc);
+
+/** @}
+ */
+
+/** @}
+ */
+
+#endif
diff --git a/src/pqm4/sqisign_lvl1/ref/verify.c b/src/pqm4/sqisign_lvl1/ref/verify.c
new file mode 100644
index 0000000..b5f78ad
--- /dev/null
+++ b/src/pqm4/sqisign_lvl1/ref/verify.c
@@ -0,0 +1,318 @@
+#include
+#include
+#include
+#include
+#include
+
+// Check that the basis change matrix elements are canonical
+// representatives modulo
+// 2^(SQIsign_response_length + HD_extra_torsion - backtracking).
+static int
+check_canonical_basis_change_matrix(const signature_t *sig)
+{
+    // This works as long as all values in sig->mat_Bchall_can_to_B_chall are
+    // positive integers.
+    int ret = 1;
+    scalar_t aux;
+
+    // backtracking is a byte taken from the signature, so it must not be
+    // allowed to make the shift amount negative. A signature with such a
+    // value is rejected by protocols_verify() anyway (pow_dim2_deg_resp
+    // would be negative), so failing here does not change what verifies.
+    const int log2_bound = SQIsign_response_length + HD_extra_torsion - (int)sig->backtracking;
+    if (log2_bound < 0)
+        return 0;
+
+    // aux = 2^log2_bound, the exclusive upper bound for every matrix entry
+    memset(aux, 0, NWORDS_ORDER * sizeof(digit_t));
+    aux[0] = 0x1;
+    multiple_mp_shiftl(aux, log2_bound, NWORDS_ORDER);
+
+    for (int i = 0; i < 2; i++) {
+        for (int j = 0; j < 2; j++) {
+            if (mp_compare(aux, sig->mat_Bchall_can_to_B_chall[i][j], NWORDS_ORDER) <= 0) {
+                ret = 0;
+            }
+        }
+    }
+
+    return ret;
+}
+
+// Compute the 2^n isogeny from the signature with kernel
+// P + [chall_coeff]Q and store the codomain in E_chall
+static int
+compute_challenge_verify(ec_curve_t *E_chall, const signature_t *sig, const ec_curve_t *Epk, const uint8_t hint_pk)
+{
+    ec_basis_t bas_EA;
+    ec_isog_even_t phi_chall;
+
+    // Set domain and length of 2^n isogeny
+    copy_curve(&phi_chall.curve, Epk);
+    phi_chall.length = TORSION_EVEN_POWER - sig->backtracking;
+
+    // Compute the basis from the supplied hint
+    if (!ec_curve_to_basis_2f_from_hint(&bas_EA, &phi_chall.curve, TORSION_EVEN_POWER, hint_pk)) // canonical
+        return 0;
+
+    // recovering the exact challenge
+    {
+        if (!ec_ladder3pt(&phi_chall.kernel, sig->chall_coeff, &bas_EA.P, &bas_EA.Q, &bas_EA.PmQ, &phi_chall.curve)) {
+            return 0;
+        };
+    }
+
+    // Double the kernel until it has the correct order
+    ec_dbl_iter(&phi_chall.kernel, sig->backtracking, &phi_chall.kernel, &phi_chall.curve);
+
+    // Compute the codomain
+    copy_curve(E_chall, &phi_chall.curve);
+    if (ec_eval_even(E_chall, &phi_chall, NULL, 0))
+        return 0;
+    return 1;
+}
+
+// same as matrix_application_even_basis() in id2iso.c, with some modifications:
+// - this version works with a matrix of scalars (not ibz_t).
+// - reduction modulo 2^f of matrix elements is removed here, because it is
+// assumed that the elements are already canonical representatives modulo
+// 2^f; this is ensured by calling check_canonical_basis_change_matrix() at
+// the beginning of protocols_verify().
+static int +matrix_scalar_application_even_basis(ec_basis_t *bas, const ec_curve_t *E, scalar_mtx_2x2_t *mat, int f) +{ + scalar_t scalar0, scalar1; + memset(scalar0, 0, NWORDS_ORDER * sizeof(digit_t)); + memset(scalar1, 0, NWORDS_ORDER * sizeof(digit_t)); + + ec_basis_t tmp_bas; + copy_basis(&tmp_bas, bas); + + // For a matrix [[a, c], [b, d]] we compute: + // + // first basis element R = [a]P + [b]Q + if (!ec_biscalar_mul(&bas->P, (*mat)[0][0], (*mat)[1][0], f, &tmp_bas, E)) + return 0; + // second basis element S = [c]P + [d]Q + if (!ec_biscalar_mul(&bas->Q, (*mat)[0][1], (*mat)[1][1], f, &tmp_bas, E)) + return 0; + // Their difference R - S = [a - c]P + [b - d]Q + mp_sub(scalar0, (*mat)[0][0], (*mat)[0][1], NWORDS_ORDER); + mp_mod_2exp(scalar0, f, NWORDS_ORDER); + mp_sub(scalar1, (*mat)[1][0], (*mat)[1][1], NWORDS_ORDER); + mp_mod_2exp(scalar1, f, NWORDS_ORDER); + return ec_biscalar_mul(&bas->PmQ, scalar0, scalar1, f, &tmp_bas, E); +} + +// Compute the bases for the challenge and auxillary curve from +// the canonical bases. Challenge basis is reconstructed from the +// compressed scalars within the challenge. 
+static int +challenge_and_aux_basis_verify(ec_basis_t *B_chall_can, + ec_basis_t *B_aux_can, + ec_curve_t *E_chall, + ec_curve_t *E_aux, + signature_t *sig, + const int pow_dim2_deg_resp) +{ + + // recovering the canonical basis as TORSION_EVEN_POWER for consistency with signing + if (!ec_curve_to_basis_2f_from_hint(B_chall_can, E_chall, TORSION_EVEN_POWER, sig->hint_chall)) + return 0; + + // setting to the right order + ec_dbl_iter_basis(B_chall_can, + TORSION_EVEN_POWER - pow_dim2_deg_resp - HD_extra_torsion - sig->two_resp_length, + B_chall_can, + E_chall); + + if (!ec_curve_to_basis_2f_from_hint(B_aux_can, E_aux, TORSION_EVEN_POWER, sig->hint_aux)) + return 0; + + // setting to the right order + ec_dbl_iter_basis(B_aux_can, TORSION_EVEN_POWER - pow_dim2_deg_resp - HD_extra_torsion, B_aux_can, E_aux); + +#ifndef NDEBUG + if (!test_basis_order_twof(B_chall_can, E_chall, HD_extra_torsion + pow_dim2_deg_resp + sig->two_resp_length)) + debug_print("canonical basis has wrong order, expect something to fail"); +#endif + + // applying the change matrix on the basis of E_chall + return matrix_scalar_application_even_basis(B_chall_can, + E_chall, + &sig->mat_Bchall_can_to_B_chall, + pow_dim2_deg_resp + HD_extra_torsion + sig->two_resp_length); +} + +// When two_resp_length is non-zero, we must compute a small 2^n-isogeny +// updating E_chall as the codomain as well as push the basis on E_chall +// through this isogeny +static int +two_response_isogeny_verify(ec_curve_t *E_chall, ec_basis_t *B_chall_can, const signature_t *sig, int pow_dim2_deg_resp) +{ + ec_point_t ker, points[3]; + + // choosing the right point for the small two_isogenies + if (mp_is_even(sig->mat_Bchall_can_to_B_chall[0][0], NWORDS_ORDER) && + mp_is_even(sig->mat_Bchall_can_to_B_chall[1][0], NWORDS_ORDER)) { + copy_point(&ker, &B_chall_can->Q); + } else { + copy_point(&ker, &B_chall_can->P); + } + + copy_point(&points[0], &B_chall_can->P); + copy_point(&points[1], &B_chall_can->Q); + 
copy_point(&points[2], &B_chall_can->PmQ); + + ec_dbl_iter(&ker, pow_dim2_deg_resp + HD_extra_torsion, &ker, E_chall); + +#ifndef NDEBUG + if (!test_point_order_twof(&ker, E_chall, sig->two_resp_length)) + debug_print("kernel does not have order 2^(two_resp_length"); +#endif + + if (ec_eval_small_chain(E_chall, &ker, sig->two_resp_length, points, 3, false)) { + return 0; + } + +#ifndef NDEBUG + if (!test_point_order_twof(&points[0], E_chall, HD_extra_torsion + pow_dim2_deg_resp)) + debug_print("points[0] does not have order 2^(HD_extra_torsion + pow_dim2_deg_resp"); + if (!test_point_order_twof(&points[1], E_chall, HD_extra_torsion + pow_dim2_deg_resp)) + debug_print("points[1] does not have order 2^(HD_extra_torsion + pow_dim2_deg_resp"); + if (!test_point_order_twof(&points[2], E_chall, HD_extra_torsion + pow_dim2_deg_resp)) + debug_print("points[2] does not have order 2^(HD_extra_torsion + pow_dim2_deg_resp"); +#endif + + copy_point(&B_chall_can->P, &points[0]); + copy_point(&B_chall_can->Q, &points[1]); + copy_point(&B_chall_can->PmQ, &points[2]); + return 1; +} + +// The commitment curve can be recovered from the codomain of the 2D +// isogeny built from the bases computed during verification. 
+static int +compute_commitment_curve_verify(ec_curve_t *E_com, + const ec_basis_t *B_chall_can, + const ec_basis_t *B_aux_can, + const ec_curve_t *E_chall, + const ec_curve_t *E_aux, + int pow_dim2_deg_resp) + +{ +#ifndef NDEBUG + // Check all the points are the correct order + if (!test_basis_order_twof(B_chall_can, E_chall, HD_extra_torsion + pow_dim2_deg_resp)) + debug_print("B_chall_can does not have order 2^(HD_extra_torsion + pow_dim2_deg_resp"); + + if (!test_basis_order_twof(B_aux_can, E_aux, HD_extra_torsion + pow_dim2_deg_resp)) + debug_print("B_aux_can does not have order 2^(HD_extra_torsion + pow_dim2_deg_resp"); +#endif + + // now compute the dim2 isogeny from Echall x E_aux -> E_com x E_aux' + // of kernel B_chall_can x B_aux_can + + // first we set-up the kernel + theta_couple_curve_t EchallxEaux; + copy_curve(&EchallxEaux.E1, E_chall); + copy_curve(&EchallxEaux.E2, E_aux); + + theta_kernel_couple_points_t dim_two_ker; + copy_bases_to_kernel(&dim_two_ker, B_chall_can, B_aux_can); + + // computing the isogeny + theta_couple_curve_t codomain; + int codomain_splits; + ec_curve_init(&codomain.E1); + ec_curve_init(&codomain.E2); + // handling the special case where we don't need to perform any dim2 computation + if (pow_dim2_deg_resp == 0) { + codomain_splits = 1; + copy_curve(&codomain.E1, &EchallxEaux.E1); + copy_curve(&codomain.E2, &EchallxEaux.E2); + // We still need to check that E_chall is supersingular + // This assumes that HD_extra_torsion == 2 + if (!ec_is_basis_four_torsion(B_chall_can, E_chall)) { + return 0; + } + } else { + codomain_splits = theta_chain_compute_and_eval_verify( + pow_dim2_deg_resp, &EchallxEaux, &dim_two_ker, true, &codomain, NULL, 0); + } + + // computing the commitment curve + // its always the first one because of our (2^n,2^n)-isogeny formulae + copy_curve(E_com, &codomain.E1); + + return codomain_splits; +} + +// SQIsign verification +int +protocols_verify(signature_t *sig, const public_key_t *pk, const unsigned char 
*m, size_t l) +{ + int verify; + + if (!check_canonical_basis_change_matrix(sig)) + return 0; + + // Computation of the length of the dim 2 2^n isogeny + int pow_dim2_deg_resp = SQIsign_response_length - (int)sig->two_resp_length - (int)sig->backtracking; + + // basic sanity test: checking that the response is not too long + if (pow_dim2_deg_resp < 0) + return 0; + // The dim 2 isogeny embeds a dim 1 isogeny of odd degree, so it can + // never be of length 2. + if (pow_dim2_deg_resp == 1) + return 0; + + // check the public curve is valid + if (!ec_curve_verify_A(&(pk->curve).A)) + return 0; + + // Set auxiliary curve from the A-coefficient within the signature + ec_curve_t E_aux; + if (!ec_curve_init_from_A(&E_aux, &sig->E_aux_A)) + return 0; // invalid curve + + // checking that we are given A-coefficients and no precomputation + assert(fp2_is_one(&pk->curve.C) == 0xFFFFFFFF && !pk->curve.is_A24_computed_and_normalized); + + // computation of the challenge + ec_curve_t E_chall; + if (!compute_challenge_verify(&E_chall, sig, &pk->curve, pk->hint_pk)) { + return 0; + } + + // Computation of the canonical bases for the challenge and aux curve + ec_basis_t B_chall_can, B_aux_can; + + if (!challenge_and_aux_basis_verify(&B_chall_can, &B_aux_can, &E_chall, &E_aux, sig, pow_dim2_deg_resp)) { + return 0; + } + + // When two_resp_length != 0 we need to compute a second, short 2^r-isogeny + if (sig->two_resp_length > 0) { + if (!two_response_isogeny_verify(&E_chall, &B_chall_can, sig, pow_dim2_deg_resp)) { + return 0; + } + } + + // We can recover the commitment curve with a 2D isogeny + // The supplied signature did not compute an isogeny between eliptic products + // and so definitely is an invalid signature. 
+ ec_curve_t E_com; + if (!compute_commitment_curve_verify(&E_com, &B_chall_can, &B_aux_can, &E_chall, &E_aux, pow_dim2_deg_resp)) + return 0; + + scalar_t chk_chall; + + // recomputing the challenge vector + hash_to_challenge(&chk_chall, pk, &E_com, m, l); + + // performing the final check + verify = mp_compare(sig->chall_coeff, chk_chall, NWORDS_ORDER) == 0; + + return verify; +} diff --git a/src/pqm4/sqisign_lvl1/ref/xeval.c b/src/pqm4/sqisign_lvl1/ref/xeval.c new file mode 100644 index 0000000..7fc7170 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/xeval.c @@ -0,0 +1,64 @@ +#include "isog.h" +#include "ec.h" +#include + +// ----------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------------------- + +// Degree-2 isogeny evaluation with kenerl generated by P != (0, 0) +void +xeval_2(ec_point_t *R, ec_point_t *const Q, const int lenQ, const ec_kps2_t *kps) +{ + fp2_t t0, t1, t2; + for (int j = 0; j < lenQ; j++) { + fp2_add(&t0, &Q[j].x, &Q[j].z); + fp2_sub(&t1, &Q[j].x, &Q[j].z); + fp2_mul(&t2, &kps->K.x, &t1); + fp2_mul(&t1, &kps->K.z, &t0); + fp2_add(&t0, &t2, &t1); + fp2_sub(&t1, &t2, &t1); + fp2_mul(&R[j].x, &Q[j].x, &t0); + fp2_mul(&R[j].z, &Q[j].z, &t1); + } +} + +void +xeval_2_singular(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_kps2_t *kps) +{ + fp2_t t0, t1; + for (int i = 0; i < lenQ; i++) { + fp2_mul(&t0, &Q[i].x, &Q[i].z); + fp2_mul(&t1, &kps->K.x, &Q[i].z); + fp2_add(&t1, &t1, &Q[i].x); + fp2_mul(&t1, &t1, &Q[i].x); + fp2_sqr(&R[i].x, &Q[i].z); + fp2_add(&R[i].x, &R[i].x, &t1); + fp2_mul(&R[i].z, &t0, &kps->K.z); + } +} + +// Degree-4 isogeny evaluation with kenerl generated by P such that [2]P != (0, 0) +void +xeval_4(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_kps4_t *kps) +{ + const ec_point_t *K = kps->K; + + fp2_t t0, t1; + + for (int i = 0; i < lenQ; i++) { + fp2_add(&t0, &Q[i].x, &Q[i].z); + fp2_sub(&t1, 
&Q[i].x, &Q[i].z); + fp2_mul(&(R[i].x), &t0, &K[1].x); + fp2_mul(&(R[i].z), &t1, &K[2].x); + fp2_mul(&t0, &t0, &t1); + fp2_mul(&t0, &t0, &K[0].x); + fp2_add(&t1, &(R[i].x), &(R[i].z)); + fp2_sub(&(R[i].z), &(R[i].x), &(R[i].z)); + fp2_sqr(&t1, &t1); + fp2_sqr(&(R[i].z), &(R[i].z)); + fp2_add(&(R[i].x), &t0, &t1); + fp2_sub(&t0, &t0, &(R[i].z)); + fp2_mul(&(R[i].x), &(R[i].x), &t1); + fp2_mul(&(R[i].z), &(R[i].z), &t0); + } +} diff --git a/src/pqm4/sqisign_lvl1/ref/xisog.c b/src/pqm4/sqisign_lvl1/ref/xisog.c new file mode 100644 index 0000000..7242d29 --- /dev/null +++ b/src/pqm4/sqisign_lvl1/ref/xisog.c @@ -0,0 +1,61 @@ +#include "isog.h" +#include "ec.h" +#include + +// ------------------------------------------------------------------------- +// ------------------------------------------------------------------------- + +// Degree-2 isogeny with kernel generated by P != (0 ,0) +// Outputs the curve coefficient in the form A24=(A+2C:4C) +void +xisog_2(ec_kps2_t *kps, ec_point_t *B, const ec_point_t P) +{ + fp2_sqr(&B->x, &P.x); + fp2_sqr(&B->z, &P.z); + fp2_sub(&B->x, &B->z, &B->x); + fp2_add(&kps->K.x, &P.x, &P.z); + fp2_sub(&kps->K.z, &P.x, &P.z); +} + +void +xisog_2_singular(ec_kps2_t *kps, ec_point_t *B24, ec_point_t A24) +{ + // No need to check the square root, only used for signing. 
+ fp2_t t0, four; + fp2_set_small(&four, 4); + fp2_add(&t0, &A24.x, &A24.x); + fp2_sub(&t0, &t0, &A24.z); + fp2_add(&t0, &t0, &t0); + fp2_inv(&A24.z); + fp2_mul(&t0, &t0, &A24.z); + fp2_copy(&kps->K.x, &t0); + fp2_add(&B24->x, &t0, &t0); + fp2_sqr(&t0, &t0); + fp2_sub(&t0, &t0, &four); + fp2_sqrt(&t0); + fp2_neg(&kps->K.z, &t0); + fp2_add(&B24->z, &t0, &t0); + fp2_add(&B24->x, &B24->x, &B24->z); + fp2_add(&B24->z, &B24->z, &B24->z); +} + +// Degree-4 isogeny with kernel generated by P such that [2]P != (0 ,0) +// Outputs the curve coefficient in the form A24=(A+2C:4C) +void +xisog_4(ec_kps4_t *kps, ec_point_t *B, const ec_point_t P) +{ + ec_point_t *K = kps->K; + + fp2_sqr(&K[0].x, &P.x); + fp2_sqr(&K[0].z, &P.z); + fp2_add(&K[1].x, &K[0].z, &K[0].x); + fp2_sub(&K[1].z, &K[0].z, &K[0].x); + fp2_mul(&B->x, &K[1].x, &K[1].z); + fp2_sqr(&B->z, &K[0].z); + + // Constants for xeval_4 + fp2_add(&K[2].x, &P.x, &P.z); + fp2_sub(&K[1].x, &P.x, &P.z); + fp2_add(&K[0].x, &K[0].z, &K[0].z); + fp2_add(&K[0].x, &K[0].x, &K[0].x); +} diff --git a/src/pqm4/sqisign_lvl3/ref/api.h b/src/pqm4/sqisign_lvl3/ref/api.h new file mode 100644 index 0000000..1670ea6 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/api.h @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: Apache-2.0 + +#ifndef api_h +#define api_h + +#include +#include + +#define CRYPTO_SECRETKEYBYTES 529 +#define CRYPTO_PUBLICKEYBYTES 97 +#define CRYPTO_BYTES 224 + +#define CRYPTO_ALGNAME "SQIsign_lvl3" + +SQISIGN_API +int +crypto_sign_keypair(unsigned char *pk, unsigned char *sk); + +SQISIGN_API +int +crypto_sign(unsigned char *sm, size_t *smlen, + const unsigned char *m, size_t mlen, + const unsigned char *sk); + +SQISIGN_API +int +crypto_sign_open(unsigned char *m, size_t *mlen, + const unsigned char *sm, size_t smlen, + const unsigned char *pk); + +#endif /* api_h */ diff --git a/src/pqm4/sqisign_lvl3/ref/basis.c b/src/pqm4/sqisign_lvl3/ref/basis.c new file mode 100644 index 0000000..94cb7fc --- /dev/null +++ 
b/src/pqm4/sqisign_lvl3/ref/basis.c @@ -0,0 +1,416 @@ +#include "ec.h" +#include "fp2.h" +#include "e0_basis.h" +#include + +uint32_t +ec_recover_y(fp2_t *y, const fp2_t *Px, const ec_curve_t *curve) +{ // Recover y-coordinate of a point on the Montgomery curve y^2 = x^3 + Ax^2 + x + fp2_t t0; + + fp2_sqr(&t0, Px); + fp2_mul(y, &t0, &curve->A); // Ax^2 + fp2_add(y, y, Px); // Ax^2 + x + fp2_mul(&t0, &t0, Px); + fp2_add(y, y, &t0); // x^3 + Ax^2 + x + // This is required, because we do not yet know that our curves are + // supersingular so our points live on the twist with B = 1. + return fp2_sqrt_verify(y); +} + +static void +difference_point(ec_point_t *PQ, const ec_point_t *P, const ec_point_t *Q, const ec_curve_t *curve) +{ + // Given P,Q in projective x-only, computes a deterministic choice for (P-Q) + // Based on Proposition 3 of https://eprint.iacr.org/2017/518.pdf + + fp2_t Bxx, Bxz, Bzz, t0, t1; + + fp2_mul(&t0, &P->x, &Q->x); + fp2_mul(&t1, &P->z, &Q->z); + fp2_sub(&Bxx, &t0, &t1); + fp2_sqr(&Bxx, &Bxx); + fp2_mul(&Bxx, &Bxx, &curve->C); // C*(P.x*Q.x-P.z*Q.z)^2 + fp2_add(&Bxz, &t0, &t1); + fp2_mul(&t0, &P->x, &Q->z); + fp2_mul(&t1, &P->z, &Q->x); + fp2_add(&Bzz, &t0, &t1); + fp2_mul(&Bxz, &Bxz, &Bzz); // (P.x*Q.x+P.z*Q.z)(P.x*Q.z+P.z*Q.x) + fp2_sub(&Bzz, &t0, &t1); + fp2_sqr(&Bzz, &Bzz); + fp2_mul(&Bzz, &Bzz, &curve->C); // C*(P.x*Q.z-P.z*Q.x)^2 + fp2_mul(&Bxz, &Bxz, &curve->C); // C*(P.x*Q.x+P.z*Q.z)(P.x*Q.z+P.z*Q.x) + fp2_mul(&t0, &t0, &t1); + fp2_mul(&t0, &t0, &curve->A); + fp2_add(&t0, &t0, &t0); + fp2_add(&Bxz, &Bxz, &t0); // C*(P.x*Q.x+P.z*Q.z)(P.x*Q.z+P.z*Q.x) + 2*A*P.x*Q.z*P.z*Q.x + + // To ensure that the denominator is a fourth power in Fp, we normalize by + // C*C_bar^2*(P.z)_bar^2*(Q.z)_bar^2 + fp_copy(&t0.re, &curve->C.re); + fp_neg(&t0.im, &curve->C.im); + fp2_sqr(&t0, &t0); + fp2_mul(&t0, &t0, &curve->C); + fp_copy(&t1.re, &P->z.re); + fp_neg(&t1.im, &P->z.im); + fp2_sqr(&t1, &t1); + fp2_mul(&t0, &t0, &t1); + fp_copy(&t1.re, &Q->z.re); + 
fp_neg(&t1.im, &Q->z.im); + fp2_sqr(&t1, &t1); + fp2_mul(&t0, &t0, &t1); + fp2_mul(&Bxx, &Bxx, &t0); + fp2_mul(&Bxz, &Bxz, &t0); + fp2_mul(&Bzz, &Bzz, &t0); + + // Solving quadratic equation + fp2_sqr(&t0, &Bxz); + fp2_mul(&t1, &Bxx, &Bzz); + fp2_sub(&t0, &t0, &t1); + // No need to check if t0 is square, as per the entangled basis algorithm. + fp2_sqrt(&t0); + fp2_add(&PQ->x, &Bxz, &t0); + fp2_copy(&PQ->z, &Bzz); +} + +// Lifts a basis x(P), x(Q), x(P-Q) assuming the curve has (A/C : 1) and the point +// P = (X/Z : 1). For generic implementation see lift_basis() +uint32_t +lift_basis_normalized(jac_point_t *P, jac_point_t *Q, ec_basis_t *B, ec_curve_t *E) +{ + assert(fp2_is_one(&B->P.z)); + assert(fp2_is_one(&E->C)); + + fp2_copy(&P->x, &B->P.x); + fp2_copy(&Q->x, &B->Q.x); + fp2_copy(&Q->z, &B->Q.z); + fp2_set_one(&P->z); + uint32_t ret = ec_recover_y(&P->y, &P->x, E); + + // Algorithm of Okeya-Sakurai to recover y.Q in the montgomery model + fp2_t v1, v2, v3, v4; + fp2_mul(&v1, &P->x, &Q->z); + fp2_add(&v2, &Q->x, &v1); + fp2_sub(&v3, &Q->x, &v1); + fp2_sqr(&v3, &v3); + fp2_mul(&v3, &v3, &B->PmQ.x); + fp2_add(&v1, &E->A, &E->A); + fp2_mul(&v1, &v1, &Q->z); + fp2_add(&v2, &v2, &v1); + fp2_mul(&v4, &P->x, &Q->x); + fp2_add(&v4, &v4, &Q->z); + fp2_mul(&v2, &v2, &v4); + fp2_mul(&v1, &v1, &Q->z); + fp2_sub(&v2, &v2, &v1); + fp2_mul(&v2, &v2, &B->PmQ.z); + fp2_sub(&Q->y, &v3, &v2); + fp2_add(&v1, &P->y, &P->y); + fp2_mul(&v1, &v1, &Q->z); + fp2_mul(&v1, &v1, &B->PmQ.z); + fp2_mul(&Q->x, &Q->x, &v1); + fp2_mul(&Q->z, &Q->z, &v1); + + // Transforming to a jacobian coordinate + fp2_sqr(&v1, &Q->z); + fp2_mul(&Q->y, &Q->y, &v1); + fp2_mul(&Q->x, &Q->x, &Q->z); + return ret; +} + +uint32_t +lift_basis(jac_point_t *P, jac_point_t *Q, ec_basis_t *B, ec_curve_t *E) +{ + // Normalise the curve E such that (A : C) is (A/C : 1) + // and the point x(P) = (X/Z : 1). 
+ fp2_t inverses[2]; + fp2_copy(&inverses[0], &B->P.z); + fp2_copy(&inverses[1], &E->C); + + fp2_batched_inv(inverses, 2); + fp2_set_one(&B->P.z); + fp2_set_one(&E->C); + + fp2_mul(&B->P.x, &B->P.x, &inverses[0]); + fp2_mul(&E->A, &E->A, &inverses[1]); + + // Lift the basis to Jacobian points P, Q + return lift_basis_normalized(P, Q, B, E); +} + +// Given an x-coordinate, determines if this is a valid +// point on the curve. Assumes C=1. +static uint32_t +is_on_curve(const fp2_t *x, const ec_curve_t *curve) +{ + assert(fp2_is_one(&curve->C)); + fp2_t t0; + + fp2_add(&t0, x, &curve->A); // x + (A/C) + fp2_mul(&t0, &t0, x); // x^2 + (A/C)*x + fp2_add_one(&t0, &t0); // x^2 + (A/C)*x + 1 + fp2_mul(&t0, &t0, x); // x^3 + (A/C)*x^2 + x + + return fp2_is_square(&t0); +} + +// Helper function which given a point of order k*2^n with n maximal +// and k odd, computes a point of order 2^f +static inline void +clear_cofactor_for_maximal_even_order(ec_point_t *P, ec_curve_t *curve, int f) +{ + // clear out the odd cofactor to get a point of order 2^n + ec_mul(P, p_cofactor_for_2f, P_COFACTOR_FOR_2F_BITLENGTH, P, curve); + + // clear the power of two to get a point of order 2^f + for (int i = 0; i < TORSION_EVEN_POWER - f; i++) { + xDBL_A24(P, P, &curve->A24, curve->is_A24_computed_and_normalized); + } +} + +// Helper function which finds an NQR -1 / (1 + i*b) for entangled basis generation +static uint8_t +find_nqr_factor(fp2_t *x, ec_curve_t *curve, const uint8_t start) +{ + // factor = -1/(1 + i*b) for b in Fp will be NQR whenever 1 + b^2 is NQR + // in Fp, so we find one of these and then invert (1 + i*b). We store b + // as a u8 hint to save time in verification. + + // We return the hint as a u8, but use (uint16_t)n to give 2^16 - 1 + // to make failure cryptographically negligible, with a fallback when + // n > 128 is required. 
+ uint8_t hint; + uint32_t found = 0; + uint16_t n = start; + + bool qr_b = 1; + fp_t b, tmp; + fp2_t z, t0, t1; + + do { + while (qr_b) { + // find b with 1 + b^2 a non-quadratic residue + fp_set_small(&tmp, (uint32_t)n * n + 1); + qr_b = fp_is_square(&tmp); + n++; // keeps track of b = n - 1 + } + + // for Px := -A/(1 + i*b) to be on the curve + // is equivalent to A^2*(z-1) - z^2 NQR for z = 1 + i*b + // thus prevents unnecessary inversion pre-check + + // t0 = z - 1 = i*b + // t1 = z = 1 + i*b + fp_set_small(&b, (uint32_t)n - 1); + fp2_set_zero(&t0); + fp2_set_one(&z); + fp_copy(&z.im, &b); + fp_copy(&t0.im, &b); + + // A^2*(z-1) - z^2 + fp2_sqr(&t1, &curve->A); + fp2_mul(&t0, &t0, &t1); // A^2 * (z - 1) + fp2_sqr(&t1, &z); + fp2_sub(&t0, &t0, &t1); // A^2 * (z - 1) - z^2 + found = !fp2_is_square(&t0); + + qr_b = 1; + } while (!found); + + // set Px to -A/(1 + i*b) + fp2_copy(x, &z); + fp2_inv(x); + fp2_mul(x, x, &curve->A); + fp2_neg(x, x); + + /* + * With very low probability n will not fit in 7 bits. + * We set hint = 0 which signals failure and the need + * to generate a value on the fly during verification + */ + hint = n <= 128 ? n - 1 : 0; + + return hint; +} + +// Helper function which finds a point x(P) = n * A +static uint8_t +find_nA_x_coord(fp2_t *x, ec_curve_t *curve, const uint8_t start) +{ + assert(!fp2_is_square(&curve->A)); // Only to be called when A is a NQR + + // when A is NQR we allow x(P) to be a multiple n*A of A + uint8_t n = start; + if (n == 1) { + fp2_copy(x, &curve->A); + } else { + fp2_mul_small(x, &curve->A, n); + } + + while (!is_on_curve(x, curve)) { + fp2_add(x, x, &curve->A); + n++; + } + + /* + * With very low probability (1/2^128), n will not fit in 7 bits. + * In this case, we set hint = 0 which signals failure and the need + * to generate a value on the fly during verification + */ + uint8_t hint = n < 128 ? 
n : 0; + return hint; +} + +// The entangled basis generation does not allow A = 0 +// so we simply return the one we have already precomputed +static void +ec_basis_E0_2f(ec_basis_t *PQ2, ec_curve_t *curve, int f) +{ + assert(fp2_is_zero(&curve->A)); + ec_point_t P, Q; + + // Set P, Q to precomputed (X : 1) values + fp2_copy(&P.x, &BASIS_E0_PX); + fp2_copy(&Q.x, &BASIS_E0_QX); + fp2_set_one(&P.z); + fp2_set_one(&Q.z); + + // clear the power of two to get a point of order 2^f + for (int i = 0; i < TORSION_EVEN_POWER - f; i++) { + xDBL_E0(&P, &P); + xDBL_E0(&Q, &Q); + } + + // Set P, Q in the basis and compute x(P - Q) + copy_point(&PQ2->P, &P); + copy_point(&PQ2->Q, &Q); + difference_point(&PQ2->PmQ, &P, &Q, curve); +} + +// Computes a basis E[2^f] = where the point Q is above (0 : 0) +// and stores hints as an array for faster recomputation at a later point +uint8_t +ec_curve_to_basis_2f_to_hint(ec_basis_t *PQ2, ec_curve_t *curve, int f) +{ + // Normalise (A/C : 1) and ((A + 2)/4 : 1) + ec_normalize_curve_and_A24(curve); + + if (fp2_is_zero(&curve->A)) { + ec_basis_E0_2f(PQ2, curve, f); + return 0; + } + + uint8_t hint; + bool hint_A = fp2_is_square(&curve->A); + + // Compute the points P, Q + ec_point_t P, Q; + + if (!hint_A) { + // when A is NQR we allow x(P) to be a multiple n*A of A + hint = find_nA_x_coord(&P.x, curve, 1); + } else { + // when A is QR we instead have to find (1 + b^2) a NQR + // such that x(P) = -A / (1 + i*b) + hint = find_nqr_factor(&P.x, curve, 1); + } + + fp2_set_one(&P.z); + fp2_add(&Q.x, &curve->A, &P.x); + fp2_neg(&Q.x, &Q.x); + fp2_set_one(&Q.z); + + // clear out the odd cofactor to get a point of order 2^f + clear_cofactor_for_maximal_even_order(&P, curve, f); + clear_cofactor_for_maximal_even_order(&Q, curve, f); + + // compute PmQ, set PmQ to Q to ensure Q above (0,0) + difference_point(&PQ2->Q, &P, &Q, curve); + copy_point(&PQ2->P, &P); + copy_point(&PQ2->PmQ, &Q); + + // Finally, we compress hint_A and hint into a single bytes. 
+ // We choose to set the LSB of hint to hint_A + assert(hint < 128); // We expect hint to be 7-bits in size + return (hint << 1) | hint_A; +} + +// Computes a basis E[2^f] = where the point Q is above (0 : 0) +// given the hints as an array for faster basis computation +int +ec_curve_to_basis_2f_from_hint(ec_basis_t *PQ2, ec_curve_t *curve, int f, const uint8_t hint) +{ + // Normalise (A/C : 1) and ((A + 2)/4 : 1) + ec_normalize_curve_and_A24(curve); + + if (fp2_is_zero(&curve->A)) { + ec_basis_E0_2f(PQ2, curve, f); + return 1; + } + + // The LSB of hint encodes whether A is a QR + // The remaining 7-bits are used to find a valid x(P) + bool hint_A = hint & 1; + uint8_t hint_P = hint >> 1; + + // Compute the points P, Q + ec_point_t P, Q; + + if (!hint_P) { + // When hint_P = 0 it means we did not find a point in 128 attempts + // this is very rare and we almost never expect to need this fallback + // In either case, we can start with b = 128 to skip testing the known + // values which will not work + if (!hint_A) { + find_nA_x_coord(&P.x, curve, 128); + } else { + find_nqr_factor(&P.x, curve, 128); + } + } else { + // Otherwise we use the hint to directly find x(P) based on hint_A + if (!hint_A) { + // when A is NQR, we have found n such that x(P) = n*A + fp2_mul_small(&P.x, &curve->A, hint_P); + } else { + // when A is QR we have found b such that (1 + b^2) is a NQR in + // Fp, so we must compute x(P) = -A / (1 + i*b) + fp_set_one(&P.x.re); + fp_set_small(&P.x.im, hint_P); + fp2_inv(&P.x); + fp2_mul(&P.x, &P.x, &curve->A); + fp2_neg(&P.x, &P.x); + } + } + fp2_set_one(&P.z); + +#ifndef NDEBUG + int passed = 1; + passed = is_on_curve(&P.x, curve); + passed &= !fp2_is_square(&P.x); + + if (!passed) + return 0; +#endif + + // set xQ to -xP - A + fp2_add(&Q.x, &curve->A, &P.x); + fp2_neg(&Q.x, &Q.x); + fp2_set_one(&Q.z); + + // clear out the odd cofactor to get a point of order 2^f + clear_cofactor_for_maximal_even_order(&P, curve, f); + 
clear_cofactor_for_maximal_even_order(&Q, curve, f); + + // compute PmQ, set PmQ to Q to ensure Q above (0,0) + difference_point(&PQ2->Q, &P, &Q, curve); + copy_point(&PQ2->P, &P); + copy_point(&PQ2->PmQ, &Q); + +#ifndef NDEBUG + passed &= test_basis_order_twof(PQ2, curve, f); + + if (!passed) + return 0; +#endif + + return 1; +} diff --git a/src/pqm4/sqisign_lvl3/ref/common.c b/src/pqm4/sqisign_lvl3/ref/common.c new file mode 100644 index 0000000..d393e9c --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/common.c @@ -0,0 +1,88 @@ +#include +#include +#include +#include +#include +#include + +void +public_key_init(public_key_t *pk) +{ + ec_curve_init(&pk->curve); +} + +void +public_key_finalize(public_key_t *pk) +{ +} + +// compute the challenge as the hash of the message and the commitment curve and public key +void +hash_to_challenge(scalar_t *scalar, + const public_key_t *pk, + const ec_curve_t *com_curve, + const unsigned char *message, + size_t length) +{ + unsigned char buf[2 * FP2_ENCODED_BYTES]; + { + fp2_t j1, j2; + ec_j_inv(&j1, &pk->curve); + ec_j_inv(&j2, com_curve); + fp2_encode(buf, &j1); + fp2_encode(buf + FP2_ENCODED_BYTES, &j2); + } + + { + // The type scalar_t represents an element of GF(p), which is about + // 2*lambda bits, where lambda = 128, 192 or 256, according to the + // security level. Thus, the variable scalar should have enough memory + // for the values produced by SHAKE256 in the intermediate iterations. 
+ + shake256incctx ctx; + + size_t hash_bytes = ((2 * SECURITY_BITS) + 7) / 8; + size_t limbs = (hash_bytes + sizeof(digit_t) - 1) / sizeof(digit_t); + size_t bits = (2 * SECURITY_BITS) % RADIX; + digit_t mask = ((digit_t)-1) >> ((RADIX - bits) % RADIX); +#ifdef TARGET_BIG_ENDIAN + mask = BSWAP_DIGIT(mask); +#endif + + shake256_inc_init(&ctx); + shake256_inc_absorb(&ctx, buf, 2 * FP2_ENCODED_BYTES); + shake256_inc_absorb(&ctx, message, length); + shake256_inc_finalize(&ctx); + shake256_inc_squeeze((void *)(*scalar), hash_bytes, &ctx); + (*scalar)[limbs - 1] &= mask; + for (int i = 2; i < HASH_ITERATIONS; i++) { + shake256_inc_init(&ctx); + shake256_inc_absorb(&ctx, (void *)(*scalar), hash_bytes); + shake256_inc_finalize(&ctx); + shake256_inc_squeeze((void *)(*scalar), hash_bytes, &ctx); + (*scalar)[limbs - 1] &= mask; + } + shake256_inc_init(&ctx); + shake256_inc_absorb(&ctx, (void *)(*scalar), hash_bytes); + shake256_inc_finalize(&ctx); + + hash_bytes = ((TORSION_EVEN_POWER - SQIsign_response_length) + 7) / 8; + limbs = (hash_bytes + sizeof(digit_t) - 1) / sizeof(digit_t); + bits = (TORSION_EVEN_POWER - SQIsign_response_length) % RADIX; + mask = ((digit_t)-1) >> ((RADIX - bits) % RADIX); +#ifdef TARGET_BIG_ENDIAN + mask = BSWAP_DIGIT(mask); +#endif + + memset(*scalar, 0, NWORDS_ORDER * sizeof(digit_t)); + shake256_inc_squeeze((void *)(*scalar), hash_bytes, &ctx); + (*scalar)[limbs - 1] &= mask; + +#ifdef TARGET_BIG_ENDIAN + for (int i = 0; i < NWORDS_ORDER; i++) + (*scalar)[i] = BSWAP_DIGIT((*scalar)[i]); +#endif + + mp_mod_2exp(*scalar, SECURITY_BITS, NWORDS_ORDER); + } +} diff --git a/src/pqm4/sqisign_lvl3/ref/config.mk b/src/pqm4/sqisign_lvl3/ref/config.mk new file mode 100644 index 0000000..cd822b5 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/config.mk @@ -0,0 +1,2 @@ +elf/crypto_sign_sqisign_lvl3_ref_%.elf: CPPFLAGS+=-DRADIX_32 -DSQISIGN_BUILD_TYPE_REF -DSQISIGN_GF_IMPL_REF -DSQISIGN_VARIANT=lvl3 -DTARGET_ARM -DTARGET_OS_OTHER -DNDEBUG -DDISABLE_NAMESPACING 
-DBIG_PUBLIC_KEY_TESTS +obj/libcrypto_sign_sqisign_lvl3_ref.a: CPPFLAGS+=-DRADIX_32 -DSQISIGN_BUILD_TYPE_REF -DSQISIGN_GF_IMPL_REF -DSQISIGN_VARIANT=lvl3 -DTARGET_ARM -DTARGET_OS_OTHER -DNDEBUG -DDISABLE_NAMESPACING -DBIG_PUBLIC_KEY_TESTS diff --git a/src/pqm4/sqisign_lvl3/ref/e0_basis.c b/src/pqm4/sqisign_lvl3/ref/e0_basis.c new file mode 100644 index 0000000..1b12a83 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/e0_basis.c @@ -0,0 +1,55 @@ +#include +const fp2_t BASIS_E0_PX = { +#if 0 +#elif RADIX == 16 +{0x1196, 0x134b, 0xdbd, 0x118d, 0x712, 0x1646, 0x5d7, 0x8eb, 0x431, 0xf5b, 0x161e, 0x13b6, 0x1c07, 0x42, 0x8ba, 0xeec, 0x1a43, 0x545, 0x1cdb, 0x1659, 0x1614, 0xde, 0x72d, 0x1b80, 0x1706, 0x15a3, 0x894, 0xd4a, 0x1b2f, 0x12} +#elif RADIX == 32 +{0x9a5c65a, 0xa31adbd, 0x7b231c4, 0xc51d65d, 0x1e7ad90, 0x1e76d6, 0x8ba0217, 0xe90ddd8, 0x3cdb2a2, 0xf5852cb, 0x72d06, 0xd1dc1b7, 0xa94894a, 0x14cbd} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x31c4a31adbd9a5c6, 0xe7ad90c51d65d7b2, 0x88ba021701e76d61, 0x2cb3cdb2a2e90ddd, 0xdc1b70072d06f585, 0x16eecbda94894ad1} +#else +{0x94635b7b34b8c, 0x431475975ec8c7, 0x380f3b6b0f3d6c, 0x2e90ddd88ba021, 0x5eb0a59679b654, 0x347706dc01cb41, 0xb7765ed4a44a5} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0xa85, 0x10cc, 0x1ef, 0xb0b, 0x1082, 0x5be, 0xd14, 0x1100, 0x1a33, 0x174b, 0x181c, 0x83e, 0x1034, 0x18ba, 0x205, 0x1f39, 0x1e9, 0x1998, 0x130e, 0x801, 0xfeb, 0x698, 0xdf9, 0x6a5, 0x5b6, 0x2c8, 0x1283, 0xad9, 0x960, 0x1e} +#elif RADIX == 32 +{0x8662a17, 0x96161ef, 0x42df420, 0xce200d1, 0x1cba5e8, 0xd107d8, 0x205c5d4, 0x7a7e72, 0x330eccc, 0xc3fad00, 0x4adf934, 0x6416d8d, 0x5b32831, 0x2f581} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0xf42096161ef8662a, 0xcba5e8ce200d142d, 0x2205c5d40d107d81, 0xd00330eccc07a7e7, 0x16d8d4adf934c3fa, 0x6065815b3283164} +#else +{0x412c2c3df0cc54, 0x2338803450b7d0, 0x206883ec0e5d2f, 0x407a7e72205c5d, 0x187f5a00661d99, 0x5905b6352b7e4d, 0x3032c0ad99418} +#endif +#endif +}; +const 
fp2_t BASIS_E0_QX = { +#if 0 +#elif RADIX == 16 +{0x16ed, 0x818, 0x127a, 0xcfb, 0x1be6, 0x1b40, 0x1bf1, 0xe75, 0x129c, 0x151, 0x425, 0x142e, 0x1edb, 0x254, 0x5cc, 0x1a5b, 0x1e1d, 0x1e27, 0x1a12, 0x8a8, 0x59e, 0x933, 0x1647, 0x686, 0x19e, 0x1e51, 0x151f, 0x1b6e, 0x1efe, 0xd} +#elif RADIX == 32 +{0x40c5bb5, 0x99f727a, 0x1da06f9, 0x71cebbf, 0x250a8ca, 0xb6e85c4, 0x5cc12a7, 0xf8774b6, 0x1a12f13, 0x9967915, 0xd64749, 0x288678d, 0x6dd51ff, 0x2ebfb} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x6f999f727a40c5b, 0x50a8ca71cebbf1da, 0x65cc12a7b6e85c42, 0x9151a12f13f8774b, 0x8678d0d647499967, 0x2e23bfb6dd51ff28} +#else +{0x7333ee4f4818b7, 0x29c73aefc7681b, 0x3db742e2128546, 0x3f8774b65cc12a, 0x332cf22a3425e2, 0x4a219e343591d2, 0x6d1dfdb6ea8ff} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0x18a9, 0x1838, 0x1588, 0x1720, 0xf3f, 0x1fcd, 0x44d, 0x1e6b, 0x681, 0x1249, 0x1f8a, 0x5af, 0x1f58, 0x1c12, 0xf21, 0x1887, 0x278, 0x156a, 0xbfe, 0x765, 0x12f7, 0x4da, 0x16ce, 0x7c1, 0x1c04, 0x1773, 0x853, 0xab7, 0xe1d, 0x1a} +#elif RADIX == 32 +{0xc1c62a7, 0xee41588, 0xdfe6bcf, 0x7cd644, 0x8a9249a, 0xd60b5ff, 0xf21e097, 0x9e310e, 0xabfeab5, 0xd4bdcec, 0x836ce26, 0xb9f010f, 0x56e853b, 0x10875} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x6bcfee41588c1c62, 0xa9249a07cd644dfe, 0xef21e097d60b5ff8, 0xcecabfeab509e310, 0xf010f836ce26d4bd, 0x2a7787556e853bb9} +#else +{0x1fdc82b11838c5, 0x681f359137f9af, 0x3eb05affc54924, 0x509e310ef21e09, 0x5a97b9d957fd56, 0x6e7c043e0db389, 0x4fbc3aab7429d} +#endif +#endif +}; diff --git a/src/pqm4/sqisign_lvl3/ref/e0_basis.h b/src/pqm4/sqisign_lvl3/ref/e0_basis.h new file mode 100644 index 0000000..05cafb8 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/e0_basis.h @@ -0,0 +1,3 @@ +#include +extern const fp2_t BASIS_E0_PX; +extern const fp2_t BASIS_E0_QX; diff --git a/src/pqm4/sqisign_lvl3/ref/ec.c b/src/pqm4/sqisign_lvl3/ref/ec.c new file mode 100644 index 0000000..be4e4e5 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/ec.c @@ 
-0,0 +1,665 @@ +#include +#include +#include +#include + +void +ec_point_init(ec_point_t *P) +{ // Initialize point as identity element (1:0) + fp2_set_one(&(P->x)); + fp2_set_zero(&(P->z)); +} + +void +ec_curve_init(ec_curve_t *E) +{ // Initialize the curve struct + // Initialize the constants + fp2_set_zero(&(E->A)); + fp2_set_one(&(E->C)); + + // Initialize the point (A+2 : 4C) + ec_point_init(&(E->A24)); + + // Set the bool to be false by default + E->is_A24_computed_and_normalized = false; +} + +void +select_point(ec_point_t *Q, const ec_point_t *P1, const ec_point_t *P2, const digit_t option) +{ // Select points in constant time + // If option = 0 then Q <- P1, else if option = 0xFF...FF then Q <- P2 + fp2_select(&(Q->x), &(P1->x), &(P2->x), option); + fp2_select(&(Q->z), &(P1->z), &(P2->z), option); +} + +void +cswap_points(ec_point_t *P, ec_point_t *Q, const digit_t option) +{ // Swap points in constant time + // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P + fp2_cswap(&(P->x), &(Q->x), option); + fp2_cswap(&(P->z), &(Q->z), option); +} + +void +ec_normalize_point(ec_point_t *P) +{ + fp2_inv(&P->z); + fp2_mul(&P->x, &P->x, &P->z); + fp2_set_one(&(P->z)); +} + +void +ec_normalize_curve(ec_curve_t *E) +{ + fp2_inv(&E->C); + fp2_mul(&E->A, &E->A, &E->C); + fp2_set_one(&E->C); +} + +void +ec_curve_normalize_A24(ec_curve_t *E) +{ + if (!E->is_A24_computed_and_normalized) { + AC_to_A24(&E->A24, E); + ec_normalize_point(&E->A24); + E->is_A24_computed_and_normalized = true; + } + assert(fp2_is_one(&E->A24.z)); +} + +void +ec_normalize_curve_and_A24(ec_curve_t *E) +{ // Neither the curve nor A24 is guaranteed to be normalized.
+ // First we normalize (A/C : 1) and conditionally compute + if (!fp2_is_one(&E->C)) { + ec_normalize_curve(E); + } + + if (!E->is_A24_computed_and_normalized) { + // Now compute A24 = ((A + 2) / 4 : 1) + fp2_add_one(&E->A24.x, &E->A); // re(A24.x) = re(A) + 1 + fp2_add_one(&E->A24.x, &E->A24.x); // re(A24.x) = re(A) + 2 + fp_copy(&E->A24.x.im, &E->A.im); // im(A24.x) = im(A) + + fp2_half(&E->A24.x, &E->A24.x); // (A + 2) / 2 + fp2_half(&E->A24.x, &E->A24.x); // (A + 2) / 4 + fp2_set_one(&E->A24.z); + + E->is_A24_computed_and_normalized = true; + } +} + +uint32_t +ec_is_zero(const ec_point_t *P) +{ + return fp2_is_zero(&P->z); +} + +uint32_t +ec_has_zero_coordinate(const ec_point_t *P) +{ + return fp2_is_zero(&P->x) | fp2_is_zero(&P->z); +} + +uint32_t +ec_is_equal(const ec_point_t *P, const ec_point_t *Q) +{ // Evaluate if two points in Montgomery coordinates (X:Z) are equal + // Returns 0xFFFFFFFF (true) if P=Q, 0 (false) otherwise + fp2_t t0, t1; + + // Check if P, Q are the points at infinity + uint32_t l_zero = ec_is_zero(P); + uint32_t r_zero = ec_is_zero(Q); + + // Check if PX * QZ = QX * PZ + fp2_mul(&t0, &P->x, &Q->z); + fp2_mul(&t1, &P->z, &Q->x); + uint32_t lr_equal = fp2_is_equal(&t0, &t1); + + // Points are equal if + // - Both are zero, or + // - neither are zero AND PX * QZ = QX * PZ (masks are combined with & only: '*' binds tighter than '&' and would turn 0xFFFFFFFF * 0xFFFFFFFF into 1; lr_equal is expected to be a 0/0xFFFFFFFF mask) + return (l_zero & r_zero) | (~l_zero & ~r_zero & lr_equal); +} + +uint32_t +ec_is_two_torsion(const ec_point_t *P, const ec_curve_t *E) +{ + if (ec_is_zero(P)) + return 0; + + uint32_t x_is_zero, tmp_is_zero; + fp2_t t0, t1, t2; + fp2_add(&t0, &P->x, &P->z); + fp2_sqr(&t0, &t0); + fp2_sub(&t1, &P->x, &P->z); + fp2_sqr(&t1, &t1); + fp2_sub(&t2, &t0, &t1); + fp2_add(&t1, &t0, &t1); + fp2_mul(&t2, &t2, &E->A); + fp2_mul(&t1, &t1, &E->C); + fp2_add(&t1, &t1, &t1); + fp2_add(&t0, &t1, &t2); // 4 (CX^2+CZ^2+AXZ) + + x_is_zero = fp2_is_zero(&P->x); + tmp_is_zero = fp2_is_zero(&t0); + + // two torsion if x or x^2 + Ax + 1 is zero + return x_is_zero | tmp_is_zero; +} + 
+uint32_t +ec_is_four_torsion(const ec_point_t *P, const ec_curve_t *E) +{ + ec_point_t test; + xDBL_A24(&test, P, &E->A24, E->is_A24_computed_and_normalized); + return ec_is_two_torsion(&test, E); +} + +uint32_t +ec_is_basis_four_torsion(const ec_basis_t *B, const ec_curve_t *E) +{ // Check if basis points (P, Q) form a full 4-torsion basis: [2]P and [2]Q must be distinct non-zero 2-torsion points + ec_point_t P2, Q2; + xDBL_A24(&P2, &B->P, &E->A24, E->is_A24_computed_and_normalized); + xDBL_A24(&Q2, &B->Q, &E->A24, E->is_A24_computed_and_normalized); + return (ec_is_two_torsion(&P2, E) & ec_is_two_torsion(&Q2, E) & ~ec_is_equal(&P2, &Q2)); +} + +int +ec_curve_verify_A(const fp2_t *A) +{ // Verify the Montgomery coefficient A is valid (A^2-4 \ne 0) + // Return 1 if curve is valid, 0 otherwise + fp2_t t; + fp2_set_one(&t); + fp_add(&t.re, &t.re, &t.re); // t=2 + if (fp2_is_equal(A, &t)) + return 0; + fp_neg(&t.re, &t.re); // t=-2 + if (fp2_is_equal(A, &t)) + return 0; + return 1; +} + +int +ec_curve_init_from_A(ec_curve_t *E, const fp2_t *A) +{ // Initialize the curve from the A coefficient and check it is valid + // Return 1 if curve is valid, 0 otherwise + ec_curve_init(E); + fp2_copy(&E->A, A); // Set A + return ec_curve_verify_A(A); +} + +void +ec_j_inv(fp2_t *j_inv, const ec_curve_t *curve) +{ // j-invariant computation from the projective Montgomery curve coefficients (A:C) + fp2_t t0, t1; + + fp2_sqr(&t1, &curve->C); + fp2_sqr(j_inv, &curve->A); + fp2_add(&t0, &t1, &t1); + fp2_sub(&t0, j_inv, &t0); + fp2_sub(&t0, &t0, &t1); + fp2_sub(j_inv, &t0, &t1); + fp2_sqr(&t1, &t1); + fp2_mul(j_inv, j_inv, &t1); + fp2_add(&t0, &t0, &t0); + fp2_add(&t0, &t0, &t0); + fp2_sqr(&t1, &t0); + fp2_mul(&t0, &t0, &t1); + fp2_add(&t0, &t0, &t0); + fp2_add(&t0, &t0, &t0); + fp2_inv(j_inv); + fp2_mul(j_inv, &t0, j_inv); +} + +void +xDBL_E0(ec_point_t *Q, const ec_point_t *P) +{ // Doubling of a Montgomery point in projective coordinates (X:Z) on the curve E0 with (A:C) = (0:1). 
+ // Input: projective Montgomery x-coordinates P = (XP:ZP), where xP=XP/ZP, and Montgomery curve constants (A:C) = (0:1). + // Output: projective Montgomery x-coordinates Q <- 2*P = (XQ:ZQ) such that x(2P)=XQ/ZQ. + fp2_t t0, t1, t2; + + fp2_add(&t0, &P->x, &P->z); + fp2_sqr(&t0, &t0); + fp2_sub(&t1, &P->x, &P->z); + fp2_sqr(&t1, &t1); + fp2_sub(&t2, &t0, &t1); + fp2_add(&t1, &t1, &t1); + fp2_mul(&Q->x, &t0, &t1); + fp2_add(&Q->z, &t1, &t2); + fp2_mul(&Q->z, &Q->z, &t2); +} + +void +xDBL(ec_point_t *Q, const ec_point_t *P, const ec_point_t *AC) +{ // Doubling of a Montgomery point in projective coordinates (X:Z). Computation of coefficient values A+2C and 4C + // on-the-fly. + // Input: projective Montgomery x-coordinates P = (XP:ZP), where xP=XP/ZP, and Montgomery curve constants (A:C). + // Output: projective Montgomery x-coordinates Q <- 2*P = (XQ:ZQ) such that x(2P)=XQ/ZQ. + fp2_t t0, t1, t2, t3; + + fp2_add(&t0, &P->x, &P->z); + fp2_sqr(&t0, &t0); + fp2_sub(&t1, &P->x, &P->z); + fp2_sqr(&t1, &t1); + fp2_sub(&t2, &t0, &t1); + fp2_add(&t3, &AC->z, &AC->z); + fp2_mul(&t1, &t1, &t3); + fp2_add(&t1, &t1, &t1); + fp2_mul(&Q->x, &t0, &t1); + fp2_add(&t0, &t3, &AC->x); + fp2_mul(&t0, &t0, &t2); + fp2_add(&t0, &t0, &t1); + fp2_mul(&Q->z, &t0, &t2); +} + +void +xDBL_A24(ec_point_t *Q, const ec_point_t *P, const ec_point_t *A24, const bool A24_normalized) +{ // Doubling of a Montgomery point in projective coordinates (X:Z). + // Input: projective Montgomery x-coordinates P = (XP:ZP), where xP=XP/ZP, and + // the Montgomery curve constants A24 = (A+2C:4C) (or A24 = (A+2C/4C:1) if normalized). + // Output: projective Montgomery x-coordinates Q <- 2*P = (XQ:ZQ) such that x(2P)=XQ/ZQ. 
+ fp2_t t0, t1, t2; + + fp2_add(&t0, &P->x, &P->z); + fp2_sqr(&t0, &t0); + fp2_sub(&t1, &P->x, &P->z); + fp2_sqr(&t1, &t1); + fp2_sub(&t2, &t0, &t1); + if (!A24_normalized) + fp2_mul(&t1, &t1, &A24->z); + fp2_mul(&Q->x, &t0, &t1); + fp2_mul(&t0, &t2, &A24->x); + fp2_add(&t0, &t0, &t1); + fp2_mul(&Q->z, &t0, &t2); +} + +void +xADD(ec_point_t *R, const ec_point_t *P, const ec_point_t *Q, const ec_point_t *PQ) +{ // Differential addition of Montgomery points in projective coordinates (X:Z). + // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, and difference + // PQ=P-Q=(XPQ:ZPQ). + // Output: projective Montgomery point R <- P+Q = (XR:ZR) such that x(P+Q)=XR/ZR. + fp2_t t0, t1, t2, t3; + + fp2_add(&t0, &P->x, &P->z); + fp2_sub(&t1, &P->x, &P->z); + fp2_add(&t2, &Q->x, &Q->z); + fp2_sub(&t3, &Q->x, &Q->z); + fp2_mul(&t0, &t0, &t3); + fp2_mul(&t1, &t1, &t2); + fp2_add(&t2, &t0, &t1); + fp2_sub(&t3, &t0, &t1); + fp2_sqr(&t2, &t2); + fp2_sqr(&t3, &t3); + fp2_mul(&t2, &PQ->z, &t2); + fp2_mul(&R->z, &PQ->x, &t3); + fp2_copy(&R->x, &t2); +} + +void +xDBLADD(ec_point_t *R, + ec_point_t *S, + const ec_point_t *P, + const ec_point_t *Q, + const ec_point_t *PQ, + const ec_point_t *A24, + const bool A24_normalized) +{ // Simultaneous doubling and differential addition. + // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, the difference + // PQ=P-Q=(XPQ:ZPQ), and the Montgomery curve constants A24 = (A+2C:4C) (or A24 = (A+2C/4C:1) if normalized). + // Output: projective Montgomery points R <- 2*P = (XR:ZR) such that x(2P)=XR/ZR, and S <- P+Q = (XS:ZS) such that = + // x(Q+P)=XS/ZS. 
+ fp2_t t0, t1, t2; + + fp2_add(&t0, &P->x, &P->z); + fp2_sub(&t1, &P->x, &P->z); + fp2_sqr(&R->x, &t0); + fp2_sub(&t2, &Q->x, &Q->z); + fp2_add(&S->x, &Q->x, &Q->z); + fp2_mul(&t0, &t0, &t2); + fp2_sqr(&R->z, &t1); + fp2_mul(&t1, &t1, &S->x); + fp2_sub(&t2, &R->x, &R->z); + if (!A24_normalized) + fp2_mul(&R->z, &R->z, &A24->z); + fp2_mul(&R->x, &R->x, &R->z); + fp2_mul(&S->x, &A24->x, &t2); + fp2_sub(&S->z, &t0, &t1); + fp2_add(&R->z, &R->z, &S->x); + fp2_add(&S->x, &t0, &t1); + fp2_mul(&R->z, &R->z, &t2); + fp2_sqr(&S->z, &S->z); + fp2_sqr(&S->x, &S->x); + fp2_mul(&S->z, &S->z, &PQ->x); + fp2_mul(&S->x, &S->x, &PQ->z); +} + +void +xMUL(ec_point_t *Q, const ec_point_t *P, const digit_t *k, const int kbits, const ec_curve_t *curve) +{ // The Montgomery ladder + // Input: projective Montgomery point P=(XP:ZP) such that xP=XP/ZP, a scalar k of bitlength kbits, and + // the Montgomery curve constants (A:C) (or A24 = (A+2C/4C:1) if normalized). + // Output: projective Montgomery points Q <- k*P = (XQ:ZQ) such that x(k*P)=XQ/ZQ.
+ ec_point_t R0, R1, A24; + digit_t mask; + unsigned int bit, prevbit = 0, swap; + + if (!curve->is_A24_computed_and_normalized) { + // Computation of A24=(A+2C:4C); this projective form is NOT normalized (A24.z = 4C) + fp2_add(&A24.x, &curve->C, &curve->C); + fp2_add(&A24.z, &A24.x, &A24.x); + fp2_add(&A24.x, &A24.x, &curve->A); + } else { + fp2_copy(&A24.x, &curve->A24.x); + fp2_copy(&A24.z, &curve->A24.z); + // Assert A24 has been normalised + assert(fp2_is_one(&A24.z)); + } + + // R0 <- (1:0), R1 <- P + ec_point_init(&R0); + fp2_copy(&R1.x, &P->x); + fp2_copy(&R1.z, &P->z); + + // Main loop; xDBLADD is told whether A24.z is 1 so the 4C factor is not dropped for a projective A24 + for (int i = kbits - 1; i >= 0; i--) { + bit = (k[i >> LOG2RADIX] >> (i & (RADIX - 1))) & 1; + swap = bit ^ prevbit; + prevbit = bit; + mask = 0 - (digit_t)swap; + + cswap_points(&R0, &R1, mask); + xDBLADD(&R0, &R1, &R0, &R1, P, &A24, curve->is_A24_computed_and_normalized); + } + swap = 0 ^ prevbit; + mask = 0 - (digit_t)swap; + cswap_points(&R0, &R1, mask); + + fp2_copy(&Q->x, &R0.x); + fp2_copy(&Q->z, &R0.z); +} + +int +xDBLMUL(ec_point_t *S, + const ec_point_t *P, + const digit_t *k, + const ec_point_t *Q, + const digit_t *l, + const ec_point_t *PQ, + const int kbits, + const ec_curve_t *curve) +{ // The Montgomery biladder + // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, scalars k and l of + // bitlength kbits, the difference PQ=P-Q=(XPQ:ZPQ), and the Montgomery curve constants (A:C). + // Output: projective Montgomery point S <- k*P + l*Q = (XS:ZS) such that x(k*P + l*Q)=XS/ZS. 
+ + int i, A_is_zero; + digit_t evens, mevens, bitk0, bitl0, maskk, maskl, temp, bs1_ip1, bs2_ip1, bs1_i, bs2_i, h; + digit_t sigma[2] = { 0 }, pre_sigma = 0; + digit_t k_t[NWORDS_ORDER], l_t[NWORDS_ORDER], one[NWORDS_ORDER] = { 0 }, r[2 * BITS] = { 0 }; + ec_point_t DIFF1a, DIFF1b, DIFF2a, DIFF2b, R[3] = { 0 }, T[3]; + + // differential additions formulas are invalid in this case + if (ec_has_zero_coordinate(P) | ec_has_zero_coordinate(Q) | ec_has_zero_coordinate(PQ)) + return 0; + + // Derive sigma according to parity + bitk0 = (k[0] & 1); + bitl0 = (l[0] & 1); + maskk = 0 - bitk0; // Parity masks: 0 if even, otherwise 1...1 + maskl = 0 - bitl0; + sigma[0] = (bitk0 ^ 1); + sigma[1] = (bitl0 ^ 1); + evens = sigma[0] + sigma[1]; // Count number of even scalars + mevens = 0 - (evens & 1); // Mask mevens <- 0 if # even of scalars = 0 or 2, otherwise mevens = 1...1 + + // If k and l are both even or both odd, pick sigma = (0,1) + sigma[0] = (sigma[0] & mevens); + sigma[1] = (sigma[1] & mevens) | (1 & ~mevens); + + // Convert even scalars to odd + one[0] = 1; + mp_sub(k_t, k, one, NWORDS_ORDER); + mp_sub(l_t, l, one, NWORDS_ORDER); + select_ct(k_t, k_t, k, maskk, NWORDS_ORDER); + select_ct(l_t, l_t, l, maskl, NWORDS_ORDER); + + // Scalar recoding + for (i = 0; i < kbits; i++) { + // If sigma[0] = 1 swap k_t and l_t + maskk = 0 - (sigma[0] ^ pre_sigma); + swap_ct(k_t, l_t, maskk, NWORDS_ORDER); + + if (i == kbits - 1) { + bs1_ip1 = 0; + bs2_ip1 = 0; + } else { + bs1_ip1 = mp_shiftr(k_t, 1, NWORDS_ORDER); + bs2_ip1 = mp_shiftr(l_t, 1, NWORDS_ORDER); + } + bs1_i = k_t[0] & 1; + bs2_i = l_t[0] & 1; + + r[2 * i] = bs1_i ^ bs1_ip1; + r[2 * i + 1] = bs2_i ^ bs2_ip1; + + // Revert sigma if second bit, r_(2i+1), is 1 + pre_sigma = sigma[0]; + maskk = 0 - r[2 * i + 1]; + select_ct(&temp, &sigma[0], &sigma[1], maskk, 1); + select_ct(&sigma[1], &sigma[1], &sigma[0], maskk, 1); + sigma[0] = temp; + } + + // Point initialization + ec_point_init(&R[0]); + maskk = 0 - sigma[0]; + 
select_point(&R[1], P, Q, maskk); + select_point(&R[2], Q, P, maskk); + + fp2_copy(&DIFF1a.x, &R[1].x); + fp2_copy(&DIFF1a.z, &R[1].z); + fp2_copy(&DIFF1b.x, &R[2].x); + fp2_copy(&DIFF1b.z, &R[2].z); + + // Initialize DIFF2a <- P+Q, DIFF2b <- P-Q + xADD(&R[2], &R[1], &R[2], PQ); + if (ec_has_zero_coordinate(&R[2])) + return 0; // non valid formulas + + fp2_copy(&DIFF2a.x, &R[2].x); + fp2_copy(&DIFF2a.z, &R[2].z); + fp2_copy(&DIFF2b.x, &PQ->x); + fp2_copy(&DIFF2b.z, &PQ->z); + + A_is_zero = fp2_is_zero(&curve->A); + + // Main loop + for (i = kbits - 1; i >= 0; i--) { + h = r[2 * i] + r[2 * i + 1]; // in {0, 1, 2} + maskk = 0 - (h & 1); + select_point(&T[0], &R[0], &R[1], maskk); + maskk = 0 - (h >> 1); + select_point(&T[0], &T[0], &R[2], maskk); + if (A_is_zero) { + xDBL_E0(&T[0], &T[0]); + } else { + assert(fp2_is_one(&curve->A24.z)); + xDBL_A24(&T[0], &T[0], &curve->A24, true); + } + + maskk = 0 - r[2 * i + 1]; // in {0, 1} + select_point(&T[1], &R[0], &R[1], maskk); + select_point(&T[2], &R[1], &R[2], maskk); + + cswap_points(&DIFF1a, &DIFF1b, maskk); + xADD(&T[1], &T[1], &T[2], &DIFF1a); + xADD(&T[2], &R[0], &R[2], &DIFF2a); + + // If hw (mod 2) = 1 then swap DIFF2a and DIFF2b + maskk = 0 - (h & 1); + cswap_points(&DIFF2a, &DIFF2b, maskk); + + // R <- T + copy_point(&R[0], &T[0]); + copy_point(&R[1], &T[1]); + copy_point(&R[2], &T[2]); + } + + // Output R[evens] + select_point(S, &R[0], &R[1], mevens); + + maskk = 0 - (bitk0 & bitl0); + select_point(S, S, &R[2], maskk); + return 1; +} + +int +ec_ladder3pt(ec_point_t *R, + const digit_t *m, + const ec_point_t *P, + const ec_point_t *Q, + const ec_point_t *PQ, + const ec_curve_t *E) +{ // The 3-point Montgomery ladder + // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, a scalar k of + // bitlength kbits, the difference PQ=P-Q=(XPQ:ZPQ), and the Montgomery curve constants A24 = (A+2C/4C:1). 
+ // Output: projective Montgomery point R <- P + m*Q = (XR:ZR) such that x(P + m*Q)=XR/ZR. + assert(E->is_A24_computed_and_normalized); + if (!fp2_is_one(&E->A24.z)) { + return 0; + } + // Formulas are not valid in that case + if (ec_has_zero_coordinate(PQ)) { + return 0; + } + + ec_point_t X0, X1, X2; + copy_point(&X0, Q); + copy_point(&X1, P); + copy_point(&X2, PQ); + + int i, j; + digit_t t; + for (i = 0; i < NWORDS_ORDER; i++) { + t = 1; + for (j = 0; j < RADIX; j++) { + cswap_points(&X1, &X2, -((t & m[i]) == 0)); + xDBLADD(&X0, &X1, &X0, &X1, &X2, &E->A24, true); + cswap_points(&X1, &X2, -((t & m[i]) == 0)); + t <<= 1; + }; + }; + copy_point(R, &X1); + return 1; +} + +// WRAPPERS to export + +void +ec_dbl(ec_point_t *res, const ec_point_t *P, const ec_curve_t *curve) +{ + // If A24 = ((A+2)/4 : 1) we save multiplications + if (curve->is_A24_computed_and_normalized) { + assert(fp2_is_one(&curve->A24.z)); + xDBL_A24(res, P, &curve->A24, true); + } else { + // Otherwise we compute A24 on the fly for doubling + xDBL(res, P, (const ec_point_t *)curve); + } +} + +void +ec_dbl_iter(ec_point_t *res, int n, const ec_point_t *P, ec_curve_t *curve) +{ + if (n == 0) { + copy_point(res, P); + return; + } + + // When the chain is long enough, we should normalise A24 + if (n > 50) { + ec_curve_normalize_A24(curve); + } + + // When A24 is normalized we can save some multiplications + if (curve->is_A24_computed_and_normalized) { + assert(fp2_is_one(&curve->A24.z)); + xDBL_A24(res, P, &curve->A24, true); + for (int i = 0; i < n - 1; i++) { + assert(fp2_is_one(&curve->A24.z)); + xDBL_A24(res, res, &curve->A24, true); + } + } else { + // Otherwise we do normal doubling + xDBL(res, P, (const ec_point_t *)curve); + for (int i = 0; i < n - 1; i++) { + xDBL(res, res, (const ec_point_t *)curve); + } + } +} + +void +ec_dbl_iter_basis(ec_basis_t *res, int n, const ec_basis_t *B, ec_curve_t *curve) +{ + ec_dbl_iter(&res->P, n, &B->P, curve); + ec_dbl_iter(&res->Q, n, &B->Q, curve); + 
ec_dbl_iter(&res->PmQ, n, &B->PmQ, curve); +} + +void +ec_mul(ec_point_t *res, const digit_t *scalar, const int kbits, const ec_point_t *P, ec_curve_t *curve) +{ + // For large scalars it's worth normalising anyway + if (kbits > 50) { + ec_curve_normalize_A24(curve); + } + + // When A24 is computed and normalized we save some Fp2 multiplications + xMUL(res, P, scalar, kbits, curve); +} + +int +ec_biscalar_mul(ec_point_t *res, + const digit_t *scalarP, + const digit_t *scalarQ, + const int kbits, + const ec_basis_t *PQ, + const ec_curve_t *curve) +{ + if (fp2_is_zero(&PQ->PmQ.z)) + return 0; + + /* Differential additions behave badly when PmQ = (0:1), so we need to + * treat this case specifically. Since we assume P, Q are a basis, this + * can happen only if kbits==1 */ + if (kbits == 1) { + // Sanity check: our basis should be given by 2-torsion points + if (!ec_is_two_torsion(&PQ->P, curve) || !ec_is_two_torsion(&PQ->Q, curve) || + !ec_is_two_torsion(&PQ->PmQ, curve)) + return 0; + digit_t bP, bQ; + bP = (scalarP[0] & 1); + bQ = (scalarQ[0] & 1); + if (bP == 0 && bQ == 0) + ec_point_init(res); //(1: 0) + else if (bP == 1 && bQ == 0) + copy_point(res, &PQ->P); + else if (bP == 0 && bQ == 1) + copy_point(res, &PQ->Q); + else if (bP == 1 && bQ == 1) + copy_point(res, &PQ->PmQ); + else // should never happen + assert(0); + return 1; + } else { + ec_curve_t E; + copy_curve(&E, curve); + + if (!fp2_is_zero(&curve->A)) { // If A is not zero normalize + ec_curve_normalize_A24(&E); + } + return xDBLMUL(res, &PQ->P, scalarP, &PQ->Q, scalarQ, &PQ->PmQ, kbits, (const ec_curve_t *)&E); + } +} diff --git a/src/pqm4/sqisign_lvl3/ref/ec.h b/src/pqm4/sqisign_lvl3/ref/ec.h new file mode 100644 index 0000000..ee2be38 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/ec.h @@ -0,0 +1,668 @@ +/** @file + * + * @authors Luca De Feo, Francisco RH + * + * @brief Elliptic curve stuff + */ + +#ifndef EC_H +#define EC_H +#include +#include +#include +#include +#include + +/** @defgroup ec 
Elliptic curves + * @{ + */ + +/** @defgroup ec_t Data structures + * @{ + */ + +/** @brief Projective point on the Kummer line E/pm 1 in Montgomery coordinates + * + * @typedef ec_point_t + * + * @struct ec_point_t + * + * A projective point in (X:Z) coordinates. + */ +typedef struct ec_point_t +{ + fp2_t x; + fp2_t z; +} ec_point_t; + +/** @brief Projective point in Montgomery coordinates + * + * @typedef jac_point_t + * + * @struct jac_point_t + * + * A projective point in (X:Y:Z) coordinates + */ +typedef struct jac_point_t +{ + fp2_t x; + fp2_t y; + fp2_t z; +} jac_point_t; + +/** @brief Addition components + * + * @typedef add_components_t + * + * @struct add_components_t + * + * 3 components u,v,w that define the (X:Z) coordinates of both + * addition and subtraction of two distinct points with + * P+Q = (u-v:w) and P-Q = (u+v:w) + */ +typedef struct add_components_t +{ + fp2_t u; + fp2_t v; + fp2_t w; +} add_components_t; + +/** @brief A basis of a torsion subgroup + * + * @typedef ec_basis_t + * + * @struct ec_basis_t + * + * A pair of points P, Q forming a basis of a torsion subgroup, stored together with their difference PmQ = P - Q. 
+ */ +typedef struct ec_basis_t +{ + ec_point_t P; + ec_point_t Q; + ec_point_t PmQ; +} ec_basis_t; + +/** @brief An elliptic curve + * + * @typedef ec_curve_t + * + * @struct ec_curve_t + * + * An elliptic curve in projective Montgomery form + */ +typedef struct ec_curve_t +{ + fp2_t A; + fp2_t C; ///< cannot be 0 + ec_point_t A24; // the point (A+2 : 4C) + bool is_A24_computed_and_normalized; // says if A24 has been computed and normalized +} ec_curve_t; + +/** @brief An isogeny of degree a power of 2 + * + * @typedef ec_isog_even_t + * + * @struct ec_isog_even_t + */ +typedef struct ec_isog_even_t +{ + ec_curve_t curve; ///< The domain curve + ec_point_t kernel; ///< A kernel generator + unsigned length; ///< The length as a 2-isogeny walk +} ec_isog_even_t; + +/** @brief Isomorphism of Montgomery curves + * + * @typedef ec_isom_t + * + * @struct ec_isom_t + * + * The isomorphism is given by the map maps (X:Z) ↦ ( (Nx X + Nz Z) : (D Z) ) + */ +typedef struct ec_isom_t +{ + fp2_t Nx; + fp2_t Nz; + fp2_t D; +} ec_isom_t; + +// end ec_t +/** @} + */ + +/** @defgroup ec_curve_t Curves and isomorphisms + * @{ + */ + +// Initalisation for curves and points +void ec_curve_init(ec_curve_t *E); +void ec_point_init(ec_point_t *P); + +/** + * @brief Verify that a Montgomery coefficient is valid + * + * @param A an fp2_t + * + * @return 0 if curve is invalid, 1 otherwise + */ +int ec_curve_verify_A(const fp2_t *A); + +/** + * @brief Initialize an elliptic curve from a coefficient + * + * @param A an fp2_t + * @param E the elliptic curve to initialize + * + * @return 0 if curve is invalid, 1 otherwise + */ +int ec_curve_init_from_A(ec_curve_t *E, const fp2_t *A); + +// Copying points, bases and curves +static inline void +copy_point(ec_point_t *P, const ec_point_t *Q) +{ + fp2_copy(&P->x, &Q->x); + fp2_copy(&P->z, &Q->z); +} + +static inline void +copy_basis(ec_basis_t *B1, const ec_basis_t *B0) +{ + copy_point(&B1->P, &B0->P); + copy_point(&B1->Q, &B0->Q); + 
copy_point(&B1->PmQ, &B0->PmQ); +} + +static inline void +copy_curve(ec_curve_t *E1, const ec_curve_t *E2) +{ + fp2_copy(&(E1->A), &(E2->A)); + fp2_copy(&(E1->C), &(E2->C)); + E1->is_A24_computed_and_normalized = E2->is_A24_computed_and_normalized; + copy_point(&E1->A24, &E2->A24); +} + +// Functions for working with the A24 point and normalisation + +/** + * @brief Reduce (A : C) to (A/C : 1) in place + * + * @param E a curve + */ +void ec_normalize_curve(ec_curve_t *E); + +/** + * @brief Reduce (A + 2 : 4C) to ((A+2)/4C : 1) in place + * + * @param E a curve + */ +void ec_curve_normalize_A24(ec_curve_t *E); + +/** + * @brief Normalise both (A : C) and (A + 2 : 4C) as above, in place + * + * @param E a curve + */ +void ec_normalize_curve_and_A24(ec_curve_t *E); + +/** + * @brief Given a curve E, compute (A+2 : 4C) + * + * @param A24 the value (A+2 : 4C) to return into + * @param E a curve + */ +static inline void +AC_to_A24(ec_point_t *A24, const ec_curve_t *E) +{ + // Maybe we already have this computed + if (E->is_A24_computed_and_normalized) { + copy_point(A24, &E->A24); + return; + } + + // A24 = (A+2C : 4C) + fp2_add(&A24->z, &E->C, &E->C); + fp2_add(&A24->x, &E->A, &A24->z); + fp2_add(&A24->z, &A24->z, &A24->z); +} + +/** + * @brief Given a curve the point (A+2 : 4C) compute the curve coefficients (A : C) + * + * @param E a curve to compute + * @param A24 the value (A+2 : 4C) + */ +static inline void +A24_to_AC(ec_curve_t *E, const ec_point_t *A24) +{ + // (A:C) = ((A+2C)*2-4C : 4C) + fp2_add(&E->A, &A24->x, &A24->x); + fp2_sub(&E->A, &E->A, &A24->z); + fp2_add(&E->A, &E->A, &E->A); + fp2_copy(&E->C, &A24->z); +} + +/** + * @brief j-invariant. 
+ * + * @param j_inv computed j_invariant + * @param curve input curve + */ +void ec_j_inv(fp2_t *j_inv, const ec_curve_t *curve); + +/** + * @brief Isomorphism of elliptic curve + * Takes as input two isomorphic Kummer lines in Montgomery form, and outputs an isomorphism between + * them + * + * @param isom computed isomorphism + * @param from domain curve + * @param to image curve + * @return 0xFFFFFFFF if there was an error during the computation, zero otherwise + */ +uint32_t ec_isomorphism(ec_isom_t *isom, const ec_curve_t *from, const ec_curve_t *to); + +/** + * @brief In-place evaluation of an isomorphism + * + * @param P a point + * @param isom an isomorphism + */ +void ec_iso_eval(ec_point_t *P, ec_isom_t *isom); + +/** @} + */ +/** @defgroup ec_point_t Point operations + * @{ + */ + +/** + * @brief Point equality + * + * @param P a point + * @param Q a point + * @return 0xFFFFFFFF if equal, zero otherwise + */ +uint32_t ec_is_equal(const ec_point_t *P, const ec_point_t *Q); + +/** + * @brief Test for the point at infinity + * + * @param P a point + * @return 0xFFFFFFFF if point at infinity, zero otherwise + */ +uint32_t ec_is_zero(const ec_point_t *P); + +/** + * @brief Two torsion test + * + * @param P a point + * @param E the elliptic curve + * @return 0xFFFFFFFF if P is 2-torsion but not zero, zero otherwise + */ +uint32_t ec_is_two_torsion(const ec_point_t *P, const ec_curve_t *E); + +/** + * @brief Four torsion test + * + * @param P a point + * @param E the elliptic curve + * @return 0xFFFFFFFF if [2]P is 2-torsion but not zero (P has order exactly 4), zero otherwise + */ +uint32_t ec_is_four_torsion(const ec_point_t *P, const ec_curve_t *E); + +/** + * @brief Reduce Z-coordinate of point in place + * + * @param P a point + */ +void ec_normalize_point(ec_point_t *P); + +void xDBL_E0(ec_point_t *Q, const ec_point_t *P); +void xADD(ec_point_t *R, const ec_point_t *P, const ec_point_t *Q, const ec_point_t *PQ); +void xDBL_A24(ec_point_t *Q, const ec_point_t *P, const ec_point_t *A24, const bool 
A24_normalized); + +/** + * @brief Point doubling + * + * @param res computed double of P + * @param P a point + * @param curve an elliptic curve + */ +void ec_dbl(ec_point_t *res, const ec_point_t *P, const ec_curve_t *curve); + +/** + * @brief Point iterated doubling + * + * @param res computed double of P + * @param P a point + * @param n the number of double + * @param curve the curve on which P lays + */ +void ec_dbl_iter(ec_point_t *res, int n, const ec_point_t *P, ec_curve_t *curve); + +/** + * @brief Iterated doubling for a basis P, Q, PmQ + * + * @param res the computed iterated double of basis B + * @param n the number of doubles + * @param B the basis to double + * @param curve the parent curve of the basis + */ +void ec_dbl_iter_basis(ec_basis_t *res, int n, const ec_basis_t *B, ec_curve_t *curve); + +/** + * @brief Point multiplication + * + * @param res computed scalar * P + * @param curve the curve + * @param scalar an unsigned multi-precision integer + * @param P a point + * @param kbits numer of bits of the scalar + */ +void ec_mul(ec_point_t *res, const digit_t *scalar, const int kbits, const ec_point_t *P, ec_curve_t *curve); + +/** + * @brief Combination P+m*Q + * + * @param R computed P + m * Q + * @param curve the curve + * @param m an unsigned multi-precision integer + * @param P a point + * @param Q a point + * @param PQ the difference P-Q + * @return 0 if there was an error, 1 otherwise + */ +int ec_ladder3pt(ec_point_t *R, + const digit_t *m, + const ec_point_t *P, + const ec_point_t *Q, + const ec_point_t *PQ, + const ec_curve_t *curve); + +/** + * @brief Linear combination of points of a basis + * + * @param res computed scalarP * P + scalarQ * Q + * @param scalarP an unsigned multi-precision integer + * @param scalarQ an unsigned multi-precision integer + * @param kbits number of bits of the scalars, or n for points of order 2^n + * @param PQ a torsion basis consisting of points P and Q + * @param curve the curve + * + * @return 0 if 
there was an error, 1 otherwise + */ +int ec_biscalar_mul(ec_point_t *res, + const digit_t *scalarP, + const digit_t *scalarQ, + const int kbits, + const ec_basis_t *PQ, + const ec_curve_t *curve); + +// end point computations +/** + * @} + */ + +/** @defgroup ec_dlog_t Torsion basis computations + * @{ + */ + +/** + * @brief Generate a 2^f-torsion basis from a Montgomery curve along with a hint + * + * @param PQ2 an ec_basis_t + * @param curve an ec_curve_t + * @param f an integer + * + * @return A hint + * + * The algorithm is deterministic + */ +uint8_t ec_curve_to_basis_2f_to_hint(ec_basis_t *PQ2, ec_curve_t *curve, int f); + +/** + * @brief Generate a 2^f-torsion basis from a Montgomery curve and a given hint + * + * @param PQ2 an ec_basis_t + * @param curve an ec_curve_t + * @param f an integer + * @param hint the hint + * + * @return 1 if the basis is valid, 0 otherwise + * + * The algorithm is deterministic + */ +int ec_curve_to_basis_2f_from_hint(ec_basis_t *PQ2, ec_curve_t *curve, int f, const uint8_t hint); +/** // end basis computations + * @} + */ + +/** @defgroup ec_isog_t Isogenies + * @{ + */ + +/** + * @brief Evaluate isogeny of even degree on list of points. + * Returns 0 if successful and 0xFFFFFFFF if kernel has the wrong order or includes (0:1). + * + * @param image computed image curve + * @param phi isogeny + * @param points a list of points to evaluate the isogeny on, modified in place + * @param len_points length of the list points + * + * @return 0 if there was no error, 0xFFFFFFFF otherwise + */ +uint32_t ec_eval_even(ec_curve_t *image, ec_isog_even_t *phi, ec_point_t *points, unsigned len_points); + +/** + * @brief Multiplicative strategy for a short isogeny chain. Returns 0 if successful and 0xFFFFFFFF + * if kernel has the wrong order or includes (0:1) when special=false. + * + * @param curve domain curve, to be overwritten by the codomain curve.
+ * @param kernel a kernel generator of order 2^len + * @param len the length of the 2-isogeny chain + * @param points a list of points to evaluate the isogeny on, modified in place + * @param len_points length of the list points + * @param special if true, allow isogenies with (0:1) in the kernel + * + * @return 0 if there was no error, 0xFFFFFFFF otherwise + */ +uint32_t ec_eval_small_chain(ec_curve_t *curve, + const ec_point_t *kernel, + int len, + ec_point_t *points, + unsigned len_points, + bool special); + +/** + * @brief Recover Y-coordinate from X-coordinate and curve coefficients. + * + * @param y: a y-coordinate + * @param Px: a x-coordinate + * @param curve: the elliptic curve + * + * @return 0xFFFFFFFF if the point was on the curve, 0 otherwise + */ +uint32_t ec_recover_y(fp2_t *y, const fp2_t *Px, const ec_curve_t *curve); + +// Jacobian point init and copying +void jac_init(jac_point_t *P); +void copy_jac_point(jac_point_t *P, const jac_point_t *Q); + +/** + * @brief Test if two Jacobian points are equal + * + * @param P: a point + * @param Q: a point + * + * @return 0xFFFFFFFF if they are equal, 0 otherwise + */ +uint32_t jac_is_equal(const jac_point_t *P, const jac_point_t *Q); + +// Convert from Jacobian (X : Y : Z) to x-only (X : Z^2): the Y-coordinate is dropped and Z is squared +void jac_to_xz(ec_point_t *P, const jac_point_t *xyP); +// Convert from Jacobian coordinates in Montgomery model to Weierstrass (the first argument is the output point) +void jac_to_ws(jac_point_t *P, fp2_t *t, fp2_t *ao3, const jac_point_t *Q, const ec_curve_t *curve); +void jac_from_ws(jac_point_t *Q, const jac_point_t *P, const fp2_t *ao3, const ec_curve_t *curve); + +// Jacobian arithmetic +void jac_neg(jac_point_t *Q, const jac_point_t *P); +void ADD(jac_point_t *R, const jac_point_t *P, const jac_point_t *Q, const ec_curve_t *AC); +void DBL(jac_point_t *Q, const jac_point_t *P, const ec_curve_t *AC); +void DBLW(jac_point_t *Q, fp2_t *u, const jac_point_t *P, const fp2_t *t); +void jac_to_xz_add_components(add_components_t *uvw, const
jac_point_t *P, const jac_point_t *Q, const ec_curve_t *AC); + +/** + * @brief Given a basis in x-only, lift to a pair of Jacobian points + * + * @param P: a point + * @param Q: a point + * @param B: a basis + * @param E: an elliptic curve + * + * @return 0xFFFFFFFF if there was no error, 0 otherwise + * + * + * Lifts a basis x(P), x(Q), x(P-Q) assuming the curve has (A/C : 1) and + * the point P = (X/Z : 1). For generic implementation see lift_basis() + */ +uint32_t lift_basis_normalized(jac_point_t *P, jac_point_t *Q, ec_basis_t *B, ec_curve_t *E); + +/** + * @brief Given a basis in x-only, lift to a pair of Jacobian points + * + * @param P: a point + * @param Q: a point + * @param B: a basis + * @param E: an elliptic curve + * + * @return 0xFFFFFFFF if there was no error, 0 otherwise + */ +uint32_t lift_basis(jac_point_t *P, jac_point_t *Q, ec_basis_t *B, ec_curve_t *E); + +/** + * @brief Check if basis points (P, Q) form a full 4-basis + * + * @param B: a basis + * @param E: an elliptic curve + * + * @return 0xFFFFFFFF if they form a basis, 0 otherwise + */ +uint32_t ec_is_basis_four_torsion(const ec_basis_t *B, const ec_curve_t *E); + +/* + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Test functions for printing and order checking, only used in debug mode + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + */ + +/** + * @brief Check if a point (X : Z) has order exactly 2^t + * + * @param P: a point + * @param E: an elliptic curve + * @param t: an integer + * + * @return non-zero (the ec_is_zero mask converted to int) if the order is exactly 2^t, 0 otherwise + */ +static int +test_point_order_twof(const ec_point_t *P, const ec_curve_t *E, int t) +{ + ec_point_t test; + ec_curve_t curve; + test = *P; + copy_curve(&curve, E); + + if (ec_is_zero(&test)) + return 0; + // Scale point by 2^(t-1) + ec_dbl_iter(&test, t - 1, &test, &curve); + // If it's zero now, it doesn't have order 2^t + if (ec_is_zero(&test)) + return 0; + // Ensure [2^t] P = 0 +
ec_dbl(&test, &test, &curve); + return ec_is_zero(&test); +} + +/** + * @brief Check if basis points (P, Q, PmQ) all have order exactly 2^t + * + * @param B: a basis + * @param E: an elliptic curve + * @param t: an integer + * + * @return non-zero if P, Q and PmQ all pass test_point_order_twof (bitwise AND of the three results), 0 otherwise + */ +static int +test_basis_order_twof(const ec_basis_t *B, const ec_curve_t *E, int t) +{ + int check_P = test_point_order_twof(&B->P, E, t); + int check_Q = test_point_order_twof(&B->Q, E, t); + int check_PmQ = test_point_order_twof(&B->PmQ, E, t); + + return check_P & check_Q & check_PmQ; +} + +/** + * @brief Check if a Jacobian point (X : Y : Z) has order exactly 2^t + * + * @param P: a point + * @param E: an elliptic curve + * @param t: an integer + * + * @return non-zero (the fp2_is_zero mask converted to int) if the order is exactly 2^t, 0 otherwise + */ +static int +test_jac_order_twof(const jac_point_t *P, const ec_curve_t *E, int t) +{ + jac_point_t test; + test = *P; + if (fp2_is_zero(&test.z)) + return 0; + for (int i = 0; i < t - 1; i++) { + DBL(&test, &test, E); + } + if (fp2_is_zero(&test.z)) + return 0; + DBL(&test, &test, E); + return (fp2_is_zero(&test.z)); +} + +// Prints the affine x-coordinate X/Z of the point (X : Z), or "INF" when Z = 0 +static void +ec_point_print(const char *name, ec_point_t P) +{ + fp2_t a; + if (fp2_is_zero(&P.z)) { + printf("%s = INF\n", name); + } else { + fp2_copy(&a, &P.z); + fp2_inv(&a); + fp2_mul(&a, &a, &P.x); + fp2_print(name, &a); + } +} + +// Prints the affine Montgomery coefficient A/C of the curve (A : C) +static void +ec_curve_print(const char *name, ec_curve_t E) +{ + fp2_t a; + fp2_copy(&a, &E.C); + fp2_inv(&a); + fp2_mul(&a, &a, &E.A); + fp2_print(name, &a); +} + +#endif +// end isogeny computations +/** + * @} + */ + +// end ec +/** + * @} + */ diff --git a/src/pqm4/sqisign_lvl3/ref/ec_jac.c b/src/pqm4/sqisign_lvl3/ref/ec_jac.c new file mode 100644 index 0000000..20ca68c --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/ec_jac.c @@ -0,0 +1,335 @@ +#include +#include + +void +jac_init(jac_point_t *P) +{ // Initialize Montgomery
in Jacobian coordinates as identity element (0:1:0) + fp2_set_zero(&P->x); + fp2_set_one(&P->y); + fp2_set_zero(&P->z); +} + +uint32_t +jac_is_equal(const jac_point_t *P, const jac_point_t *Q) +{ // Evaluate if two points in Jacobian coordinates (X:Y:Z) are equal + // Returns 0xFFFFFFFF (true) if P=Q, 0 (false) otherwise + fp2_t t0, t1, t2, t3; + + fp2_sqr(&t0, &Q->z); + fp2_mul(&t2, &P->x, &t0); // x1*z2^2 + fp2_sqr(&t1, &P->z); + fp2_mul(&t3, &Q->x, &t1); // x2*z1^2 + fp2_sub(&t2, &t2, &t3); + + fp2_mul(&t0, &t0, &Q->z); + fp2_mul(&t0, &P->y, &t0); // y1*z2^3 + fp2_mul(&t1, &t1, &P->z); + fp2_mul(&t1, &Q->y, &t1); // y2*z1^3 + fp2_sub(&t0, &t0, &t1); + + return fp2_is_zero(&t0) & fp2_is_zero(&t2); +} + +void +jac_to_xz(ec_point_t *P, const jac_point_t *xyP) +{ + fp2_copy(&P->x, &xyP->x); + fp2_copy(&P->z, &xyP->z); + fp2_sqr(&P->z, &P->z); + + // If xyP = (0:1:0), we currently have P=(0 : 0) but we want to set P=(1:0) + uint32_t c1, c2; + fp2_t one; + fp2_set_one(&one); + + c1 = fp2_is_zero(&P->x); + c2 = fp2_is_zero(&P->z); + fp2_select(&P->x, &P->x, &one, c1 & c2); +} + +void +jac_to_ws(jac_point_t *Q, fp2_t *t, fp2_t *ao3, const jac_point_t *P, const ec_curve_t *curve) +{ + // Cost of 3M + 2S when A != 0. Q must not alias P when A != 0 (Q->x is overwritten before P->x is read); *ao3 is only written when A != 0. + fp_t one; + fp2_t a; + /* a = 1 - A^2/3, U = X + (A*Z^2)/3, V = Y, W = Z, T = a*Z^4*/ + fp_set_one(&one); + if (!fp2_is_zero(&(curve->A))) { + fp_div3(&(ao3->re), &(curve->A.re)); + fp_div3(&(ao3->im), &(curve->A.im)); + fp2_sqr(t, &P->z); + fp2_mul(&Q->x, ao3, t); + fp2_add(&Q->x, &Q->x, &P->x); + fp2_sqr(t, t); + fp2_mul(&a, ao3, &(curve->A)); + fp_sub(&(a.re), &one, &(a.re)); + fp_neg(&(a.im), &(a.im)); + fp2_mul(t, t, &a); + } else { + fp2_copy(&Q->x, &P->x); + fp2_sqr(t, &P->z); + fp2_sqr(t, t); + } + fp2_copy(&Q->y, &P->y); + fp2_copy(&Q->z, &P->z); +} + +void +jac_from_ws(jac_point_t *Q, const jac_point_t *P, const fp2_t *ao3, const ec_curve_t *curve) +{ + // Cost of 1M + 1S when A != 0. + fp2_t t; + /* X = U - (A*W^2)/3, Y = V, Z = W.
*/ + if (!fp2_is_zero(&(curve->A))) { + fp2_sqr(&t, &P->z); + fp2_mul(&t, &t, ao3); + fp2_sub(&Q->x, &P->x, &t); + } + fp2_copy(&Q->y, &P->y); + fp2_copy(&Q->z, &P->z); +} + +void +copy_jac_point(jac_point_t *P, const jac_point_t *Q) +{ + fp2_copy(&(P->x), &(Q->x)); + fp2_copy(&(P->y), &(Q->y)); + fp2_copy(&(P->z), &(Q->z)); +} + +void +jac_neg(jac_point_t *Q, const jac_point_t *P) +{ + fp2_copy(&Q->x, &P->x); + fp2_neg(&Q->y, &P->y); + fp2_copy(&Q->z, &P->z); +} + +void +DBL(jac_point_t *Q, const jac_point_t *P, const ec_curve_t *AC) +{ // Cost of 6M + 6S. + // Doubling on a Montgomery curve, representation in Jacobian coordinates (X:Y:Z) corresponding to + // (X/Z^2,Y/Z^3) This version receives the coefficient value A + fp2_t t0, t1, t2, t3; + + uint32_t flag = fp2_is_zero(&P->x) & fp2_is_zero(&P->z); + + fp2_sqr(&t0, &P->x); // t0 = x1^2 + fp2_add(&t1, &t0, &t0); + fp2_add(&t0, &t0, &t1); // t0 = 3x1^2 + fp2_sqr(&t1, &P->z); // t1 = z1^2 + fp2_mul(&t2, &P->x, &AC->A); + fp2_add(&t2, &t2, &t2); // t2 = 2Ax1 + fp2_add(&t2, &t1, &t2); // t2 = 2Ax1+z1^2 + fp2_mul(&t2, &t1, &t2); // t2 = z1^2(2Ax1+z1^2) + fp2_add(&t2, &t0, &t2); // t2 = alpha = 3x1^2 + z1^2(2Ax1+z1^2) + fp2_mul(&Q->z, &P->y, &P->z); + fp2_add(&Q->z, &Q->z, &Q->z); // z2 = 2y1z1 + fp2_sqr(&t0, &Q->z); + fp2_mul(&t0, &t0, &AC->A); // t0 = 4Ay1^2z1^2 + fp2_sqr(&t1, &P->y); + fp2_add(&t1, &t1, &t1); // t1 = 2y1^2 + fp2_add(&t3, &P->x, &P->x); // t3 = 2x1 + fp2_mul(&t3, &t1, &t3); // t3 = 4x1y1^2 + fp2_sqr(&Q->x, &t2); // x2 = alpha^2 + fp2_sub(&Q->x, &Q->x, &t0); // x2 = alpha^2 - 4Ay1^2z1^2 + fp2_sub(&Q->x, &Q->x, &t3); + fp2_sub(&Q->x, &Q->x, &t3); // x2 = alpha^2 - 4Ay1^2z1^2 - 8x1y1^2 + fp2_sub(&Q->y, &t3, &Q->x); // y2 = 4x1y1^2 - x2 + fp2_mul(&Q->y, &Q->y, &t2); // y2 = alpha(4x1y1^2 - x2) + fp2_sqr(&t1, &t1); // t1 = 4y1^4 + fp2_sub(&Q->y, &Q->y, &t1); + fp2_sub(&Q->y, &Q->y, &t1); // y2 = alpha(4x1y1^2 - x2) - 8y1^4 + + fp2_select(&Q->x, &Q->x, &P->x, -flag); + fp2_select(&Q->z, &Q->z, &P->z, 
-flag); +} + +void +DBLW(jac_point_t *Q, fp2_t *u, const jac_point_t *P, const fp2_t *t) +{ // Cost of 3M + 5S. + // Doubling on a Weierstrass curve, representation in modified Jacobian coordinates + // (X:Y:Z:T=a*Z^4) corresponding to (X/Z^2,Y/Z^3), where a is the curve coefficient. + // Formula from https://hyperelliptic.org/EFD/g1p/auto-shortw-modified.html + + uint32_t flag = fp2_is_zero(&P->x) & fp2_is_zero(&P->z); + + fp2_t xx, c, cc, r, s, m; + // XX = X^2 + fp2_sqr(&xx, &P->x); + // A = 2*Y^2 + fp2_sqr(&c, &P->y); + fp2_add(&c, &c, &c); + // AA = A^2 + fp2_sqr(&cc, &c); + // R = 2*AA + fp2_add(&r, &cc, &cc); + // S = (X+A)^2-XX-AA + fp2_add(&s, &P->x, &c); + fp2_sqr(&s, &s); + fp2_sub(&s, &s, &xx); + fp2_sub(&s, &s, &cc); + // M = 3*XX+T1 + fp2_add(&m, &xx, &xx); + fp2_add(&m, &m, &xx); + fp2_add(&m, &m, t); + // X3 = M^2-2*S + fp2_sqr(&Q->x, &m); + fp2_sub(&Q->x, &Q->x, &s); + fp2_sub(&Q->x, &Q->x, &s); + // Z3 = 2*Y*Z + fp2_mul(&Q->z, &P->y, &P->z); + fp2_add(&Q->z, &Q->z, &Q->z); + // Y3 = M*(S-X3)-R + fp2_sub(&Q->y, &s, &Q->x); + fp2_mul(&Q->y, &Q->y, &m); + fp2_sub(&Q->y, &Q->y, &r); + // T3 = 2*R*T1 + fp2_mul(u, t, &r); + fp2_add(u, u, u); + + fp2_select(&Q->x, &Q->x, &P->x, -flag); + fp2_select(&Q->z, &Q->z, &P->z, -flag); +} + +void +select_jac_point(jac_point_t *Q, const jac_point_t *P1, const jac_point_t *P2, const digit_t option) +{ // Select points + // If option = 0 then Q <- P1, else if option = 0xFF...FF then Q <- P2 + fp2_select(&(Q->x), &(P1->x), &(P2->x), option); + fp2_select(&(Q->y), &(P1->y), &(P2->y), option); + fp2_select(&(Q->z), &(P1->z), &(P2->z), option); +} + +void +ADD(jac_point_t *R, const jac_point_t *P, const jac_point_t *Q, const ec_curve_t *AC) +{ + // Addition on a Montgomery curve, representation in Jacobian coordinates (X:Y:Z) corresponding + // to (x,y) = (X/Z^2,Y/Z^3) This version receives the coefficient value A + // + // Complete routine, to handle all edge cases: + // if ZP == 0: # P == inf + // return Q + // if ZQ 
== 0: # Q == inf + // return P + // dy <- YQ*ZP**3 - YP*ZQ**3 + // dx <- XQ*ZP**2 - XP*ZQ**2 + // if dx == 0: # x1 == x2 + // if dy == 0: # ... and y1 == y2: doubling case + // dy <- ZQ * (3*XP^2 + ZP^2 * (2*A*XP + ZP^2)) # as computed below; the common factor ZP is dropped + // dx <- 2*YP # (projectively equivalent to the pair ZP*ZQ*(...), 2*YP*ZP) + // else: # ... but y1 != y2, thus P = -Q + // return inf + // XR <- dy**2 - dx**2 * (A*ZP^2*ZQ^2 + XP*ZQ^2 + XQ*ZP^2) + // YR <- dy * (XP*ZQ^2 * dx^2 - XR) - YP*ZQ^3 * dx^3 + // ZR <- dx * ZP * ZQ + + // Constant time processing: + // - The case for P == 0 or Q == 0 is handled at the end with conditional select + // - dy and dx are computed for both the normal and doubling cases, we switch when + // dx == dy == 0 for the normal case. + // - If we have that P = -Q then dx = 0 and so ZR will be zero, giving us the point + // at infinity for "free". + // + // These current formulas are expensive and I'm probably missing some tricks... + // Thought I'd get the ball rolling. + // Cost 17M + 6S + 13a + fp2_t t0, t1, t2, t3, u1, u2, v1, dx, dy; + + /* If P is zero or Q is zero we will conditionally swap before returning.
*/ + uint32_t ctl1 = fp2_is_zero(&P->z); + uint32_t ctl2 = fp2_is_zero(&Q->z); + + /* Precompute some values */ + fp2_sqr(&t0, &P->z); // t0 = z1^2 + fp2_sqr(&t1, &Q->z); // t1 = z2^2 + + /* Compute dy and dx for ordinary case */ + fp2_mul(&v1, &t1, &Q->z); // v1 = z2^3 + fp2_mul(&t2, &t0, &P->z); // t2 = z1^3 + fp2_mul(&v1, &v1, &P->y); // v1 = y1z2^3 + fp2_mul(&t2, &t2, &Q->y); // t2 = y2z1^3 + fp2_sub(&dy, &t2, &v1); // dy = y2z1^3 - y1z2^3 + fp2_mul(&u2, &t0, &Q->x); // u2 = x2z1^2 + fp2_mul(&u1, &t1, &P->x); // u1 = x1z2^2 + fp2_sub(&dx, &u2, &u1); // dx = x2z1^2 - x1z2^2 + + /* Compute dy and dx for doubling case */ + fp2_add(&t1, &P->y, &P->y); // dx_dbl = t1 = 2y1 + fp2_add(&t2, &AC->A, &AC->A); // t2 = 2A + fp2_mul(&t2, &t2, &P->x); // t2 = 2Ax1 + fp2_add(&t2, &t2, &t0); // t2 = 2Ax1 + z1^2 + fp2_mul(&t2, &t2, &t0); // t2 = z1^2 * (2Ax1 + z1^2) + fp2_sqr(&t0, &P->x); // t0 = x1^2 + fp2_add(&t2, &t2, &t0); // t2 = x1^2 + z1^2 * (2Ax1 + z1^2) + fp2_add(&t2, &t2, &t0); // t2 = 2*x1^2 + z1^2 * (2Ax1 + z1^2) + fp2_add(&t2, &t2, &t0); // t2 = 3*x1^2 + z1^2 * (2Ax1 + z1^2) + fp2_mul(&t2, &t2, &Q->z); // dy_dbl = t2 = z2 * (3*x1^2 + z1^2 * (2Ax1 + z1^2)) + + /* If dx is zero and dy is zero, select the doubling values dx_dbl and dy_dbl instead */ + uint32_t ctl = fp2_is_zero(&dx) & fp2_is_zero(&dy); + fp2_select(&dx, &dx, &t1, ctl); + fp2_select(&dy, &dy, &t2, ctl); + + /* Some more precomputations */ + fp2_mul(&t0, &P->z, &Q->z); // t0 = z1z2 + fp2_sqr(&t1, &t0); // t1 = (z1z2)^2 + fp2_sqr(&t2, &dx); // t2 = dx^2 + fp2_sqr(&t3, &dy); // t3 = dy^2 + + /* Compute x3 = dy**2 - dx**2 * (A*ZP^2*ZQ^2 + XP*ZQ^2 + XQ*ZP^2) */ + fp2_mul(&R->x, &AC->A, &t1); // x3 = A*(z1z2)^2 + fp2_add(&R->x, &R->x, &u1); // x3 = A*(z1z2)^2 + u1 + fp2_add(&R->x, &R->x, &u2); // x3 = A*(z1z2)^2 + u1 + u2 + fp2_mul(&R->x, &R->x, &t2); // x3 = dx^2 * (A*(z1z2)^2 + u1 + u2) + fp2_sub(&R->x, &t3, &R->x); // x3 = dy^2 - dx^2 * (A*(z1z2)^2 + u1 + u2) + + /* Compute y3 = dy * (XP*ZQ^2 * dx^2 - XR) - YP*ZQ^3 * dx^3*/ +
fp2_mul(&R->y, &u1, &t2); // y3 = u1 * dx^2 + fp2_sub(&R->y, &R->y, &R->x); // y3 = u1 * dx^2 - x3 + fp2_mul(&R->y, &R->y, &dy); // y3 = dy * (u1 * dx^2 - x3) + fp2_mul(&t3, &t2, &dx); // t3 = dx^3 + fp2_mul(&t3, &t3, &v1); // t3 = v1 * dx^3 + fp2_sub(&R->y, &R->y, &t3); // y3 = dy * (u1 * dx^2 - x3) - v1 * dx^3 + + /* Compute z3 = dx * z1 * z2 */ + fp2_mul(&R->z, &dx, &t0); + + /* Finally, we need to set R = P if Q.Z = 0 and R = Q if P.Z = 0. P and Q are re-read here after R has been written, so R must not alias an input that may have to be returned. */ + select_jac_point(R, R, Q, ctl1); + select_jac_point(R, R, P, ctl2); +} + +void +jac_to_xz_add_components(add_components_t *add_comp, const jac_point_t *P, const jac_point_t *Q, const ec_curve_t *AC) +{ + // Take P and Q in E distinct, two jac_point_t, return three components u,v and w in Fp2 such + // that the xz coordinates of P+Q are (u-v:w) and of P-Q are (u+v:w) + + fp2_t t0, t1, t2, t3, t4, t5, t6; + + fp2_sqr(&t0, &P->z); // t0 = z1^2 + fp2_sqr(&t1, &Q->z); // t1 = z2^2 + fp2_mul(&t2, &P->x, &t1); // t2 = x1z2^2 + fp2_mul(&t3, &t0, &Q->x); // t3 = z1^2x2 + fp2_mul(&t4, &P->y, &Q->z); // t4 = y1z2 + fp2_mul(&t4, &t4, &t1); // t4 = y1z2^3 + fp2_mul(&t5, &P->z, &Q->y); // t5 = z1y2 + fp2_mul(&t5, &t5, &t0); // t5 = z1^3y2 + fp2_mul(&t0, &t0, &t1); // t0 = (z1z2)^2 + fp2_mul(&t6, &t4, &t5); // t6 = (z1z_2)^3y1y2 + fp2_add(&add_comp->v, &t6, &t6); // v = 2(z1z_2)^3y1y2 + fp2_sqr(&t4, &t4); // t4 = y1^2z2^6 + fp2_sqr(&t5, &t5); // t5 = z1^6y_2^2 + fp2_add(&t4, &t4, &t5); // t4 = z1^6y_2^2 + y1^2z2^6 + fp2_add(&t5, &t2, &t3); // t5 = x1z2^2 +z_1^2x2 + fp2_add(&t6, &t3, &t3); // t6 = 2z_1^2x2 + fp2_sub(&t6, &t5, &t6); // t6 = lambda = x1z2^2 - z_1^2x2 + fp2_sqr(&t6, &t6); // t6 = lambda^2 = (x1z2^2 - z_1^2x2)^2 + fp2_mul(&t1, &AC->A, &t0); // t1 = A*(z1z2)^2 + fp2_add(&t1, &t5, &t1); // t1 = gamma =A*(z1z2)^2 + x1z2^2 +z_1^2x2 + fp2_mul(&t1, &t1, &t6); // t1 = gamma*lambda^2 + fp2_sub(&add_comp->u, &t4, &t1); // u = z1^6y_2^2 + y1^2z2^6 - gamma*lambda^2 + fp2_mul(&add_comp->w, &t6, &t0); // w = (z1z2)^2(lambda)^2 +} diff
--git a/src/pqm4/sqisign_lvl3/ref/ec_params.c b/src/pqm4/sqisign_lvl3/ref/ec_params.c new file mode 100644 index 0000000..ae214aa --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/ec_params.c @@ -0,0 +1,4 @@ +#include +// p+1 divided by the power of 2 +const digit_t p_cofactor_for_2f[1] = {65}; + diff --git a/src/pqm4/sqisign_lvl3/ref/ec_params.h b/src/pqm4/sqisign_lvl3/ref/ec_params.h new file mode 100644 index 0000000..941abd5 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/ec_params.h @@ -0,0 +1,12 @@ +#ifndef EC_PARAMS_H +#define EC_PARAMS_H + +#include + +#define TORSION_EVEN_POWER 376 + +// p+1 divided by the power of 2 +extern const digit_t p_cofactor_for_2f[1]; +#define P_COFACTOR_FOR_2F_BITLENGTH 7 + +#endif diff --git a/src/pqm4/sqisign_lvl3/ref/encode_verification.c b/src/pqm4/sqisign_lvl3/ref/encode_verification.c new file mode 100644 index 0000000..fecdb9c --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/encode_verification.c @@ -0,0 +1,220 @@ +#include +#include +#include +#include +#include +#include + +typedef unsigned char byte_t; + +// digits + +static void +encode_digits(byte_t *enc, const digit_t *x, size_t nbytes) +{ // Write the first nbytes bytes of the digit array x to enc (enc needs no particular alignment) +#ifdef TARGET_BIG_ENDIAN + const size_t ndigits = nbytes / sizeof(digit_t); + const size_t rem = nbytes % sizeof(digit_t); + + // enc is an arbitrary byte offset inside the output buffer, so store each swapped digit with + // memcpy instead of through a digit_t pointer (which would be a misaligned, aliasing store). + for (size_t i = 0; i < ndigits; i++) { + const digit_t sd = BSWAP_DIGIT(x[i]); + memcpy(enc + i * sizeof(digit_t), &sd, sizeof(digit_t)); + } + if (rem) { + digit_t ld = BSWAP_DIGIT(x[ndigits]); + memcpy(enc + ndigits * sizeof(digit_t), (byte_t *)&ld, rem); + } +#else + memcpy(enc, (const byte_t *)x, nbytes); +#endif +} + +static void +decode_digits(digit_t *x, const byte_t *enc, size_t nbytes, size_t ndigits) +{ // Inverse of encode_digits: copy nbytes bytes into x and zero-fill the rest of the ndigits digits + assert(nbytes <= ndigits * sizeof(digit_t)); + memcpy((byte_t *)x, enc, nbytes); + memset((byte_t *)x + nbytes, 0, ndigits * sizeof(digit_t) - nbytes); + +#ifdef TARGET_BIG_ENDIAN + for (size_t i = 0; i < ndigits; i++) + x[i] = BSWAP_DIGIT(x[i]); +#endif +} + +// fp2_t + +static byte_t * +fp2_to_bytes(byte_t *enc, const fp2_t *x) +{ + fp2_encode(enc, x); + return
enc + FP2_ENCODED_BYTES; +} + +static const byte_t * +fp2_from_bytes(fp2_t *x, const byte_t *enc) +{ + fp2_decode(x, enc); /* NOTE(review): the mask returned by fp2_decode is discarded here; confirm that callers validate encodings elsewhere */ + return enc + FP2_ENCODED_BYTES; +} + +// curves and points + +static byte_t * +proj_to_bytes(byte_t *enc, const fp2_t *x, const fp2_t *z) +{ + assert(!fp2_is_zero(z)); /* z == 0 is only rejected when assertions are enabled */ + fp2_t tmp = *z; + fp2_inv(&tmp); +#ifndef NDEBUG + { + fp2_t chk; + fp2_mul(&chk, z, &tmp); + fp2_t one; + fp2_set_one(&one); + assert(fp2_is_equal(&chk, &one)); + } +#endif + fp2_mul(&tmp, x, &tmp); /* tmp = x / z, the affine value that gets encoded */ + enc = fp2_to_bytes(enc, &tmp); + return enc; +} + +static const byte_t * +proj_from_bytes(fp2_t *x, fp2_t *z, const byte_t *enc) +{ + enc = fp2_from_bytes(x, enc); + fp2_set_one(z); /* decoded values are always returned as (x : 1) */ + return enc; +} + +static byte_t * +ec_curve_to_bytes(byte_t *enc, const ec_curve_t *curve) +{ + return proj_to_bytes(enc, &curve->A, &curve->C); +} + +static const byte_t * +ec_curve_from_bytes(ec_curve_t *curve, const byte_t *enc) +{ + memset(curve, 0, sizeof(*curve)); /* clear every other field of the curve before filling in (A : C) */ + return proj_from_bytes(&curve->A, &curve->C, enc); +} + +static byte_t * +ec_point_to_bytes(byte_t *enc, const ec_point_t *point) +{ + return proj_to_bytes(enc, &point->x, &point->z); +} + +static const byte_t * +ec_point_from_bytes(ec_point_t *point, const byte_t *enc) +{ + return proj_from_bytes(&point->x, &point->z, enc); +} + +static byte_t * +ec_basis_to_bytes(byte_t *enc, const ec_basis_t *basis) +{ + enc = ec_point_to_bytes(enc, &basis->P); + enc = ec_point_to_bytes(enc, &basis->Q); + enc = ec_point_to_bytes(enc, &basis->PmQ); + return enc; +} + +static const byte_t * +ec_basis_from_bytes(ec_basis_t *basis, const byte_t *enc) +{ + enc = ec_point_from_bytes(&basis->P, enc); + enc = ec_point_from_bytes(&basis->Q, enc); + enc = ec_point_from_bytes(&basis->PmQ, enc); + return enc; +} + +// public API + +byte_t * +public_key_to_bytes(byte_t *enc, const public_key_t *pk) +{ +#ifndef NDEBUG + const byte_t *const start = enc; +#endif + enc = ec_curve_to_bytes(enc, &pk->curve); + *enc++ = pk->hint_pk; + assert(enc - start ==
PUBLICKEY_BYTES); + return enc; +} + +const byte_t * +public_key_from_bytes(public_key_t *pk, const byte_t *enc) +{ +#ifndef NDEBUG + const byte_t *const start = enc; +#endif + enc = ec_curve_from_bytes(&pk->curve, enc); + pk->hint_pk = *enc++; + assert(enc - start == PUBLICKEY_BYTES); + return enc; +} + +void +signature_to_bytes(byte_t *enc, const signature_t *sig) +{ +#ifndef NDEBUG + byte_t *const start = enc; +#endif + + enc = fp2_to_bytes(enc, &sig->E_aux_A); + + *enc++ = sig->backtracking; + *enc++ = sig->two_resp_length; + + size_t nbytes = (SQIsign_response_length + 9) / 8; /* = ceil((SQIsign_response_length + 2) / 8) bytes per matrix entry */ + encode_digits(enc, sig->mat_Bchall_can_to_B_chall[0][0], nbytes); + enc += nbytes; + encode_digits(enc, sig->mat_Bchall_can_to_B_chall[0][1], nbytes); + enc += nbytes; + encode_digits(enc, sig->mat_Bchall_can_to_B_chall[1][0], nbytes); + enc += nbytes; + encode_digits(enc, sig->mat_Bchall_can_to_B_chall[1][1], nbytes); + enc += nbytes; + + nbytes = SECURITY_BITS / 8; + encode_digits(enc, sig->chall_coeff, nbytes); + enc += nbytes; + + *enc++ = sig->hint_aux; + *enc++ = sig->hint_chall; + + assert(enc - start == SIGNATURE_BYTES); +} + +void +signature_from_bytes(signature_t *sig, const byte_t *enc) +{ +#ifndef NDEBUG + const byte_t *const start = enc; +#endif + + enc = fp2_from_bytes(&sig->E_aux_A, enc); + + sig->backtracking = *enc++; + sig->two_resp_length = *enc++; + + size_t nbytes = (SQIsign_response_length + 9) / 8; /* same per-entry length as in signature_to_bytes */ + decode_digits(sig->mat_Bchall_can_to_B_chall[0][0], enc, nbytes, NWORDS_ORDER); + enc += nbytes; + decode_digits(sig->mat_Bchall_can_to_B_chall[0][1], enc, nbytes, NWORDS_ORDER); + enc += nbytes; + decode_digits(sig->mat_Bchall_can_to_B_chall[1][0], enc, nbytes, NWORDS_ORDER); + enc += nbytes; + decode_digits(sig->mat_Bchall_can_to_B_chall[1][1], enc, nbytes, NWORDS_ORDER); + enc += nbytes; + + nbytes = SECURITY_BITS / 8; + decode_digits(sig->chall_coeff, enc, nbytes, NWORDS_ORDER); + enc += nbytes; + + sig->hint_aux = *enc++; + sig->hint_chall = *enc++; + +
assert(enc - start == SIGNATURE_BYTES); +} diff --git a/src/pqm4/sqisign_lvl3/ref/encoded_sizes.h b/src/pqm4/sqisign_lvl3/ref/encoded_sizes.h new file mode 100644 index 0000000..50a8781 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/encoded_sizes.h @@ -0,0 +1,11 @@ +#define SECURITY_BITS 192 +#define SQIsign_response_length 192 +#define HASH_ITERATIONS 256 +#define FP_ENCODED_BYTES 48 +#define FP2_ENCODED_BYTES 96 +#define EC_CURVE_ENCODED_BYTES 96 +#define EC_POINT_ENCODED_BYTES 96 +#define EC_BASIS_ENCODED_BYTES 288 +#define PUBLICKEY_BYTES 97 +#define SECRETKEY_BYTES 529 +#define SIGNATURE_BYTES 224 diff --git a/src/pqm4/sqisign_lvl3/ref/fp.c b/src/pqm4/sqisign_lvl3/ref/fp.c new file mode 100644 index 0000000..48e2937 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/fp.c @@ -0,0 +1,15 @@ +#include + +/* + * If ctl == 0x00000000, then *d is set to a0 + * If ctl == 0xFFFFFFFF, then *d is set to a1 + * ctl MUST be either 0x00000000 or 0xFFFFFFFF. + */ +void +fp_select(fp_t *d, const fp_t *a0, const fp_t *a1, uint32_t ctl) +{ + digit_t cw = (int32_t)ctl; /* going through int32_t sign-extends the mask if digit_t is wider than 32 bits */ + for (unsigned int i = 0; i < NWORDS_FIELD; i++) { + (*d)[i] = (*a0)[i] ^ (cw & ((*a0)[i] ^ (*a1)[i])); /* per word: a0 when cw is all zeros, a1 when cw is all ones */ + } +} diff --git a/src/pqm4/sqisign_lvl3/ref/fp.h b/src/pqm4/sqisign_lvl3/ref/fp.h new file mode 100644 index 0000000..1241d58 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/fp.h @@ -0,0 +1,48 @@ +#ifndef FP_H +#define FP_H + +//////////////////////////////////////////////// NOTE: this is placed here for now +#include +#include +#include +#include +#include +#include +#include +#include + +typedef digit_t fp_t[NWORDS_FIELD]; // Datatype for representing field elements + +extern const digit_t ONE[NWORDS_FIELD]; +extern const digit_t ZERO[NWORDS_FIELD]; +// extern const digit_t PM1O3[NWORDS_FIELD]; + +void fp_set_small(fp_t *x, const digit_t val); +void fp_mul_small(fp_t *x, const fp_t *a, const uint32_t val); +void fp_set_zero(fp_t *x); +void fp_set_one(fp_t *x); +uint32_t fp_is_equal(const fp_t *a, const fp_t *b);
+uint32_t fp_is_zero(const fp_t *a); +void fp_copy(fp_t *out, const fp_t *a); + +void fp_encode(void *dst, const fp_t *a); +void fp_decode_reduce(fp_t *d, const void *src, size_t len); +uint32_t fp_decode(fp_t *d, const void *src); + +void fp_select(fp_t *d, const fp_t *a0, const fp_t *a1, uint32_t ctl); +void fp_cswap(fp_t *a, fp_t *b, uint32_t ctl); + +void fp_add(fp_t *out, const fp_t *a, const fp_t *b); +void fp_sub(fp_t *out, const fp_t *a, const fp_t *b); +void fp_neg(fp_t *out, const fp_t *a); +void fp_sqr(fp_t *out, const fp_t *a); +void fp_mul(fp_t *out, const fp_t *a, const fp_t *b); + +void fp_inv(fp_t *x); +uint32_t fp_is_square(const fp_t *a); +void fp_sqrt(fp_t *a); +void fp_half(fp_t *out, const fp_t *a); +void fp_exp3div4(fp_t *out, const fp_t *a); +void fp_div3(fp_t *out, const fp_t *a); + +#endif diff --git a/src/pqm4/sqisign_lvl3/ref/fp2.c b/src/pqm4/sqisign_lvl3/ref/fp2.c new file mode 100644 index 0000000..a258952 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/fp2.c @@ -0,0 +1,328 @@ +#include +#include +#include + +/* Arithmetic in GF(p^2) modulo X^2 + 1: an element is stored as the pair (re, im), standing for re + im*X */ + +void +fp2_set_small(fp2_t *x, const digit_t val) +{ + fp_set_small(&(x->re), val); + fp_set_zero(&(x->im)); +} + +void +fp2_mul_small(fp2_t *x, const fp2_t *y, uint32_t n) +{ + fp_mul_small(&x->re, &y->re, n); + fp_mul_small(&x->im, &y->im, n); +} + +void +fp2_set_one(fp2_t *x) +{ + fp_set_one(&(x->re)); + fp_set_zero(&(x->im)); +} + +void +fp2_set_zero(fp2_t *x) +{ + fp_set_zero(&(x->re)); + fp_set_zero(&(x->im)); +} + +// Is a GF(p^2) element zero? +// Returns 0xFF...FF (true) if a=0, 0 (false) otherwise +uint32_t +fp2_is_zero(const fp2_t *a) +{ + return fp_is_zero(&(a->re)) & fp_is_zero(&(a->im)); +} + +// Compare two GF(p^2) elements in constant time +// Returns 0xFF...FF (true) if a=b, 0 (false) otherwise +uint32_t +fp2_is_equal(const fp2_t *a, const fp2_t *b) +{ + return fp_is_equal(&(a->re), &(b->re)) & fp_is_equal(&(a->im), &(b->im)); +} + +// Is a GF(p^2) element one?
+// Returns 0xFF...FF (true) if a=1, 0 (false) otherwise +uint32_t +fp2_is_one(const fp2_t *a) +{ + return fp_is_equal(&(a->re), &ONE) & fp_is_zero(&(a->im)); +} + +void +fp2_copy(fp2_t *x, const fp2_t *y) +{ + fp_copy(&(x->re), &(y->re)); + fp_copy(&(x->im), &(y->im)); +} + +void +fp2_add(fp2_t *x, const fp2_t *y, const fp2_t *z) +{ + fp_add(&(x->re), &(y->re), &(z->re)); + fp_add(&(x->im), &(y->im), &(z->im)); +} + +void +fp2_add_one(fp2_t *x, const fp2_t *y) +{ + fp_add(&x->re, &y->re, &ONE); + fp_copy(&x->im, &y->im); +} + +void +fp2_sub(fp2_t *x, const fp2_t *y, const fp2_t *z) +{ + fp_sub(&(x->re), &(y->re), &(z->re)); + fp_sub(&(x->im), &(y->im), &(z->im)); +} + +void +fp2_neg(fp2_t *x, const fp2_t *y) +{ + fp_neg(&(x->re), &(y->re)); + fp_neg(&(x->im), &(y->im)); +} + +void +fp2_mul(fp2_t *x, const fp2_t *y, const fp2_t *z) +{ + fp_t t0, t1; + + fp_add(&t0, &(y->re), &(y->im)); + fp_add(&t1, &(z->re), &(z->im)); + fp_mul(&t0, &t0, &t1); /* t0 = (y.re + y.im) * (z.re + z.im) */ + fp_mul(&t1, &(y->im), &(z->im)); /* t1 = y.im * z.im */ + fp_mul(&(x->re), &(y->re), &(z->re)); /* x.re = y.re * z.re (intermediate value) */ + fp_sub(&(x->im), &t0, &t1); + fp_sub(&(x->im), &(x->im), &(x->re)); /* x.im = y.re * z.im + y.im * z.re */ + fp_sub(&(x->re), &(x->re), &t1); /* x.re = y.re * z.re - y.im * z.im */ +} + +void +fp2_sqr(fp2_t *x, const fp2_t *y) +{ + fp_t sum, diff; + + fp_add(&sum, &(y->re), &(y->im)); + fp_sub(&diff, &(y->re), &(y->im)); + fp_mul(&(x->im), &(y->re), &(y->im)); + fp_add(&(x->im), &(x->im), &(x->im)); /* x.im = 2 * y.re * y.im */ + fp_mul(&(x->re), &sum, &diff); /* x.re = (y.re + y.im) * (y.re - y.im) */ +} + +void +fp2_inv(fp2_t *x) +{ + fp_t t0, t1; + + fp_sqr(&t0, &(x->re)); + fp_sqr(&t1, &(x->im)); + fp_add(&t0, &t0, &t1); + fp_inv(&t0); /* t0 = 1 / (re^2 + im^2) */ + fp_mul(&(x->re), &(x->re), &t0); + fp_mul(&(x->im), &(x->im), &t0); + fp_neg(&(x->im), &(x->im)); /* x = (re - im*i) / (re^2 + im^2) */ +} + +uint32_t +fp2_is_square(const fp2_t *x) +{ + fp_t t0, t1; + + fp_sqr(&t0, &(x->re)); + fp_sqr(&t1, &(x->im)); + fp_add(&t0, &t0, &t1); /* t0 = re^2 + im^2 */ + + return fp_is_square(&t0); +} + +void +fp2_sqrt(fp2_t *a) +{ + fp_t x0, x1, t0, t1; + + /* From "Optimized One-Dimensional SQIsign Verification on Intel and + * Cortex-M4" by Aardal et al:
https://eprint.iacr.org/2024/1563 */ + + // x0 = \delta = sqrt(a0^2 + a1^2). + fp_sqr(&x0, &(a->re)); + fp_sqr(&x1, &(a->im)); + fp_add(&x0, &x0, &x1); + fp_sqrt(&x0); + // If a1 = 0, there is a risk of \delta = -a0, which makes x0 = 0 below. + // In that case, we restore the value \delta = a0. + fp_select(&x0, &x0, &(a->re), fp_is_zero(&(a->im))); + // x0 = \delta + a0, t0 = 2 * x0. + fp_add(&x0, &x0, &(a->re)); + fp_add(&t0, &x0, &x0); + + // x1 = t0^((p-3)/4) + fp_exp3div4(&x1, &t0); + + // x0 = x0 * x1, x1 = x1 * a1, t1 = (2x0)^2. + fp_mul(&x0, &x0, &x1); + fp_mul(&x1, &x1, &(a->im)); + fp_add(&t1, &x0, &x0); + fp_sqr(&t1, &t1); + // If t1 = t0, return x0 + x1*i, otherwise x1 - x0*i. + fp_sub(&t0, &t0, &t1); + uint32_t f = fp_is_zero(&t0); + fp_neg(&t1, &x0); + fp_copy(&t0, &x1); + fp_select(&t0, &t0, &x0, f); + fp_select(&t1, &t1, &x1, f); + + // Check if t0 is zero + uint32_t t0_is_zero = fp_is_zero(&t0); + + // Check whether t0, t1 are odd + // Note: we encode to ensure canonical representation + uint8_t tmp_bytes[FP_ENCODED_BYTES]; + fp_encode(tmp_bytes, &t0); + uint32_t t0_is_odd = -((uint32_t)tmp_bytes[0] & 1); + fp_encode(tmp_bytes, &t1); + uint32_t t1_is_odd = -((uint32_t)tmp_bytes[0] & 1); + + // We negate the output if: + // t0 is odd, or + // t0 is zero and t1 is odd + uint32_t negate_output = t0_is_odd | (t0_is_zero & t1_is_odd); + fp_neg(&x0, &t0); + fp_select(&(a->re), &t0, &x0, negate_output); + fp_neg(&x0, &t1); + fp_select(&(a->im), &t1, &x0, negate_output); +} + +uint32_t +fp2_sqrt_verify(fp2_t *a) +{ // Replace a by fp2_sqrt(a) and return 0xFF...FF if the result squares back to the input, 0 otherwise + fp2_t t0, t1; + + fp2_copy(&t0, a); + fp2_sqrt(a); + fp2_sqr(&t1, a); + + return (fp2_is_equal(&t0, &t1)); +} + +void +fp2_half(fp2_t *x, const fp2_t *y) +{ + fp_half(&(x->re), &(y->re)); + fp_half(&(x->im), &(y->im)); +} + +void +fp2_batched_inv(fp2_t *x, int len) +{ // In-place batched inversion of x[0..len-1] using a single fp2_inv call + if (len <= 0) { + return; // nothing to invert; also avoids a zero-length VLA and the t1[len - 1] access below + } + fp2_t t1[len], t2[len]; + fp2_t inverse; + + // x = x0,...,xn + // t1 = x0, x0*x1, ... ,x0 * x1 * ...
* xn + fp2_copy(&t1[0], &x[0]); + for (int i = 1; i < len; i++) { + fp2_mul(&t1[i], &t1[i - 1], &x[i]); + } + + // inverse = 1/ (x0 * x1 * ... * xn) + fp2_copy(&inverse, &t1[len - 1]); + fp2_inv(&inverse); + + fp2_copy(&t2[0], &inverse); + // t2 = 1/ (x0 * x1 * ... * xn), 1/ (x0 * x1 * ... * x(n-1)) , ... , 1/xO + for (int i = 1; i < len; i++) { + fp2_mul(&t2[i], &t2[i - 1], &x[len - i]); + } + + fp2_copy(&x[0], &t2[len - 1]); + + for (int i = 1; i < len; i++) { + fp2_mul(&x[i], &t1[i - 1], &t2[len - i - 1]); + } +} + +// exponentiation using square and multiply +// Warning!! Not constant time! +void +fp2_pow_vartime(fp2_t *out, const fp2_t *x, const digit_t *exp, const int size) +{ + fp2_t acc; + digit_t bit; + + fp2_copy(&acc, x); + fp2_set_one(out); + + // Iterate over each word of exp + for (int j = 0; j < size; j++) { + // Iterate over each bit of the word + for (int i = 0; i < RADIX; i++) { + bit = (exp[j] >> i) & 1; + if (bit == 1) { + fp2_mul(out, out, &acc); + } + fp2_sqr(&acc, &acc); + } + } +} + +void +fp2_print(const char *name, const fp2_t *a) +{ + printf("%s0x", name); + + uint8_t buf[FP_ENCODED_BYTES]; + fp_encode(&buf, &a->re); // Encoding ensures canonical rep + for (int i = 0; i < FP_ENCODED_BYTES; i++) { + printf("%02x", buf[FP_ENCODED_BYTES - i - 1]); + } + + printf(" + i*0x"); + + fp_encode(&buf, &a->im); + for (int i = 0; i < FP_ENCODED_BYTES; i++) { + printf("%02x", buf[FP_ENCODED_BYTES - i - 1]); + } + printf("\n"); +} + +void +fp2_encode(void *dst, const fp2_t *a) +{ + uint8_t *buf = dst; + fp_encode(buf, &(a->re)); + fp_encode(buf + FP_ENCODED_BYTES, &(a->im)); +} + +uint32_t +fp2_decode(fp2_t *d, const void *src) +{ + const uint8_t *buf = src; + uint32_t re, im; + + re = fp_decode(&(d->re), buf); + im = fp_decode(&(d->im), buf + FP_ENCODED_BYTES); + return re & im; +} + +void +fp2_select(fp2_t *d, const fp2_t *a0, const fp2_t *a1, uint32_t ctl) +{ + fp_select(&(d->re), &(a0->re), &(a1->re), ctl); + fp_select(&(d->im), &(a0->im), 
&(a1->im), ctl); +} + +void +fp2_cswap(fp2_t *a, fp2_t *b, uint32_t ctl) +{ + fp_cswap(&(a->re), &(b->re), ctl); + fp_cswap(&(a->im), &(b->im), ctl); +} diff --git a/src/pqm4/sqisign_lvl3/ref/fp2.h b/src/pqm4/sqisign_lvl3/ref/fp2.h new file mode 100644 index 0000000..00e673b --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/fp2.h @@ -0,0 +1,41 @@ +#ifndef FP2_H +#define FP2_H + +#include +#include "fp.h" +#include + +// Structure for representing elements in GF(p^2) +typedef struct fp2_t +{ + fp_t re, im; +} fp2_t; + +void fp2_set_small(fp2_t *x, const digit_t val); +void fp2_mul_small(fp2_t *x, const fp2_t *y, uint32_t n); +void fp2_set_one(fp2_t *x); +void fp2_set_zero(fp2_t *x); +uint32_t fp2_is_zero(const fp2_t *a); +uint32_t fp2_is_equal(const fp2_t *a, const fp2_t *b); +uint32_t fp2_is_one(const fp2_t *a); +void fp2_copy(fp2_t *x, const fp2_t *y); +void fp2_add(fp2_t *x, const fp2_t *y, const fp2_t *z); +void fp2_add_one(fp2_t *x, const fp2_t *y); +void fp2_sub(fp2_t *x, const fp2_t *y, const fp2_t *z); +void fp2_neg(fp2_t *x, const fp2_t *y); +void fp2_mul(fp2_t *x, const fp2_t *y, const fp2_t *z); +void fp2_sqr(fp2_t *x, const fp2_t *y); +void fp2_inv(fp2_t *x); +uint32_t fp2_is_square(const fp2_t *x); +void fp2_sqrt(fp2_t *x); +uint32_t fp2_sqrt_verify(fp2_t *a); +void fp2_half(fp2_t *x, const fp2_t *y); +void fp2_batched_inv(fp2_t *x, int len); +void fp2_pow_vartime(fp2_t *out, const fp2_t *x, const digit_t *exp, const int size); +void fp2_print(const char *name, const fp2_t *a); +void fp2_encode(void *dst, const fp2_t *a); +uint32_t fp2_decode(fp2_t *d, const void *src); +void fp2_select(fp2_t *d, const fp2_t *a0, const fp2_t *a1, uint32_t ctl); +void fp2_cswap(fp2_t *a, fp2_t *b, uint32_t ctl); + +#endif diff --git a/src/pqm4/sqisign_lvl3/ref/fp_constants.h b/src/pqm4/sqisign_lvl3/ref/fp_constants.h new file mode 100644 index 0000000..063579a --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/fp_constants.h @@ -0,0 +1,17 @@ +#if RADIX == 32 +#if 
defined(SQISIGN_GF_IMPL_BROADWELL) +#define NWORDS_FIELD 12 +#else +#define NWORDS_FIELD 14 +#endif +#define NWORDS_ORDER 12 +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +#define NWORDS_FIELD 6 +#else +#define NWORDS_FIELD 7 +#endif +#define NWORDS_ORDER 6 +#endif +#define BITS 384 +#define LOG2P 9 diff --git a/src/pqm4/sqisign_lvl3/ref/fp_p65376_32.c b/src/pqm4/sqisign_lvl3/ref/fp_p65376_32.c new file mode 100644 index 0000000..1483461 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/fp_p65376_32.c @@ -0,0 +1,1231 @@ +// clang-format off +// Command line : python monty.py 32 +// 0x40ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + +#include +#include + +#define sspint int32_t +#define spint uint32_t +#define udpint uint64_t +#define dpint uint64_t + +#define Wordlength 32 +#define Nlimbs 14 +#define Radix 28 +#define Nbits 383 +#define Nbytes 48 + +#define MONTGOMERY +// propagate carries +inline static spint prop(spint *n) { + int i; + spint mask = ((spint)1 << 28u) - (spint)1; + sspint carry = (sspint)n[0]; + carry >>= 28u; + n[0] &= mask; + for (i = 1; i < 13; i++) { + carry += (sspint)n[i]; + n[i] = (spint)carry & mask; + carry >>= 28u; + } + n[13] += (spint)carry; + return -((n[13] >> 1) >> 30u); +} + +// propagate carries and add p if negative, propagate carries again +inline static int flatten(spint *n) { + spint carry = prop(n); + n[0] -= (spint)1u & carry; + n[13] += ((spint)0x41000u) & carry; + (void)prop(n); + return (int)(carry & 1); +} + +// Montgomery final subtract +static int modfsb(spint *n) { + n[0] += (spint)1u; + n[13] -= (spint)0x41000u; + return flatten(n); +} + +// Modular addition - reduce less than 2p +static void modadd(const spint *a, const spint *b, spint *n) { + spint carry; + n[0] = a[0] + b[0]; + n[1] = a[1] + b[1]; + n[2] = a[2] + b[2]; + n[3] = a[3] + b[3]; + n[4] = a[4] + b[4]; + n[5] = a[5] + b[5]; + n[6] = a[6] + b[6]; + n[7] = a[7] + b[7]; + n[8] = a[8] + b[8]; + n[9] = 
a[9] + b[9]; + n[10] = a[10] + b[10]; + n[11] = a[11] + b[11]; + n[12] = a[12] + b[12]; + n[13] = a[13] + b[13]; + n[0] += (spint)2u; + n[13] -= (spint)0x82000u; + carry = prop(n); + n[0] -= (spint)2u & carry; + n[13] += ((spint)0x82000u) & carry; + (void)prop(n); +} + +// Modular subtraction - reduce less than 2p +static void modsub(const spint *a, const spint *b, spint *n) { + spint carry; + n[0] = a[0] - b[0]; + n[1] = a[1] - b[1]; + n[2] = a[2] - b[2]; + n[3] = a[3] - b[3]; + n[4] = a[4] - b[4]; + n[5] = a[5] - b[5]; + n[6] = a[6] - b[6]; + n[7] = a[7] - b[7]; + n[8] = a[8] - b[8]; + n[9] = a[9] - b[9]; + n[10] = a[10] - b[10]; + n[11] = a[11] - b[11]; + n[12] = a[12] - b[12]; + n[13] = a[13] - b[13]; + carry = prop(n); + n[0] -= (spint)2u & carry; + n[13] += ((spint)0x82000u) & carry; + (void)prop(n); +} + +// Modular negation +static void modneg(const spint *b, spint *n) { + spint carry; + n[0] = (spint)0 - b[0]; + n[1] = (spint)0 - b[1]; + n[2] = (spint)0 - b[2]; + n[3] = (spint)0 - b[3]; + n[4] = (spint)0 - b[4]; + n[5] = (spint)0 - b[5]; + n[6] = (spint)0 - b[6]; + n[7] = (spint)0 - b[7]; + n[8] = (spint)0 - b[8]; + n[9] = (spint)0 - b[9]; + n[10] = (spint)0 - b[10]; + n[11] = (spint)0 - b[11]; + n[12] = (spint)0 - b[12]; + n[13] = (spint)0 - b[13]; + carry = prop(n); + n[0] -= (spint)2u & carry; + n[13] += ((spint)0x82000u) & carry; + (void)prop(n); +} + +// Overflow limit = 18446744073709551616 +// maximum possible = 1008877845989814286 +// Modular multiplication, c=a*b mod 2p +static void modmul(const spint *a, const spint *b, spint *c) { + dpint t = 0; + spint p13 = 0x41000u; + spint q = ((spint)1 << 28u); // q is unsaturated radix + spint mask = (spint)(q - (spint)1); + t += (dpint)a[0] * b[0]; + spint v0 = ((spint)t & mask); + t >>= 28; + t += (dpint)a[0] * b[1]; + t += (dpint)a[1] * b[0]; + spint v1 = ((spint)t & mask); + t >>= 28; + t += (dpint)a[0] * b[2]; + t += (dpint)a[1] * b[1]; + t += (dpint)a[2] * b[0]; + spint v2 = ((spint)t & mask); + t 
>>= 28; + t += (dpint)a[0] * b[3]; + t += (dpint)a[1] * b[2]; + t += (dpint)a[2] * b[1]; + t += (dpint)a[3] * b[0]; + spint v3 = ((spint)t & mask); + t >>= 28; + t += (dpint)a[0] * b[4]; + t += (dpint)a[1] * b[3]; + t += (dpint)a[2] * b[2]; + t += (dpint)a[3] * b[1]; + t += (dpint)a[4] * b[0]; + spint v4 = ((spint)t & mask); + t >>= 28; + t += (dpint)a[0] * b[5]; + t += (dpint)a[1] * b[4]; + t += (dpint)a[2] * b[3]; + t += (dpint)a[3] * b[2]; + t += (dpint)a[4] * b[1]; + t += (dpint)a[5] * b[0]; + spint v5 = ((spint)t & mask); + t >>= 28; + t += (dpint)a[0] * b[6]; + t += (dpint)a[1] * b[5]; + t += (dpint)a[2] * b[4]; + t += (dpint)a[3] * b[3]; + t += (dpint)a[4] * b[2]; + t += (dpint)a[5] * b[1]; + t += (dpint)a[6] * b[0]; + spint v6 = ((spint)t & mask); + t >>= 28; + t += (dpint)a[0] * b[7]; + t += (dpint)a[1] * b[6]; + t += (dpint)a[2] * b[5]; + t += (dpint)a[3] * b[4]; + t += (dpint)a[4] * b[3]; + t += (dpint)a[5] * b[2]; + t += (dpint)a[6] * b[1]; + t += (dpint)a[7] * b[0]; + spint v7 = ((spint)t & mask); + t >>= 28; + t += (dpint)a[0] * b[8]; + t += (dpint)a[1] * b[7]; + t += (dpint)a[2] * b[6]; + t += (dpint)a[3] * b[5]; + t += (dpint)a[4] * b[4]; + t += (dpint)a[5] * b[3]; + t += (dpint)a[6] * b[2]; + t += (dpint)a[7] * b[1]; + t += (dpint)a[8] * b[0]; + spint v8 = ((spint)t & mask); + t >>= 28; + t += (dpint)a[0] * b[9]; + t += (dpint)a[1] * b[8]; + t += (dpint)a[2] * b[7]; + t += (dpint)a[3] * b[6]; + t += (dpint)a[4] * b[5]; + t += (dpint)a[5] * b[4]; + t += (dpint)a[6] * b[3]; + t += (dpint)a[7] * b[2]; + t += (dpint)a[8] * b[1]; + t += (dpint)a[9] * b[0]; + spint v9 = ((spint)t & mask); + t >>= 28; + t += (dpint)a[0] * b[10]; + t += (dpint)a[1] * b[9]; + t += (dpint)a[2] * b[8]; + t += (dpint)a[3] * b[7]; + t += (dpint)a[4] * b[6]; + t += (dpint)a[5] * b[5]; + t += (dpint)a[6] * b[4]; + t += (dpint)a[7] * b[3]; + t += (dpint)a[8] * b[2]; + t += (dpint)a[9] * b[1]; + t += (dpint)a[10] * b[0]; + spint v10 = ((spint)t & mask); + t >>= 28; + t += 
(dpint)a[0] * b[11]; + t += (dpint)a[1] * b[10]; + t += (dpint)a[2] * b[9]; + t += (dpint)a[3] * b[8]; + t += (dpint)a[4] * b[7]; + t += (dpint)a[5] * b[6]; + t += (dpint)a[6] * b[5]; + t += (dpint)a[7] * b[4]; + t += (dpint)a[8] * b[3]; + t += (dpint)a[9] * b[2]; + t += (dpint)a[10] * b[1]; + t += (dpint)a[11] * b[0]; + spint v11 = ((spint)t & mask); + t >>= 28; + t += (dpint)a[0] * b[12]; + t += (dpint)a[1] * b[11]; + t += (dpint)a[2] * b[10]; + t += (dpint)a[3] * b[9]; + t += (dpint)a[4] * b[8]; + t += (dpint)a[5] * b[7]; + t += (dpint)a[6] * b[6]; + t += (dpint)a[7] * b[5]; + t += (dpint)a[8] * b[4]; + t += (dpint)a[9] * b[3]; + t += (dpint)a[10] * b[2]; + t += (dpint)a[11] * b[1]; + t += (dpint)a[12] * b[0]; + spint v12 = ((spint)t & mask); + t >>= 28; + t += (dpint)a[0] * b[13]; + t += (dpint)a[1] * b[12]; + t += (dpint)a[2] * b[11]; + t += (dpint)a[3] * b[10]; + t += (dpint)a[4] * b[9]; + t += (dpint)a[5] * b[8]; + t += (dpint)a[6] * b[7]; + t += (dpint)a[7] * b[6]; + t += (dpint)a[8] * b[5]; + t += (dpint)a[9] * b[4]; + t += (dpint)a[10] * b[3]; + t += (dpint)a[11] * b[2]; + t += (dpint)a[12] * b[1]; + t += (dpint)a[13] * b[0]; + t += (dpint)v0 * (dpint)p13; + spint v13 = ((spint)t & mask); + t >>= 28; + t += (dpint)a[1] * b[13]; + t += (dpint)a[2] * b[12]; + t += (dpint)a[3] * b[11]; + t += (dpint)a[4] * b[10]; + t += (dpint)a[5] * b[9]; + t += (dpint)a[6] * b[8]; + t += (dpint)a[7] * b[7]; + t += (dpint)a[8] * b[6]; + t += (dpint)a[9] * b[5]; + t += (dpint)a[10] * b[4]; + t += (dpint)a[11] * b[3]; + t += (dpint)a[12] * b[2]; + t += (dpint)a[13] * b[1]; + t += (dpint)v1 * (dpint)p13; + c[0] = ((spint)t & mask); + t >>= 28; + t += (dpint)a[2] * b[13]; + t += (dpint)a[3] * b[12]; + t += (dpint)a[4] * b[11]; + t += (dpint)a[5] * b[10]; + t += (dpint)a[6] * b[9]; + t += (dpint)a[7] * b[8]; + t += (dpint)a[8] * b[7]; + t += (dpint)a[9] * b[6]; + t += (dpint)a[10] * b[5]; + t += (dpint)a[11] * b[4]; + t += (dpint)a[12] * b[3]; + t += (dpint)a[13] * b[2]; + t += 
(dpint)v2 * (dpint)p13; + c[1] = ((spint)t & mask); + t >>= 28; + t += (dpint)a[3] * b[13]; + t += (dpint)a[4] * b[12]; + t += (dpint)a[5] * b[11]; + t += (dpint)a[6] * b[10]; + t += (dpint)a[7] * b[9]; + t += (dpint)a[8] * b[8]; + t += (dpint)a[9] * b[7]; + t += (dpint)a[10] * b[6]; + t += (dpint)a[11] * b[5]; + t += (dpint)a[12] * b[4]; + t += (dpint)a[13] * b[3]; + t += (dpint)v3 * (dpint)p13; + c[2] = ((spint)t & mask); + t >>= 28; + t += (dpint)a[4] * b[13]; + t += (dpint)a[5] * b[12]; + t += (dpint)a[6] * b[11]; + t += (dpint)a[7] * b[10]; + t += (dpint)a[8] * b[9]; + t += (dpint)a[9] * b[8]; + t += (dpint)a[10] * b[7]; + t += (dpint)a[11] * b[6]; + t += (dpint)a[12] * b[5]; + t += (dpint)a[13] * b[4]; + t += (dpint)v4 * (dpint)p13; + c[3] = ((spint)t & mask); + t >>= 28; + t += (dpint)a[5] * b[13]; + t += (dpint)a[6] * b[12]; + t += (dpint)a[7] * b[11]; + t += (dpint)a[8] * b[10]; + t += (dpint)a[9] * b[9]; + t += (dpint)a[10] * b[8]; + t += (dpint)a[11] * b[7]; + t += (dpint)a[12] * b[6]; + t += (dpint)a[13] * b[5]; + t += (dpint)v5 * (dpint)p13; + c[4] = ((spint)t & mask); + t >>= 28; + t += (dpint)a[6] * b[13]; + t += (dpint)a[7] * b[12]; + t += (dpint)a[8] * b[11]; + t += (dpint)a[9] * b[10]; + t += (dpint)a[10] * b[9]; + t += (dpint)a[11] * b[8]; + t += (dpint)a[12] * b[7]; + t += (dpint)a[13] * b[6]; + t += (dpint)v6 * (dpint)p13; + c[5] = ((spint)t & mask); + t >>= 28; + t += (dpint)a[7] * b[13]; + t += (dpint)a[8] * b[12]; + t += (dpint)a[9] * b[11]; + t += (dpint)a[10] * b[10]; + t += (dpint)a[11] * b[9]; + t += (dpint)a[12] * b[8]; + t += (dpint)a[13] * b[7]; + t += (dpint)v7 * (dpint)p13; + c[6] = ((spint)t & mask); + t >>= 28; + t += (dpint)a[8] * b[13]; + t += (dpint)a[9] * b[12]; + t += (dpint)a[10] * b[11]; + t += (dpint)a[11] * b[10]; + t += (dpint)a[12] * b[9]; + t += (dpint)a[13] * b[8]; + t += (dpint)v8 * (dpint)p13; + c[7] = ((spint)t & mask); + t >>= 28; + t += (dpint)a[9] * b[13]; + t += (dpint)a[10] * b[12]; + t += (dpint)a[11] * 
b[11]; + t += (dpint)a[12] * b[10]; + t += (dpint)a[13] * b[9]; + t += (dpint)v9 * (dpint)p13; + c[8] = ((spint)t & mask); + t >>= 28; + t += (dpint)a[10] * b[13]; + t += (dpint)a[11] * b[12]; + t += (dpint)a[12] * b[11]; + t += (dpint)a[13] * b[10]; + t += (dpint)v10 * (dpint)p13; + c[9] = ((spint)t & mask); + t >>= 28; + t += (dpint)a[11] * b[13]; + t += (dpint)a[12] * b[12]; + t += (dpint)a[13] * b[11]; + t += (dpint)v11 * (dpint)p13; + c[10] = ((spint)t & mask); + t >>= 28; + t += (dpint)a[12] * b[13]; + t += (dpint)a[13] * b[12]; + t += (dpint)v12 * (dpint)p13; + c[11] = ((spint)t & mask); + t >>= 28; + t += (dpint)a[13] * b[13]; + t += (dpint)v13 * (dpint)p13; + c[12] = ((spint)t & mask); + t >>= 28; + c[13] = (spint)t; +} + +// Modular squaring, c=a*a mod 2p +static void modsqr(const spint *a, spint *c) { + udpint tot; + udpint t = 0; + spint p13 = 0x41000u; + spint q = ((spint)1 << 28u); // q is unsaturated radix + spint mask = (spint)(q - (spint)1); + tot = (udpint)a[0] * a[0]; + t = tot; + spint v0 = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[0] * a[1]; + tot *= 2; + t += tot; + spint v1 = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[0] * a[2]; + tot *= 2; + tot += (udpint)a[1] * a[1]; + t += tot; + spint v2 = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[0] * a[3]; + tot += (udpint)a[1] * a[2]; + tot *= 2; + t += tot; + spint v3 = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[0] * a[4]; + tot += (udpint)a[1] * a[3]; + tot *= 2; + tot += (udpint)a[2] * a[2]; + t += tot; + spint v4 = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[0] * a[5]; + tot += (udpint)a[1] * a[4]; + tot += (udpint)a[2] * a[3]; + tot *= 2; + t += tot; + spint v5 = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[0] * a[6]; + tot += (udpint)a[1] * a[5]; + tot += (udpint)a[2] * a[4]; + tot *= 2; + tot += (udpint)a[3] * a[3]; + t += tot; + spint v6 = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[0] * a[7]; + tot += (udpint)a[1] * a[6]; + tot += (udpint)a[2] * a[5]; + 
tot += (udpint)a[3] * a[4]; + tot *= 2; + t += tot; + spint v7 = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[0] * a[8]; + tot += (udpint)a[1] * a[7]; + tot += (udpint)a[2] * a[6]; + tot += (udpint)a[3] * a[5]; + tot *= 2; + tot += (udpint)a[4] * a[4]; + t += tot; + spint v8 = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[0] * a[9]; + tot += (udpint)a[1] * a[8]; + tot += (udpint)a[2] * a[7]; + tot += (udpint)a[3] * a[6]; + tot += (udpint)a[4] * a[5]; + tot *= 2; + t += tot; + spint v9 = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[0] * a[10]; + tot += (udpint)a[1] * a[9]; + tot += (udpint)a[2] * a[8]; + tot += (udpint)a[3] * a[7]; + tot += (udpint)a[4] * a[6]; + tot *= 2; + tot += (udpint)a[5] * a[5]; + t += tot; + spint v10 = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[0] * a[11]; + tot += (udpint)a[1] * a[10]; + tot += (udpint)a[2] * a[9]; + tot += (udpint)a[3] * a[8]; + tot += (udpint)a[4] * a[7]; + tot += (udpint)a[5] * a[6]; + tot *= 2; + t += tot; + spint v11 = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[0] * a[12]; + tot += (udpint)a[1] * a[11]; + tot += (udpint)a[2] * a[10]; + tot += (udpint)a[3] * a[9]; + tot += (udpint)a[4] * a[8]; + tot += (udpint)a[5] * a[7]; + tot *= 2; + tot += (udpint)a[6] * a[6]; + t += tot; + spint v12 = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[0] * a[13]; + tot += (udpint)a[1] * a[12]; + tot += (udpint)a[2] * a[11]; + tot += (udpint)a[3] * a[10]; + tot += (udpint)a[4] * a[9]; + tot += (udpint)a[5] * a[8]; + tot += (udpint)a[6] * a[7]; + tot *= 2; + t += tot; + t += (udpint)v0 * p13; + spint v13 = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[1] * a[13]; + tot += (udpint)a[2] * a[12]; + tot += (udpint)a[3] * a[11]; + tot += (udpint)a[4] * a[10]; + tot += (udpint)a[5] * a[9]; + tot += (udpint)a[6] * a[8]; + tot *= 2; + tot += (udpint)a[7] * a[7]; + t += tot; + t += (udpint)v1 * p13; + c[0] = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[2] * a[13]; + tot += (udpint)a[3] * a[12]; + tot += 
(udpint)a[4] * a[11]; + tot += (udpint)a[5] * a[10]; + tot += (udpint)a[6] * a[9]; + tot += (udpint)a[7] * a[8]; + tot *= 2; + t += tot; + t += (udpint)v2 * p13; + c[1] = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[3] * a[13]; + tot += (udpint)a[4] * a[12]; + tot += (udpint)a[5] * a[11]; + tot += (udpint)a[6] * a[10]; + tot += (udpint)a[7] * a[9]; + tot *= 2; + tot += (udpint)a[8] * a[8]; + t += tot; + t += (udpint)v3 * p13; + c[2] = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[4] * a[13]; + tot += (udpint)a[5] * a[12]; + tot += (udpint)a[6] * a[11]; + tot += (udpint)a[7] * a[10]; + tot += (udpint)a[8] * a[9]; + tot *= 2; + t += tot; + t += (udpint)v4 * p13; + c[3] = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[5] * a[13]; + tot += (udpint)a[6] * a[12]; + tot += (udpint)a[7] * a[11]; + tot += (udpint)a[8] * a[10]; + tot *= 2; + tot += (udpint)a[9] * a[9]; + t += tot; + t += (udpint)v5 * p13; + c[4] = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[6] * a[13]; + tot += (udpint)a[7] * a[12]; + tot += (udpint)a[8] * a[11]; + tot += (udpint)a[9] * a[10]; + tot *= 2; + t += tot; + t += (udpint)v6 * p13; + c[5] = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[7] * a[13]; + tot += (udpint)a[8] * a[12]; + tot += (udpint)a[9] * a[11]; + tot *= 2; + tot += (udpint)a[10] * a[10]; + t += tot; + t += (udpint)v7 * p13; + c[6] = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[8] * a[13]; + tot += (udpint)a[9] * a[12]; + tot += (udpint)a[10] * a[11]; + tot *= 2; + t += tot; + t += (udpint)v8 * p13; + c[7] = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[9] * a[13]; + tot += (udpint)a[10] * a[12]; + tot *= 2; + tot += (udpint)a[11] * a[11]; + t += tot; + t += (udpint)v9 * p13; + c[8] = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[10] * a[13]; + tot += (udpint)a[11] * a[12]; + tot *= 2; + t += tot; + t += (udpint)v10 * p13; + c[9] = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[11] * a[13]; + tot *= 2; + tot += (udpint)a[12] * a[12]; + t += tot; + t += 
(udpint)v11 * p13; + c[10] = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[12] * a[13]; + tot *= 2; + t += tot; + t += (udpint)v12 * p13; + c[11] = ((spint)t & mask); + t >>= 28; + tot = (udpint)a[13] * a[13]; + t += tot; + t += (udpint)v13 * p13; + c[12] = ((spint)t & mask); + t >>= 28; + c[13] = (spint)t; +} + +// copy +static void modcpy(const spint *a, spint *c) { + int i; + for (i = 0; i < 14; i++) { + c[i] = a[i]; + } +} + +// square n times +static void modnsqr(spint *a, int n) { + int i; + for (i = 0; i < n; i++) { + modsqr(a, a); + } +} + +// Calculate progenitor +static void modpro(const spint *w, spint *z) { + spint x[14]; + spint t0[14]; + spint t1[14]; + spint t2[14]; + spint t3[14]; + spint t4[14]; + spint t5[14]; + modcpy(w, x); + modsqr(x, z); + modsqr(z, t0); + modmul(x, t0, t1); + modmul(z, t1, z); + modsqr(z, t0); + modsqr(t0, t3); + modsqr(t3, t4); + modsqr(t4, t2); + modcpy(t2, t5); + modnsqr(t5, 3); + modmul(t2, t5, t2); + modcpy(t2, t5); + modnsqr(t5, 6); + modmul(t2, t5, t2); + modcpy(t2, t5); + modnsqr(t5, 2); + modmul(t4, t5, t5); + modnsqr(t5, 13); + modmul(t2, t5, t2); + modcpy(t2, t5); + modnsqr(t5, 2); + modmul(t4, t5, t4); + modnsqr(t4, 28); + modmul(t2, t4, t2); + modsqr(t2, t4); + modmul(t3, t4, t3); + modnsqr(t3, 59); + modmul(t2, t3, t2); + modmul(t1, t2, t1); + modmul(z, t1, z); + modmul(t0, z, t0); + modmul(t1, t0, t1); + modsqr(t1, t2); + modmul(t1, t2, t2); + modsqr(t2, t2); + modmul(t1, t2, t2); + modmul(t0, t2, t0); + modmul(z, t0, z); + modsqr(z, t2); + modmul(z, t2, t2); + modmul(t0, t2, t0); + modmul(t1, t0, t1); + modcpy(t1, t2); + modnsqr(t2, 128); + modmul(t1, t2, t1); + modmul(t0, t1, t0); + modnsqr(t0, 125); + modmul(z, t0, z); +} + +// calculate inverse, provide progenitor h if available +static void modinv(const spint *x, const spint *h, spint *z) { + spint s[14]; + spint t[14]; + if (h == NULL) { + modpro(x, t); + } else { + modcpy(h, t); + } + modcpy(x, s); + modnsqr(t, 2); + modmul(s, t, z); +} + +// Convert 
// m to n-residue form, n=nres(m)
// The conversion is a single modmul() of m by the fixed 14-limb constant c.
static void nres(const spint *m, spint *n) {
  const spint c[14] = {0xf13732fu, 0x3f03f03u, 0x3f03f0u, 0xf03f03fu,
                       0x3f03f03u, 0x3f03f0u, 0xf03f03fu, 0x3f03f03u,
                       0x3f03f0u, 0xf03f03fu, 0x3f03f03u, 0x3f03f0u,
                       0xf03f03fu, 0x14f03u};
  modmul(m, c, n);
}

// Convert n back to normal form, m=redc(n)
// Implemented as modmul() by the plain integer 1 followed by modfsb() (the
// final subtraction); the value returned by modfsb() is deliberately ignored.
static void redc(const spint *n, spint *m) {
  int i;
  spint c[14];
  c[0] = 1;
  for (i = 1; i < 14; i++) {
    c[i] = 0;
  }
  modmul(n, c, m);
  (void)modfsb(m);
}

// is unity?
// Returns 1 if redc(a) is exactly 1, otherwise 0. Branch-free: for a 32-bit v,
// ((v - 1) >> 28) & 1 is 1 when v == 0 (the subtraction wraps to 0xffffffff).
// NOTE(review): the test is only exact if every limb written by redc() is at
// most 2^28 - confirm against modfsb()/flatten().
static int modis1(const spint *a) {
  int i;
  spint c[14];
  spint c0;
  spint d = 0;
  redc(a, c);
  // d collects limbs 1..13; limb 0 is checked separately against 1.
  for (i = 1; i < 14; i++) {
    d |= c[i];
  }
  c0 = (spint)c[0];
  return ((spint)1 & ((d - (spint)1) >> 28u) &
          (((c0 ^ (spint)1) - (spint)1) >> 28u));
}

// is zero?
// Returns 1 if every limb of redc(a) is zero, otherwise 0 (same branch-free
// zero test as modis1()).
static int modis0(const spint *a) {
  int i;
  spint c[14];
  spint d = 0;
  redc(a, c);
  for (i = 0; i < 14; i++) {
    d |= c[i];
  }
  return ((spint)1 & ((d - (spint)1) >> 28u));
}

// set to zero
// All 14 limbs are cleared; no nres() call is made.
static void modzer(spint *a) {
  int i;
  for (i = 0; i < 14; i++) {
    a[i] = 0;
  }
}

// set to one
// Writes the integer 1 and converts it in place with nres().
static void modone(spint *a) {
  int i;
  a[0] = 1;
  for (i = 1; i < 14; i++) {
    a[i] = 0;
  }
  nres(a, a);
}

// set to integer
// x is cast to spint and stored in limb 0 before the nres() conversion.
// NOTE(review): a negative x therefore becomes a large unsigned limb value,
// not -|x| mod p - callers presumably pass small non-negative values; confirm.
static void modint(int x, spint *a) {
  int i;
  a[0] = (spint)x;
  for (i = 1; i < 14; i++) {
    a[i] = 0;
  }
  nres(a, a);
}

// Modular multiplication by an integer, c=a*b mod 2p
// b is first converted with modint(), then multiplied with modmul().
static void modmli(const spint *a, int b, spint *c) {
  spint t[14];
  modint(b, t);
  modmul(a, t, c);
}

// Test for quadratic residue
// h is an optional precomputed progenitor of x (pass NULL to have it computed
// with modpro()). Computes r = h^2 * x and returns 1 if r is one or x is zero,
// otherwise 0.
static int modqr(const spint *h, const spint *x) {
  spint r[14];
  if (h == NULL) {
    modpro(x, r);
    modsqr(r, r);
  } else {
    modsqr(h, r);
  }
  modmul(r, x, r);
  return modis1(r) | modis0(x);
}

// conditional move g to f if b=1 (f is left unchanged if b=0)
// strongly recommend inlining be disabled using compiler specific syntax
// For b in {0,1}: c0*t + c1*s - r*(t+s) == (1-b)*t + b*s modulo 2^32, so the
// selection is done arithmetically, with the constant r masking the raw bit.
static void modcmv(int b, const spint *g, volatile spint *f) {
  int i;
  spint c0, c1, s, t;
  spint r = 0x5aa5a55au;
  c0 = (1 - b) + r;
  c1 = b + r;
  for (i = 0; i < 14; i++) {
    s = g[i];
    t = f[i];
    f[i] = c0 * t + c1 * s;
    f[i] -= r * (t + s);
  }
}

// conditional swap g and f if b=1 (both are left unchanged if b=0)
// strongly recommend inlining be disabled using compiler specific syntax
// Same masked arithmetic selection as modcmv(), applied in both directions;
// w = r*(t+s) is the common correction term.
static void modcsw(int b, volatile spint *g, volatile spint *f) {
  int i;
  spint c0, c1, s, t, w;
  spint r = 0x5aa5a55au;
  c0 = (1 - b) + r;
  c1 = b + r;
  for (i = 0; i < 14; i++) {
    s = g[i];
    t = f[i];
    w = r * (t + s);
    f[i] = c0 * t + c1 * s;
    f[i] -= w;
    g[i] = c0 * s + c1 * t;
    g[i] -= w;
  }
}

// Modular square root, provide progenitor h if available, NULL if not
// The result is progenitor * x. It goes through the temporary s before being
// copied to r. No check is made here that x is actually a square.
static void modsqrt(const spint *x, const spint *h, spint *r) {
  spint s[14];
  spint y[14];
  if (h == NULL) {
    modpro(x, y);
  } else {
    modcpy(h, y);
  }
  modmul(y, x, s);
  modcpy(s, r);
}

// shift left by less than a word
// Limbs hold 28 bits each: limbs 0..12 are masked back to 28 bits after the
// shift, while the top limb a[13] is not masked. Processes from the top limb
// down so each limb still sees its lower neighbour's unshifted value.
static void modshl(unsigned int n, spint *a) {
  int i;
  a[13] = ((a[13] << n)) | (a[12] >> (28u - n));
  for (i = 12; i > 0; i--) {
    a[i] = ((a[i] << n) & (spint)0xfffffff) | (a[i - 1] >> (28u - n));
  }
  a[0] = (a[0] << n) & (spint)0xfffffff;
}

// shift right by less than a word.
Return shifted out part +static int modshr(unsigned int n, spint *a) { + int i; + spint r = a[0] & (((spint)1 << n) - (spint)1); + for (i = 0; i < 13; i++) { + a[i] = (a[i] >> n) | ((a[i + 1] << (28u - n)) & (spint)0xfffffff); + } + a[13] = a[13] >> n; + return r; +} + +// set a= 2^r +static void mod2r(unsigned int r, spint *a) { + unsigned int n = r / 28u; + unsigned int m = r % 28u; + modzer(a); + if (r >= 48 * 8) + return; + a[n] = 1; + a[n] <<= m; + nres(a, a); +} + +// export to byte array +static void modexp(const spint *a, char *b) { + int i; + spint c[14]; + redc(a, c); + for (i = 47; i >= 0; i--) { + b[i] = c[0] & (spint)0xff; + (void)modshr(8, c); + } +} + +// import from byte array +// returns 1 if in range, else 0 +static int modimp(const char *b, spint *a) { + int i, res; + for (i = 0; i < 14; i++) { + a[i] = 0; + } + for (i = 0; i < 48; i++) { + modshl(8, a); + a[0] += (spint)(unsigned char)b[i]; + } + res = modfsb(a); + nres(a, a); + return res; +} + +// determine sign +static int modsign(const spint *a) { + spint c[14]; + redc(a, c); + return c[0] % 2; +} + +// return true if equal +static int modcmp(const spint *a, const spint *b) { + spint c[14], d[14]; + int i, eq = 1; + redc(a, c); + redc(b, d); + for (i = 0; i < 14; i++) { + eq &= (((c[i] ^ d[i]) - 1) >> 28) & 1; + } + return eq; +} + +// clang-format on +/****************************************************************************** + API functions calling generated code above + ******************************************************************************/ + +#include + +const digit_t ZERO[NWORDS_FIELD] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +const digit_t ONE[NWORDS_FIELD] = { + 0x000003f0, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00010000 +}; +// Montgomery representation of 2^-1 +static const digit_t TWO_INV[NWORDS_FIELD] = { 0x000001f8, 
0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00008000 }; +// Montgomery representation of 3^-1 +static const digit_t THREE_INV[NWORDS_FIELD] = { 0x0aaaabfa, 0x0aaaaaaa, 0x0aaaaaaa, 0x0aaaaaaa, 0x0aaaaaaa, + 0x0aaaaaaa, 0x0aaaaaaa, 0x0aaaaaaa, 0x0aaaaaaa, 0x0aaaaaaa, + 0x0aaaaaaa, 0x0aaaaaaa, 0x0aaaaaaa, 0x00030aaa }; +// Montgomery representation of 2^384 +static const digit_t R2[NWORDS_FIELD] = { 0x003f1373, 0x0f03f03f, 0x03f03f03, 0x003f03f0, 0x0f03f03f, + 0x03f03f03, 0x003f03f0, 0x0f03f03f, 0x03f03f03, 0x003f03f0, + 0x0f03f03f, 0x03f03f03, 0x003f03f0, 0x0000c03f }; + +void +fp_set_small(fp_t *x, const digit_t val) +{ + modint((int)val, *x); +} + +void +fp_mul_small(fp_t *x, const fp_t *a, const uint32_t val) +{ + modmli(*a, (int)val, *x); +} + +void +fp_set_zero(fp_t *x) +{ + modzer(*x); +} + +void +fp_set_one(fp_t *x) +{ + modone(*x); +} + +uint32_t +fp_is_equal(const fp_t *a, const fp_t *b) +{ + return -(uint32_t)modcmp(*a, *b); +} + +uint32_t +fp_is_zero(const fp_t *a) +{ + return -(uint32_t)modis0(*a); +} + +void +fp_copy(fp_t *out, const fp_t *a) +{ + modcpy(*a, *out); +} + +void +fp_cswap(fp_t *a, fp_t *b, uint32_t ctl) +{ + modcsw((int)(ctl & 0x1), *a, *b); +} + +void +fp_add(fp_t *out, const fp_t *a, const fp_t *b) +{ + modadd(*a, *b, *out); +} + +void +fp_sub(fp_t *out, const fp_t *a, const fp_t *b) +{ + modsub(*a, *b, *out); +} + +void +fp_neg(fp_t *out, const fp_t *a) +{ + modneg(*a, *out); +} + +void +fp_sqr(fp_t *out, const fp_t *a) +{ + modsqr(*a, *out); +} + +void +fp_mul(fp_t *out, const fp_t *a, const fp_t *b) +{ + modmul(*a, *b, *out); +} + +void +fp_inv(fp_t *x) +{ + modinv(*x, NULL, *x); +} + +uint32_t +fp_is_square(const fp_t *a) +{ + return -(uint32_t)modqr(NULL, *a); +} + +void +fp_sqrt(fp_t *a) +{ + modsqrt(*a, NULL, *a); +} + +void +fp_half(fp_t *out, const fp_t *a) +{ + modmul(TWO_INV, *a, *out); +} + +void +fp_exp3div4(fp_t *out, 
const fp_t *a) +{ + modpro(*a, *out); +} + +void +fp_div3(fp_t *out, const fp_t *a) +{ + modmul(THREE_INV, *a, *out); +} + +void +fp_encode(void *dst, const fp_t *a) +{ + // Modified version of modexp(): writes the 48 bytes least-significant first, as unsigned bytes + int i; + spint c[14]; + redc(*a, c); + for (i = 0; i < 48; i++) { + ((uint8_t *)dst)[i] = (uint8_t)(c[0] & (spint)0xff); + (void)modshr(8, c); + } +} + +uint32_t +fp_decode(fp_t *d, const void *src) +{ + // Modified version of modimp() + int i; + spint res; + const unsigned char *b = src; + for (i = 0; i < 14; i++) { + (*d)[i] = 0; + } + for (i = 47; i >= 0; i--) { + modshl(8, *d); + (*d)[0] += (spint)b[i]; + } + res = (spint)-modfsb(*d); + nres(*d, *d); + // If the value was canonical then res = -1; otherwise, res = 0 + for (i = 0; i < 14; i++) { + (*d)[i] &= res; + } + return (uint32_t)res; +} + +static inline unsigned char +add_carry(unsigned char cc, spint a, spint b, spint *d) +{ + udpint t = (udpint)a + (udpint)b + cc; + *d = (spint)t; + return (unsigned char)(t >> Wordlength); +} + +static void +partial_reduce(spint *out, const spint *src) +{ + spint h, l, quo, rem; + unsigned char cc; + + // Split value in high (8 bits) and low (376 bits) parts. + h = src[11] >> 24; + l = src[11] & 0x00FFFFFF; + + // 65*2^376 = 1 mod q; hence, we add floor(h/65) + (h mod 65)*2^376 + // to the low part. + quo = (h * 0xFC1) >> 18; + rem = h - (65 * quo); + cc = add_carry(0, src[0], quo, &out[0]); + cc = add_carry(cc, src[1], 0, &out[1]); + cc = add_carry(cc, src[2], 0, &out[2]); + cc = add_carry(cc, src[3], 0, &out[3]); + cc = add_carry(cc, src[4], 0, &out[4]); + cc = add_carry(cc, src[5], 0, &out[5]); + cc = add_carry(cc, src[6], 0, &out[6]); + cc = add_carry(cc, src[7], 0, &out[7]); + cc = add_carry(cc, src[8], 0, &out[8]); + cc = add_carry(cc, src[9], 0, &out[9]); + cc = add_carry(cc, src[10], 0, &out[10]); + (void)add_carry(cc, l, rem << 24, &out[11]); +} + +// Little-endian encoding of a 32-bit integer.
+static inline void +enc32le(void *dst, uint32_t x) +{ + uint8_t *buf = dst; + buf[0] = (uint8_t)x; + buf[1] = (uint8_t)(x >> 8); + buf[2] = (uint8_t)(x >> 16); + buf[3] = (uint8_t)(x >> 24); +} + +// Little-endian decoding of a 32-bit integer (bytes are widened to uint32_t before shifting). +static inline uint32_t +dec32le(const void *src) +{ + const uint8_t *buf = src; + return (uint32_t)buf[0] | ((uint32_t)buf[1] << 8) | ((uint32_t)buf[2] << 16) | ((uint32_t)buf[3] << 24); +} + +void +fp_decode_reduce(fp_t *d, const void *src, size_t len) +{ + uint32_t t[12]; // Stores Nbytes * 8 bits + uint8_t tmp[48]; // Nbytes + const uint8_t *b = src; + + fp_set_zero(d); + if (len == 0) { + return; + } + + size_t rem = len % 48; + if (rem != 0) { + // Input size is not a multiple of 48, we decode a partial + // block, which is already less than 2^376. + size_t k = len - rem; + memcpy(tmp, b + k, len - k); + memset(tmp + len - k, 0, (sizeof tmp) - (len - k)); + fp_decode(d, tmp); + len = k; + } + // Process all remaining blocks, in descending address order.
+ while (len > 0) { + fp_mul(d, d, &R2); + len -= 48; + t[0] = dec32le(b + len); + t[1] = dec32le(b + len + 4); + t[2] = dec32le(b + len + 8); + t[3] = dec32le(b + len + 12); + t[4] = dec32le(b + len + 16); + t[5] = dec32le(b + len + 20); + t[6] = dec32le(b + len + 24); + t[7] = dec32le(b + len + 28); + t[8] = dec32le(b + len + 32); + t[9] = dec32le(b + len + 36); + t[10] = dec32le(b + len + 40); + t[11] = dec32le(b + len + 44); + partial_reduce(t, t); + enc32le(tmp, t[0]); + enc32le(tmp + 4, t[1]); + enc32le(tmp + 8, t[2]); + enc32le(tmp + 12, t[3]); + enc32le(tmp + 16, t[4]); + enc32le(tmp + 20, t[5]); + enc32le(tmp + 24, t[6]); + enc32le(tmp + 28, t[7]); + enc32le(tmp + 32, t[8]); + enc32le(tmp + 36, t[9]); + enc32le(tmp + 40, t[10]); + enc32le(tmp + 44, t[11]); + fp_t a; + fp_decode(&a, tmp); + fp_add(d, d, &a); + } +} diff --git a/src/pqm4/sqisign_lvl3/ref/hd.c b/src/pqm4/sqisign_lvl3/ref/hd.c new file mode 100644 index 0000000..0424108 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/hd.c @@ -0,0 +1,93 @@ +#include +#include + +void +double_couple_point(theta_couple_point_t *out, const theta_couple_point_t *in, const theta_couple_curve_t *E1E2) +{ + ec_dbl(&out->P1, &in->P1, &E1E2->E1); + ec_dbl(&out->P2, &in->P2, &E1E2->E2); +} + +void +double_couple_point_iter(theta_couple_point_t *out, + unsigned n, + const theta_couple_point_t *in, + const theta_couple_curve_t *E1E2) +{ + if (n == 0) { + memmove(out, in, sizeof(theta_couple_point_t)); + } else { + double_couple_point(out, in, E1E2); + for (unsigned i = 0; i < n - 1; i++) { + double_couple_point(out, out, E1E2); + } + } +} + +void +add_couple_jac_points(theta_couple_jac_point_t *out, + const theta_couple_jac_point_t *T1, + const theta_couple_jac_point_t *T2, + const theta_couple_curve_t *E1E2) +{ + ADD(&out->P1, &T1->P1, &T2->P1, &E1E2->E1); + ADD(&out->P2, &T1->P2, &T2->P2, &E1E2->E2); +} + +void +double_couple_jac_point(theta_couple_jac_point_t *out, + const theta_couple_jac_point_t *in, + const 
theta_couple_curve_t *E1E2) +{ + DBL(&out->P1, &in->P1, &E1E2->E1); + DBL(&out->P2, &in->P2, &E1E2->E2); +} + +void +double_couple_jac_point_iter(theta_couple_jac_point_t *out, + unsigned n, + const theta_couple_jac_point_t *in, + const theta_couple_curve_t *E1E2) +{ + if (n == 0) { + *out = *in; + } else if (n == 1) { + double_couple_jac_point(out, in, E1E2); + } else { + fp2_t a1, a2, t1, t2; + + jac_to_ws(&out->P1, &t1, &a1, &in->P1, &E1E2->E1); + jac_to_ws(&out->P2, &t2, &a2, &in->P2, &E1E2->E2); + + DBLW(&out->P1, &t1, &out->P1, &t1); + DBLW(&out->P2, &t2, &out->P2, &t2); + for (unsigned i = 0; i < n - 1; i++) { + DBLW(&out->P1, &t1, &out->P1, &t1); + DBLW(&out->P2, &t2, &out->P2, &t2); + } + + jac_from_ws(&out->P1, &out->P1, &a1, &E1E2->E1); + jac_from_ws(&out->P2, &out->P2, &a2, &E1E2->E2); + } +} + +void +couple_jac_to_xz(theta_couple_point_t *P, const theta_couple_jac_point_t *xyP) +{ + jac_to_xz(&P->P1, &xyP->P1); + jac_to_xz(&P->P2, &xyP->P2); +} + +void +copy_bases_to_kernel(theta_kernel_couple_points_t *ker, const ec_basis_t *B1, const ec_basis_t *B2) +{ + // Copy the basis on E1 to (P, _) on T1, T2 and T1 - T2 + copy_point(&ker->T1.P1, &B1->P); + copy_point(&ker->T2.P1, &B1->Q); + copy_point(&ker->T1m2.P1, &B1->PmQ); + + // Copy the basis on E2 to (_, P) on T1, T2 and T1 - T2 + copy_point(&ker->T1.P2, &B2->P); + copy_point(&ker->T2.P2, &B2->Q); + copy_point(&ker->T1m2.P2, &B2->PmQ); +} diff --git a/src/pqm4/sqisign_lvl3/ref/hd.h b/src/pqm4/sqisign_lvl3/ref/hd.h new file mode 100644 index 0000000..2b16e23 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/hd.h @@ -0,0 +1,435 @@ +/** @file + * + * @authors Antonin Leroux + * + * @brief The HD-isogenies algorithm required by the signature + * + */ + +#ifndef HD_H +#define HD_H + +#include +#include +#include + +/** @defgroup hd_module Abelian surfaces and their isogenies + * @{ + */ + +#define HD_extra_torsion 2 + +/** @defgroup hd_struct Data structures for dimension 2 + * @{ + */ + +/** @brief Type for 
couple point with XZ coordinates + * @typedef theta_couple_point_t + * + * @struct theta_couple_point + * + * Structure for the couple point on an elliptic product + * using XZ coordinates + */ +typedef struct theta_couple_point +{ + ec_point_t P1; + ec_point_t P2; +} theta_couple_point_t; + +/** @brief Type for three couple points T1, T2, T1-T2 with XZ coordinates + * @typedef theta_kernel_couple_points_t + * + * @struct theta_kernel_couple_points + * + * Structure for a triple of theta couple points T1, T2 and T1 - T2 + */ +typedef struct theta_kernel_couple_points +{ + theta_couple_point_t T1; + theta_couple_point_t T2; + theta_couple_point_t T1m2; +} theta_kernel_couple_points_t; + +/** @brief Type for couple point with XYZ coordinates + * @typedef theta_couple_jac_point_t + * + * @struct theta_couple_jac_point + * + * Structure for the couple point on an elliptic product + * using XYZ coordinates + */ +typedef struct theta_couple_jac_point +{ + jac_point_t P1; + jac_point_t P2; +} theta_couple_jac_point_t; + +/** @brief Type for couple curve * + * @typedef theta_couple_curve_t + * + * @struct theta_couple_curve + * + * the theta_couple_curve structure + */ +typedef struct theta_couple_curve +{ + ec_curve_t E1; + ec_curve_t E2; +} theta_couple_curve_t; + +/** @brief Type for a product E1 x E2 with corresponding bases + * @typedef theta_couple_curve_with_basis_t + * + * @struct theta_couple_curve_with_basis + * + * Type for a product E1 x E2 with corresponding bases Ei[2^n] + */ +typedef struct theta_couple_curve_with_basis +{ + ec_curve_t E1; + ec_curve_t E2; + ec_basis_t B1; + ec_basis_t B2; +} theta_couple_curve_with_basis_t; + +/** @brief Type for theta point * + * @typedef theta_point_t + * + * @struct theta_point + * + * the theta_point structure used + */ +typedef struct theta_point +{ + fp2_t x; + fp2_t y; + fp2_t z; + fp2_t t; +} theta_point_t; + +/** @brief Type for theta point with repeating components + * @typedef theta_point_compact_t + * + *
@struct theta_point_compact + * + * the theta_point structure used for points with repeated components + */ +typedef struct theta_point_compact +{ + fp2_t x; + fp2_t y; +} theta_point_compact_t; + +/** @brief Type for theta structure * + * @typedef theta_structure_t + * + * @struct theta_structure + * + * the theta_structure structure used + */ +typedef struct theta_structure +{ + theta_point_t null_point; + bool precomputation; + + // Eight precomputed values used for doubling and + // (2,2)-isogenies. + fp2_t XYZ0; + fp2_t YZT0; + fp2_t XZT0; + fp2_t XYT0; + + fp2_t xyz0; + fp2_t yzt0; + fp2_t xzt0; + fp2_t xyt0; +} theta_structure_t; + +/** @brief A 2x2 matrix used for action by translation + * @typedef translation_matrix_t + * + * @struct translation_matrix + * + * Structure to hold 4 fp2_t elements representing a 2x2 matrix used when computing + * a compatible theta structure during gluing. + */ +typedef struct translation_matrix +{ + fp2_t g00; + fp2_t g01; + fp2_t g10; + fp2_t g11; +} translation_matrix_t; + +/** @brief A 4x4 matrix used for basis changes + * @typedef basis_change_matrix_t + * + * @struct basis_change_matrix + * + * Structure to hold 16 elements representing a 4x4 matrix used for changing + * the basis of a theta point. 
+ */ +typedef struct basis_change_matrix +{ + fp2_t m[4][4]; +} basis_change_matrix_t; + +/** @brief Type for gluing (2,2) theta isogeny * + * @typedef theta_gluing_t + * + * @struct theta_gluing + * + * the theta_gluing structure + */ +typedef struct theta_gluing +{ + + theta_couple_curve_t domain; + theta_couple_jac_point_t xyK1_8; + theta_point_compact_t imageK1_8; + basis_change_matrix_t M; + theta_point_t precomputation; + theta_point_t codomain; + +} theta_gluing_t; + +/** @brief Type for standard (2,2) theta isogeny * + * @typedef theta_isogeny_t + * + * @struct theta_isogeny + * + * the theta_isogeny structure + */ +typedef struct theta_isogeny +{ + theta_point_t T1_8; + theta_point_t T2_8; + bool hadamard_bool_1; + bool hadamard_bool_2; + theta_structure_t domain; + theta_point_t precomputation; + theta_structure_t codomain; +} theta_isogeny_t; + +/** @brief Type for splitting isomorphism * + * @typedef theta_splitting_t + * + * @struct theta_splitting + * + * the theta_splitting structure + */ +typedef struct theta_splitting +{ + basis_change_matrix_t M; + theta_structure_t B; + +} theta_splitting_t; + +// end of hd_struct +/** + * @} + */ + +/** @defgroup hd_functions Functions for dimension 2 + * @{ + */ + +/** + * @brief Compute the double of the theta couple point in on the elliptic product E12 + * + * @param out Output: the theta_couple_point + * @param in the theta couple point in the elliptic product + * @param E1E2 an elliptic product + * in = (P1,P2) + * out = [2] (P1,P2) + * + */ +void double_couple_point(theta_couple_point_t *out, const theta_couple_point_t *in, const theta_couple_curve_t *E1E2); + +/** + * @brief Compute the iterated double of the theta couple point in on the elliptic product E12 + * + * @param out Output: the theta_couple_point + * @param n : the number of iteration + * @param E1E2 an elliptic product + * @param in the theta couple point in the elliptic product + * in = (P1,P2) + * out = [2^n] (P1,P2) + * + */ +void 
double_couple_point_iter(theta_couple_point_t *out, + unsigned n, + const theta_couple_point_t *in, + const theta_couple_curve_t *E1E2); + +/** + * @brief Compute the addition of two points in (X : Y : Z) coordinates on the elliptic product E12 + * + * @param out Output: the theta_couple_jac_point + * @param T1 the theta couple jac point in the elliptic product + * @param T2 the theta couple jac point in the elliptic product + * @param E1E2 an elliptic product + * in = (P1, P2), (Q1, Q2) + * out = (P1 + Q1, P2 + Q2) + * + **/ +void add_couple_jac_points(theta_couple_jac_point_t *out, + const theta_couple_jac_point_t *T1, + const theta_couple_jac_point_t *T2, + const theta_couple_curve_t *E1E2); + +/** + * @brief Compute the double of the theta couple jac point in on the elliptic product E12 + * + * @param out Output: the theta_couple_jac_point + * @param in the theta couple jac point in the elliptic product + * @param E1E2 an elliptic product + * in = (P1,P2) + * out = [2] (P1,P2) + * + */ +void double_couple_jac_point(theta_couple_jac_point_t *out, + const theta_couple_jac_point_t *in, + const theta_couple_curve_t *E1E2); + +/** + * @brief Compute the iterated double of the theta couple jac point in on the elliptic product E12 + * + * @param out Output: the theta_couple_jac_point + * @param n : the number of iterations + * @param in the theta couple jac point in the elliptic product + * @param E1E2 an elliptic product + * in = (P1,P2) + * out = [2^n] (P1,P2) + * + */ +void double_couple_jac_point_iter(theta_couple_jac_point_t *out, + unsigned n, + const theta_couple_jac_point_t *in, + const theta_couple_curve_t *E1E2); + +/** + * @brief A forgetful function which returns (X : Z) points given a pair of (X : Y : Z) points + * + * @param P Output: the theta_couple_point + * @param xyP : the theta_couple_jac_point + **/ +void couple_jac_to_xz(theta_couple_point_t *P, const theta_couple_jac_point_t *xyP); + +/** + * @brief Compute a (2,2) isogeny chain in dimension 2 between
elliptic + * products in the theta_model and evaluate at a list of points of the form + * (P1,0) or (0,P2). Returns 0 if the codomain fails to split (or there is + * an error during the computation) and 1 otherwise. + * + * @param n : the length of the isogeny chain + * @param E12 an elliptic curve product + * @param ker T1, T2 and T1-T2. couple points on E12[2^(n+2)] + * @param extra_torsion boolean indicating if we give the points in E12[2^n] or + * E12[2^(n+HD_extra_torsion)] + * @param E34 Output: the codomain curve + * @param P12 Input/Output: pointer to points to be pushed through the isogeny (in-place) + * @param numP: length of the list of points given in P12 (can be zero) + * @returns 1 on success 0 on failure + * + */ +int theta_chain_compute_and_eval(unsigned n, + /*const*/ theta_couple_curve_t *E12, + const theta_kernel_couple_points_t *ker, + bool extra_torsion, + theta_couple_curve_t *E34, + theta_couple_point_t *P12, + size_t numP); + +/** + * @brief Compute a (2,2) isogeny chain in dimension 2 between elliptic + * products in the theta_model and evaluate at a list of points of the form + * (P1,0) or (0,P2). Returns 0 if the codomain fails to split (or there is + * an error during the computation) and 1 otherwise. + * Compared to theta_chain_compute_and_eval, it does extra isotropy + * checks on the kernel. + * + * @param n : the length of the isogeny chain + * @param E12 an elliptic curve product + * @param ker T1, T2 and T1-T2. 
couple points on E12[2^(n+2)] + * @param extra_torsion boolean indicating if we give the points in E12[2^n] or + * E12[2^(n+HD_extra_torsion)] + * @param E34 Output: the codomain curve + * @param P12 Input/Output: pointer to points to be pushed through the isogeny (in-place) + * @param numP: length of the list of points given in P12 (can be zero) + * @returns 1 on success 0 on failure + * + */ +int theta_chain_compute_and_eval_verify(unsigned n, + /*const*/ theta_couple_curve_t *E12, + const theta_kernel_couple_points_t *ker, + bool extra_torsion, + theta_couple_curve_t *E34, + theta_couple_point_t *P12, + size_t numP); + +/** + * @brief Compute a (2,2) isogeny chain in dimension 2 between elliptic + * products in the theta_model and evaluate at a list of points of the form + * (P1,0) or (0,P2). Returns 0 if the codomain fails to split (or there is + * an error during the computation) and 1 otherwise. + * Compared to theta_chain_compute_and_eval, it selects a random Montgomery + * model of the codomain. + * + * @param n : the length of the isogeny chain + * @param E12 an elliptic curve product + * @param ker T1, T2 and T1-T2. 
couple points on E12[2^(n+2)] + * @param extra_torsion boolean indicating if we give the points in E12[2^n] or + * E12[2^(n+HD_extra_torsion)] + * @param E34 Output: the codomain curve + * @param P12 Input/Output: pointer to points to be pushed through the isogeny (in-place) + * @param numP: length of the list of points given in P12 (can be zero) + * @returns 1 on success, 0 on failure + * + */ +int theta_chain_compute_and_eval_randomized(unsigned n, + /*const*/ theta_couple_curve_t *E12, + const theta_kernel_couple_points_t *ker, + bool extra_torsion, + theta_couple_curve_t *E34, + theta_couple_point_t *P12, + size_t numP); + +/** + * @brief Given bases B1 on E1 and B2 on E2, copies them to create a kernel + * on E1 x E2 as couple points T1, T2 and T1 - T2 + * + * @param ker Output: a kernel for dim_two_isogenies (T1, T2, T1-T2) + * @param B1 Input basis on E1 + * @param B2 Input basis on E2 + **/ +void copy_bases_to_kernel(theta_kernel_couple_points_t *ker, const ec_basis_t *B1, const ec_basis_t *B2); + +/** + * @brief Given a couple of points (P1, P2) on a couple of curves (E1, E2) + * this function tests if both points are of order exactly 2^t + * + * @param T: couple point (P1, P2) + * @param E: a couple of curves (E1, E2) + * @param t: an integer + * @returns 0xFFFFFFFF on success, 0 on failure + */ +static int +test_couple_point_order_twof(const theta_couple_point_t *T, const theta_couple_curve_t *E, int t) +{ + int check_P1 = test_point_order_twof(&T->P1, &E->E1, t); + int check_P2 = test_point_order_twof(&T->P2, &E->E2, t); + + return check_P1 & check_P2; +} + +// end of hd_functions +/** + * @} + */ +// end of hd_module +/** + * @} + */ +#endif diff --git a/src/pqm4/sqisign_lvl3/ref/hd_splitting_transforms.c b/src/pqm4/sqisign_lvl3/ref/hd_splitting_transforms.c new file mode 100644 index 0000000..d980d12 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/hd_splitting_transforms.c @@ -0,0 +1,143 @@ +#include + +#define FP2_ZERO 0 +#define FP2_ONE 1 +#define
FP2_I 2 +#define FP2_MINUS_ONE 3 +#define FP2_MINUS_I 4 + +const int EVEN_INDEX[10][2] = {{0, 0}, {0, 1}, {0, 2}, {0, 3}, {1, 0}, {1, 2}, {2, 0}, {2, 1}, {3, 0}, {3, 3}}; +const int CHI_EVAL[4][4] = {{1, 1, 1, 1}, {1, -1, 1, -1}, {1, 1, -1, -1}, {1, -1, -1, 1}}; +const fp2_t FP2_CONSTANTS[5] = {{ +#if 0 +#elif RADIX == 16 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 32 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#else +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 32 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#else +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#endif +#endif +}, { +#if 0 +#elif RADIX == 16 +{0xfc, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2} +#elif RADIX == 32 +{0x3f0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x10000} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x3, 0x0, 0x0, 0x0, 0x0, 0x3d00000000000000} +#else +{0x7, 0x0, 0x0, 0x0, 0x0, 0x0, 0xe400000000000} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 32 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#else +{0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0} +#endif +#endif +}, { +#if 0 +#elif RADIX == 16 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 32 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#else +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0xfc, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2} +#elif RADIX == 32 +{0x3f0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x10000} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x3, 0x0, 0x0, 0x0, 0x0, 0x3d00000000000000} +#else +{0x7, 0x0, 0x0, 0x0, 0x0, 0x0, 0xe400000000000} +#endif +#endif +}, { +#if 0 +#elif RADIX == 16 +{0x1f03, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0xfff, 0x1e} +#elif RADIX == 32 +{0xffffc0f, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff, 0x30fff} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0xfffffffffffffffc, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x3ffffffffffffff} +#else +{0x7ffffffffffff8, 0x7fffffffffffff, 0x7fffffffffffff, 0x7fffffffffffff, 0x7fffffffffffff, 0x7fffffffffffff, 0x1ffffffffffff} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 32 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 64 +#if 
defined(SQISIGN_GF_IMPL_BROADWELL) +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#else +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#endif +#endif +}, { +#if 0 +#elif RADIX == 16 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 32 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#else +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0x1f03, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0xfff, 0x1e} +#elif RADIX == 32 +{0xffffc0f, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff, 0x30fff} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0xfffffffffffffffc, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x3ffffffffffffff} +#else +{0x7ffffffffffff8, 0x7fffffffffffff, 0x7fffffffffffff, 0x7fffffffffffff, 0x7fffffffffffff, 0x7fffffffffffff, 0x1ffffffffffff} +#endif +#endif +}}; +const precomp_basis_change_matrix_t SPLITTING_TRANSFORMS[10] = {{{{FP2_ONE, FP2_I, FP2_ONE, FP2_I}, {FP2_ONE, FP2_MINUS_I, FP2_MINUS_ONE, FP2_I}, {FP2_ONE, FP2_I, FP2_MINUS_ONE, FP2_MINUS_I}, {FP2_MINUS_ONE, FP2_I, FP2_MINUS_ONE, FP2_I}}}, {{{FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_ONE}, {FP2_ZERO, FP2_ZERO, FP2_ONE, FP2_ZERO}, {FP2_ZERO, FP2_MINUS_ONE, FP2_ZERO, FP2_ZERO}}}, {{{FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ONE, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_ONE}, {FP2_ZERO, FP2_ZERO, FP2_MINUS_ONE, FP2_ZERO}}}, {{{FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ONE, FP2_ZERO, 
FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ONE, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_MINUS_ONE}}}, {{{FP2_ONE, FP2_ONE, FP2_ONE, FP2_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE}, {FP2_ONE, FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE}, {FP2_MINUS_ONE, FP2_ONE, FP2_MINUS_ONE, FP2_ONE}}}, {{{FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ONE, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_ONE}, {FP2_ZERO, FP2_ZERO, FP2_ONE, FP2_ZERO}}}, {{{FP2_ONE, FP2_ONE, FP2_ONE, FP2_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_ONE, FP2_MINUS_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE}, {FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE, FP2_ONE}}}, {{{FP2_ONE, FP2_ONE, FP2_ONE, FP2_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_ONE, FP2_MINUS_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE}, {FP2_ONE, FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE}}}, {{{FP2_ONE, FP2_ONE, FP2_ONE, FP2_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_ONE, FP2_MINUS_ONE}, {FP2_ONE, FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE}, {FP2_MINUS_ONE, FP2_ONE, FP2_ONE, FP2_MINUS_ONE}}}, {{{FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ONE, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ONE, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_ONE}}}}; +const precomp_basis_change_matrix_t NORMALIZATION_TRANSFORMS[6] = {{{{FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ONE, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ONE, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_ONE}}}, {{{FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_ONE}, {FP2_ZERO, FP2_ZERO, FP2_ONE, FP2_ZERO}, {FP2_ZERO, FP2_ONE, FP2_ZERO, FP2_ZERO}, {FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}}}, {{{FP2_ONE, FP2_ONE, FP2_ONE, FP2_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_ONE, FP2_MINUS_ONE}, {FP2_ONE, FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE}}}, {{{FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE}, {FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE, FP2_ONE}, {FP2_MINUS_ONE, FP2_ONE, FP2_MINUS_ONE, FP2_ONE}, {FP2_ONE, FP2_ONE, FP2_ONE, FP2_ONE}}}, 
{{{FP2_MINUS_ONE, FP2_I, FP2_I, FP2_ONE}, {FP2_I, FP2_MINUS_ONE, FP2_ONE, FP2_I}, {FP2_I, FP2_ONE, FP2_MINUS_ONE, FP2_I}, {FP2_ONE, FP2_I, FP2_I, FP2_MINUS_ONE}}}, {{{FP2_ONE, FP2_I, FP2_I, FP2_MINUS_ONE}, {FP2_I, FP2_ONE, FP2_MINUS_ONE, FP2_I}, {FP2_I, FP2_MINUS_ONE, FP2_ONE, FP2_I}, {FP2_MINUS_ONE, FP2_I, FP2_I, FP2_ONE}}}}; diff --git a/src/pqm4/sqisign_lvl3/ref/hd_splitting_transforms.h b/src/pqm4/sqisign_lvl3/ref/hd_splitting_transforms.h new file mode 100644 index 0000000..b3147a4 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/hd_splitting_transforms.h @@ -0,0 +1,18 @@ +#ifndef HD_SPLITTING_H +#define HD_SPLITTING_H + +#include +#include + +typedef struct precomp_basis_change_matrix { + uint8_t m[4][4]; +} precomp_basis_change_matrix_t; + +extern const int EVEN_INDEX[10][2]; +extern const int CHI_EVAL[4][4]; +extern const fp2_t FP2_CONSTANTS[5]; +extern const precomp_basis_change_matrix_t SPLITTING_TRANSFORMS[10]; +extern const precomp_basis_change_matrix_t NORMALIZATION_TRANSFORMS[6]; + +#endif + diff --git a/src/pqm4/sqisign_lvl3/ref/isog.h b/src/pqm4/sqisign_lvl3/ref/isog.h new file mode 100644 index 0000000..b251ca3 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/isog.h @@ -0,0 +1,28 @@ +#ifndef _ISOG_H_ +#define _ISOG_H_ +#include +#include + +/* KPS structure for isogenies of degree 2 or 4 */ +typedef struct +{ + ec_point_t K; +} ec_kps2_t; +typedef struct +{ + ec_point_t K[3]; +} ec_kps4_t; + +void xisog_2(ec_kps2_t *kps, ec_point_t *B, const ec_point_t P); // degree-2 isogeny construction +void xisog_2_singular(ec_kps2_t *kps, ec_point_t *B24, ec_point_t A24); + +void xisog_4(ec_kps4_t *kps, ec_point_t *B, const ec_point_t P); // degree-4 isogeny construction +void xisog_4_singular(ec_kps4_t *kps, ec_point_t *B24, const ec_point_t P, ec_point_t A24); + +void xeval_2(ec_point_t *R, ec_point_t *const Q, const int lenQ, const ec_kps2_t *kps); +void xeval_2_singular(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_kps2_t *kps); + +void 
xeval_4(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_kps4_t *kps); +void xeval_4_singular(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_point_t P, const ec_kps4_t *kps); + +#endif diff --git a/src/pqm4/sqisign_lvl3/ref/isog_chains.c b/src/pqm4/sqisign_lvl3/ref/isog_chains.c new file mode 100644 index 0000000..abc9808 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/isog_chains.c @@ -0,0 +1,241 @@ +#include "isog.h" +#include + +// since we use degree 4 isogeny steps, we need to handle the odd case with care +static uint32_t +ec_eval_even_strategy(ec_curve_t *curve, + ec_point_t *points, + unsigned len_points, + const ec_point_t *kernel, + const int isog_len) +{ + ec_curve_normalize_A24(curve); + ec_point_t A24; + copy_point(&A24, &curve->A24); + + int space = 1; + for (int i = 1; i < isog_len; i *= 2) + ++space; + + // Stack of remaining kernel points and their associated orders + ec_point_t splits[space]; + uint16_t todo[space]; + splits[0] = *kernel; + todo[0] = isog_len; + + int current = 0; // Pointer to current top of stack + + // Chain of 4-isogenies + for (int j = 0; j < isog_len / 2; ++j) { + assert(current >= 0); + assert(todo[current] >= 1); + // Get the next point of order 4 + while (todo[current] != 2) { + assert(todo[current] >= 3); + // A new split will be added + ++current; + assert(current < space); + // We set the seed of the new split to be computed and saved + copy_point(&splits[current], &splits[current - 1]); + // if we copied from the very first element, then we perform one additional doubling + unsigned num_dbls = todo[current - 1] / 4 * 2 + todo[current - 1] % 2; + todo[current] = todo[current - 1] - num_dbls; + while (num_dbls--) + xDBL_A24(&splits[current], &splits[current], &A24, false); + } + + if (j == 0) { + assert(fp2_is_one(&A24.z)); + if (!ec_is_four_torsion(&splits[current], curve)) + return -1; + + ec_point_t T; + xDBL_A24(&T, &splits[current], &A24, false); + if (fp2_is_zero(&T.x)) + return -1; // 
special isogenies not allowed + } else { + assert(todo[current] == 2); +#ifndef NDEBUG + if (fp2_is_zero(&splits[current].z)) + debug_print("splitting point z coordinate is unexpectedly zero"); + + ec_point_t test; + xDBL_A24(&test, &splits[current], &A24, false); + if (fp2_is_zero(&test.z)) + debug_print("z coordinate is unexpectedly zero before doubling"); + xDBL_A24(&test, &test, &A24, false); + if (!fp2_is_zero(&test.z)) + debug_print("z coordinate is unexpectedly not zero after doubling"); +#endif + } + + // Evaluate 4-isogeny + ec_kps4_t kps4; + xisog_4(&kps4, &A24, splits[current]); + xeval_4(splits, splits, current, &kps4); + for (int i = 0; i < current; ++i) + todo[i] -= 2; + xeval_4(points, points, len_points, &kps4); + + --current; + } + assert(isog_len % 2 ? !current : current == -1); + + // Final 2-isogeny + if (isog_len % 2) { +#ifndef NDEBUG + if (fp2_is_zero(&splits[0].z)) + debug_print("splitting point z coordinate is unexpectedly zero"); + ec_point_t test; + copy_point(&test, &splits[0]); + xDBL_A24(&test, &test, &A24, false); + if (!fp2_is_zero(&test.z)) + debug_print("z coordinate is unexpectedly not zero after doubling"); +#endif + + // We need to check the order of this point in case there were no 4-isogenies + if (isog_len == 1 && !ec_is_two_torsion(&splits[0], curve)) + return -1; + if (fp2_is_zero(&splits[0].x)) { + // special isogenies not allowed + // this case can only happen if isog_len == 1; otherwise the + // previous 4-isogenies we computed ensure that $T=(0:1)$ is put + // as the kernel of the dual isogeny + return -1; + } + + ec_kps2_t kps2; + xisog_2(&kps2, &A24, splits[0]); + xeval_2(points, points, len_points, &kps2); + } + + // Output curve in the form (A:C) + A24_to_AC(curve, &A24); + + curve->is_A24_computed_and_normalized = false; + + return 0; +} + +uint32_t +ec_eval_even(ec_curve_t *image, ec_isog_even_t *phi, ec_point_t *points, unsigned len_points) +{ + copy_curve(image, &phi->curve); + return 
ec_eval_even_strategy(image, points, len_points, &phi->kernel, phi->length); +} + +// naive implementation +uint32_t +ec_eval_small_chain(ec_curve_t *curve, + const ec_point_t *kernel, + int len, + ec_point_t *points, + unsigned len_points, + bool special) // do we allow special isogenies? +{ + + ec_point_t A24; + AC_to_A24(&A24, curve); + + ec_kps2_t kps; + ec_point_t small_K, big_K; + copy_point(&big_K, kernel); + + for (int i = 0; i < len; i++) { + copy_point(&small_K, &big_K); + // small_K = big_K; + for (int j = 0; j < len - i - 1; j++) { + xDBL_A24(&small_K, &small_K, &A24, false); + } + // Check the order of the point before the first isogeny step + if (i == 0 && !ec_is_two_torsion(&small_K, curve)) + return (uint32_t)-1; + // Perform isogeny step + if (fp2_is_zero(&small_K.x)) { + if (special) { + ec_point_t B24; + xisog_2_singular(&kps, &B24, A24); + xeval_2_singular(&big_K, &big_K, 1, &kps); + xeval_2_singular(points, points, len_points, &kps); + copy_point(&A24, &B24); + } else { + return (uint32_t)-1; + } + } else { + xisog_2(&kps, &A24, small_K); + xeval_2(&big_K, &big_K, 1, &kps); + xeval_2(points, points, len_points, &kps); + } + } + A24_to_AC(curve, &A24); + + curve->is_A24_computed_and_normalized = false; + return 0; +} + +uint32_t +ec_isomorphism(ec_isom_t *isom, const ec_curve_t *from, const ec_curve_t *to) +{ + fp2_t t0, t1, t2, t3, t4; + + fp2_mul(&t0, &from->A, &from->C); + fp2_mul(&t1, &to->A, &to->C); + + fp2_mul(&t2, &t1, &to->C); // toA*toC^2 + fp2_add(&t3, &t2, &t2); + fp2_add(&t3, &t3, &t3); + fp2_add(&t3, &t3, &t3); + fp2_add(&t2, &t2, &t3); // 9*toA*toC^2 + fp2_sqr(&t3, &to->A); + fp2_mul(&t3, &t3, &to->A); // toA^3 + fp2_add(&t3, &t3, &t3); + fp2_sub(&isom->Nx, &t3, &t2); // 2*toA^3-9*toA*toC^2 + fp2_mul(&t2, &t0, &from->A); // fromA^2*fromC + fp2_sqr(&t3, &from->C); + fp2_mul(&t3, &t3, &from->C); // fromC^3 + fp2_add(&t4, &t3, &t3); + fp2_add(&t3, &t4, &t3); // 3*fromC^3 + fp2_sub(&t3, &t3, &t2); // 3*fromC^3-fromA^2*fromC + 
fp2_mul(&isom->Nx, &isom->Nx, &t3); // lambda_x = (2*toA^3-9*toA*toC^2)*(3*fromC^3-fromA^2*fromC) + + fp2_mul(&t2, &t0, &from->C); // fromA*fromC^2 + fp2_add(&t3, &t2, &t2); + fp2_add(&t3, &t3, &t3); + fp2_add(&t3, &t3, &t3); + fp2_add(&t2, &t2, &t3); // 9*fromA*fromC^2 + fp2_sqr(&t3, &from->A); + fp2_mul(&t3, &t3, &from->A); // fromA^3 + fp2_add(&t3, &t3, &t3); + fp2_sub(&isom->D, &t3, &t2); // 2*fromA^3-9*fromA*fromC^2 + fp2_mul(&t2, &t1, &to->A); // toA^2*toC + fp2_sqr(&t3, &to->C); + fp2_mul(&t3, &t3, &to->C); // toC^3 + fp2_add(&t4, &t3, &t3); + fp2_add(&t3, &t4, &t3); // 3*toC^3 + fp2_sub(&t3, &t3, &t2); // 3*toC^3-toA^2*toC + fp2_mul(&isom->D, &isom->D, &t3); // lambda_z = (2*fromA^3-9*fromA*fromC^2)*(3*toC^3-toA^2*toC) + + // Mont -> SW -> SW -> Mont + fp2_mul(&t0, &to->C, &from->A); + fp2_mul(&t0, &t0, &isom->Nx); // lambda_x*toC*fromA + fp2_mul(&t1, &from->C, &to->A); + fp2_mul(&t1, &t1, &isom->D); // lambda_z*fromC*toA + fp2_sub(&isom->Nz, &t0, &t1); // lambda_x*toC*fromA - lambda_z*fromC*toA + fp2_mul(&t0, &from->C, &to->C); + fp2_add(&t1, &t0, &t0); + fp2_add(&t0, &t0, &t1); // 3*fromC*toC + fp2_mul(&isom->D, &isom->D, &t0); // 3*lambda_z*fromC*toC + fp2_mul(&isom->Nx, &isom->Nx, &t0); // 3*lambda_x*fromC*toC + + return (fp2_is_zero(&isom->Nx) | fp2_is_zero(&isom->D)); +} + +void +ec_iso_eval(ec_point_t *P, ec_isom_t *isom) +{ + fp2_t tmp; + fp2_mul(&P->x, &P->x, &isom->Nx); + fp2_mul(&tmp, &P->z, &isom->Nz); + fp2_add(&P->x, &P->x, &tmp); + fp2_mul(&P->z, &P->z, &isom->D); +} diff --git a/src/pqm4/sqisign_lvl3/ref/mp.c b/src/pqm4/sqisign_lvl3/ref/mp.c new file mode 100644 index 0000000..27f4a96 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/mp.c @@ -0,0 +1,357 @@ +#include +#include +#include +#include + +// double-wide multiplication +void +MUL(digit_t *out, const digit_t a, const digit_t b) +{ +#ifdef RADIX_32 + uint64_t r = (uint64_t)a * b; + out[0] = r & 0xFFFFFFFFUL; + out[1] = r >> 32; + +#elif defined(RADIX_64) && defined(_MSC_VER) + uint64_t 
umul_hi; + out[0] = _umul128(a, b, &umul_hi); + out[1] = umul_hi; + +#elif defined(RADIX_64) && defined(HAVE_UINT128) + unsigned __int128 umul_tmp; + umul_tmp = (unsigned __int128)(a) * (unsigned __int128)(b); + out[0] = (uint64_t)umul_tmp; + out[1] = (uint64_t)(umul_tmp >> 64); + +#else + register digit_t al, ah, bl, bh, temp; + digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry; + digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t) * 4), mask_high = (digit_t)(-1) << (sizeof(digit_t) * 4); + al = a & mask_low; // Low part + ah = a >> (sizeof(digit_t) * 4); // High part + bl = b & mask_low; + bh = b >> (sizeof(digit_t) * 4); + + albl = al * bl; + albh = al * bh; + ahbl = ah * bl; + ahbh = ah * bh; + out[0] = albl & mask_low; // out00 + + res1 = albl >> (sizeof(digit_t) * 4); + res2 = ahbl & mask_low; + res3 = albh & mask_low; + temp = res1 + res2 + res3; + carry = temp >> (sizeof(digit_t) * 4); + out[0] ^= temp << (sizeof(digit_t) * 4); // out01 + + res1 = ahbl >> (sizeof(digit_t) * 4); + res2 = albh >> (sizeof(digit_t) * 4); + res3 = ahbh & mask_low; + temp = res1 + res2 + res3 + carry; + out[1] = temp & mask_low; // out10 + carry = temp & mask_high; + out[1] ^= (ahbh & mask_high) + carry; // out11 + +#endif +} + +void +mp_add(digit_t *c, const digit_t *a, const digit_t *b, const unsigned int nwords) +{ // Multiprecision addition + unsigned int i, carry = 0; + + for (i = 0; i < nwords; i++) { + ADDC(c[i], carry, a[i], b[i], carry); + } +} + +digit_t +mp_shiftr(digit_t *x, const unsigned int shift, const unsigned int nwords) +{ // Multiprecision right shift by 1...RADIX-1 + digit_t bit_out = x[0] & 1; + + for (unsigned int i = 0; i < nwords - 1; i++) { + SHIFTR(x[i + 1], x[i], shift, x[i], RADIX); + } + x[nwords - 1] >>= shift; + return bit_out; +} + +void +mp_shiftl(digit_t *x, const unsigned int shift, const unsigned int nwords) +{ // Multiprecision left shift by 1...RADIX-1 + + for (int i = nwords - 1; i > 0; i--) { + SHIFTL(x[i], x[i - 1], shift, x[i], 
RADIX); + } + x[0] <<= shift; +} + +void +multiple_mp_shiftl(digit_t *x, const unsigned int shift, const unsigned int nwords) +{ + int t = shift; + while (t > RADIX - 1) { + mp_shiftl(x, RADIX - 1, nwords); + t = t - (RADIX - 1); + } + mp_shiftl(x, t, nwords); +} + +// The below functions were taken from the EC module + +void +mp_sub(digit_t *c, const digit_t *a, const digit_t *b, const unsigned int nwords) +{ // Multiprecision subtraction, assuming a > b + unsigned int i, borrow = 0; + + for (i = 0; i < nwords; i++) { + SUBC(c[i], borrow, a[i], b[i], borrow); + } +} + +void +select_ct(digit_t *c, const digit_t *a, const digit_t *b, const digit_t mask, const int nwords) +{ // Select c <- a if mask = 0, select c <- b if mask = 1...1 + + for (int i = 0; i < nwords; i++) { + c[i] = ((a[i] ^ b[i]) & mask) ^ a[i]; + } +} + +void +swap_ct(digit_t *a, digit_t *b, const digit_t option, const int nwords) +{ // Swap entries + // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then a <- b and b <- a + digit_t temp; + + for (int i = 0; i < nwords; i++) { + temp = option & (a[i] ^ b[i]); + a[i] = temp ^ a[i]; + b[i] = temp ^ b[i]; + } +} + +int +mp_compare(const digit_t *a, const digit_t *b, unsigned int nwords) +{ // Multiprecision comparison, a=b? : (1) a>b, (0) a=b, (-1) a < b + // Walks from the highest-index digit down; the first differing digit decides the result + for (int i = nwords - 1; i >= 0; i--) { + if (a[i] > b[i]) + return 1; + else if (a[i] < b[i]) + return -1; + } + return 0; +} + +bool +mp_is_zero(const digit_t *a, unsigned int nwords) +{ // Is a multiprecision element zero? 
+ // Returns 1 (true) if a=0, 0 (false) otherwise + digit_t r = 0; + + for (unsigned int i = 0; i < nwords; i++) + r |= a[i] ^ 0; + + return (bool)is_digit_zero_ct(r); +} + +void +mp_mul2(digit_t *c, const digit_t *a, const digit_t *b) +{ // Multiprecision multiplication fixed to two-digit operands + unsigned int carry = 0; + digit_t t0[2], t1[2], t2[2]; + + MUL(t0, a[0], b[0]); + MUL(t1, a[0], b[1]); + ADDC(t0[1], carry, t0[1], t1[0], carry); + ADDC(t1[1], carry, 0, t1[1], carry); + MUL(t2, a[1], b[1]); + ADDC(t2[0], carry, t2[0], t1[1], carry); + ADDC(t2[1], carry, 0, t2[1], carry); + c[0] = t0[0]; + c[1] = t0[1]; + c[2] = t2[0]; + c[3] = t2[1]; +} + +void +mp_print(const digit_t *a, size_t nwords) +{ + printf("0x"); + for (size_t i = 0; i < nwords; i++) { +#ifdef RADIX_32 + printf("%08" PRIx32, a[nwords - i - 1]); // Print each word with 8 hex digits +#elif defined(RADIX_64) + printf("%016" PRIx64, a[nwords - i - 1]); // Print each word with 16 hex digits +#endif + } +} + +void +mp_copy(digit_t *b, const digit_t *a, size_t nwords) +{ + for (size_t i = 0; i < nwords; i++) { + b[i] = a[i]; + } +} + +void +mp_mul(digit_t *c, const digit_t *a, const digit_t *b, size_t nwords) +{ + // Multiprecision multiplication, c = a*b, for nwords-digit inputs, with nwords-digit output + // explicitly does not use the higher half of c, as we do not need in our applications + digit_t carry, UV[2], t[nwords], cc[nwords]; + + for (size_t i = 0; i < nwords; i++) { + cc[i] = 0; + } + + for (size_t i = 0; i < nwords; i++) { + + MUL(t, a[i], b[0]); + + for (size_t j = 1; j < nwords - 1; j++) { + MUL(UV, a[i], b[j]); + ADDC(t[j], carry, t[j], UV[0], 0); + t[j + 1] = UV[1] + carry; + } + + int j = nwords - 1; + MUL(UV, a[i], b[j]); + ADDC(t[j], carry, t[j], UV[0], 0); + + mp_add(&cc[i], &cc[i], t, nwords - i); + } + + mp_copy(c, cc, nwords); +} + +void +mp_mod_2exp(digit_t *a, unsigned int e, unsigned int nwords) +{ // Multiprecision modulo 2^e, with 0 <= a < 2^(e) + unsigned int i, q = e 
>> LOG2RADIX, r = e & (RADIX - 1); + + if (q < nwords) { + a[q] &= ((digit_t)1 << r) - 1; + + for (i = q + 1; i < nwords; i++) { + a[i] = 0; + } + } +} + +void +mp_neg(digit_t *a, unsigned int nwords) +{ // negates a in place: two's complement over all nwords digits, propagating the carry of the +1 + digit_t carry = 1; + for (size_t i = 0; i < nwords; i++) { + a[i] = (digit_t)(~a[i] + carry); + carry &= (digit_t)is_digit_zero_ct(a[i]); // the +1 keeps rippling only while digits wrap to 0 + } +} + +bool +mp_is_one(const digit_t *x, unsigned int nwords) +{ // returns true if x represents 1, and false otherwise + if (x[0] != 1) { + return false; + } + + for (size_t i = 1; i < nwords; i++) { + if (x[i] != 0) { + return false; + } + } + return true; +} + +void +mp_inv_2e(digit_t *b, const digit_t *a, int e, unsigned int nwords) +{ // Inversion modulo 2^e, using Newton's method and Hensel lifting + // we take the first power of 2 larger than e to use + // requires a to be odd, of course + // returns b such that a*b = 1 mod 2^e + assert((a[0] & 1) == 1); + + digit_t x[nwords], y[nwords], aa[nwords], mp_one[nwords], tmp[nwords]; + mp_copy(aa, a, nwords); + + mp_one[0] = 1; + for (unsigned int i = 1; i < nwords; i++) { + mp_one[i] = 0; + } + + int p = 1; + while ((1 << p) < e) { + p++; + } + p -= 2; // using k = 4 for initial inverse + int w = (1 << (p + 2)); + + mp_mod_2exp(aa, w, nwords); + mp_add(x, aa, aa, nwords); + mp_add(x, x, aa, nwords); // should be 3a + x[0] ^= (1 << 1); // so that x equals (3a)^2 xor 2 + mp_mod_2exp(x, w, nwords); // now x*a = 1 mod 2^4, which we lift + + mp_mul(tmp, aa, x, nwords); + mp_neg(tmp, nwords); + mp_add(y, mp_one, tmp, nwords); + + // Hensel lifting for p rounds + for (int i = 0; i < p; i++) { + mp_add(tmp, mp_one, y, nwords); + mp_mul(x, x, tmp, nwords); + mp_mul(y, y, y, nwords); + } + + mp_mod_2exp(x, w, nwords); + mp_copy(b, x, nwords); + + // verify results + mp_mul(x, x, aa, nwords); + mp_mod_2exp(x, w, nwords); + assert(mp_is_one(x, nwords)); +} + +void +mp_invert_matrix(digit_t *r1, digit_t *r2, digit_t *s1, digit_t *s2, int e, unsigned int nwords) +{ + // given a matrix ( ( a, b ), (c, d) ) of values mod 2^e + // returns the 
inverse matrix gamma ( (d, -b), (-c, a) ) + // where gamma is the inverse of the determinant a*d - b*c + // assumes the matrix is invertible, otherwises, inversion of determinant fails + + int p = 1; + while ((1 << p) < e) { + p++; + } + int w = (1 << (p)); + + digit_t det[nwords], tmp[nwords], resa[nwords], resb[nwords], resc[nwords], resd[nwords]; + mp_mul(tmp, r1, s2, nwords); + mp_mul(det, r2, s1, nwords); + mp_sub(det, tmp, det, nwords); + mp_inv_2e(det, det, e, nwords); + + mp_mul(resa, det, s2, nwords); + mp_mul(resb, det, r2, nwords); + mp_mul(resc, det, s1, nwords); + mp_mul(resd, det, r1, nwords); + + mp_neg(resb, nwords); + mp_neg(resc, nwords); + + mp_mod_2exp(resa, w, nwords); + mp_mod_2exp(resb, w, nwords); + mp_mod_2exp(resc, w, nwords); + mp_mod_2exp(resd, w, nwords); + + mp_copy(r1, resa, nwords); + mp_copy(r2, resb, nwords); + mp_copy(s1, resc, nwords); + mp_copy(s2, resd, nwords); +} diff --git a/src/pqm4/sqisign_lvl3/ref/mp.h b/src/pqm4/sqisign_lvl3/ref/mp.h new file mode 100644 index 0000000..b3733b5 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/mp.h @@ -0,0 +1,88 @@ +#ifndef MP_H +#define MP_H + +#include +#include +#include + +// Functions taken from the GF module + +void mp_add(digit_t *c, const digit_t *a, const digit_t *b, const unsigned int nwords); +digit_t mp_shiftr(digit_t *x, const unsigned int shift, const unsigned int nwords); +void multiple_mp_shiftl(digit_t *x, const unsigned int shift, const unsigned int nwords); +void mp_shiftl(digit_t *x, const unsigned int shift, const unsigned int nwords); +void MUL(digit_t *out, const digit_t a, const digit_t b); + +// Functions taken from the EC module + +void mp_sub(digit_t *c, const digit_t *a, const digit_t *b, const unsigned int nwords); +void select_ct(digit_t *c, const digit_t *a, const digit_t *b, const digit_t mask, const int nwords); +void swap_ct(digit_t *a, digit_t *b, const digit_t option, const int nwords); +int mp_compare(const digit_t *a, const digit_t *b, unsigned int 
nwords); +bool mp_is_zero(const digit_t *a, unsigned int nwords); +void mp_mul2(digit_t *c, const digit_t *a, const digit_t *b); + +// Further functions for multiprecision arithmetic +void mp_print(const digit_t *a, size_t nwords); +void mp_copy(digit_t *b, const digit_t *a, size_t nwords); +void mp_neg(digit_t *a, unsigned int nwords); +bool mp_is_one(const digit_t *x, unsigned int nwords); +void mp_mul(digit_t *c, const digit_t *a, const digit_t *b, size_t nwords); +void mp_mod_2exp(digit_t *a, unsigned int e, unsigned int nwords); +void mp_inv_2e(digit_t *b, const digit_t *a, int e, unsigned int nwords); +void mp_invert_matrix(digit_t *r1, digit_t *r2, digit_t *s1, digit_t *s2, int e, unsigned int nwords); + +#define mp_is_odd(x, nwords) (((nwords) != 0) & (int)(x)[0]) +#define mp_is_even(x, nwords) (!mp_is_odd(x, nwords)) + +/********************** Constant-time unsigned comparisons ***********************/ + +// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise +static inline unsigned int +is_digit_nonzero_ct(digit_t x) +{ // Is x != 0? + return (unsigned int)((x | (0 - x)) >> (RADIX - 1)); +} + +static inline unsigned int +is_digit_zero_ct(digit_t x) +{ // Is x = 0? + return (unsigned int)(1 ^ is_digit_nonzero_ct(x)); +} + +static inline unsigned int +is_digit_lessthan_ct(digit_t x, digit_t y) +{ // Is x < y? 
+ return (unsigned int)((x ^ ((x ^ y) | ((x - y) ^ y))) >> (RADIX - 1)); +} + +/********************** Platform-independent macros for digit-size operations + * **********************/ + +// Digit addition with carry +#define ADDC(sumOut, carryOut, addend1, addend2, carryIn) \ + { \ + digit_t tempReg = (addend1) + (digit_t)(carryIn); \ + (sumOut) = (addend2) + tempReg; \ + (carryOut) = (is_digit_lessthan_ct(tempReg, (digit_t)(carryIn)) | is_digit_lessthan_ct((sumOut), tempReg)); \ + } + +// Digit subtraction with borrow +#define SUBC(differenceOut, borrowOut, minuend, subtrahend, borrowIn) \ + { \ + digit_t tempReg = (minuend) - (subtrahend); \ + unsigned int borrowReg = \ + (is_digit_lessthan_ct((minuend), (subtrahend)) | ((borrowIn) & is_digit_zero_ct(tempReg))); \ + (differenceOut) = tempReg - (digit_t)(borrowIn); \ + (borrowOut) = borrowReg; \ + } + +// Shift right with flexible datatype +#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (DigitSize - (shift))); + +// Digit shift left +#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (RADIX - (shift))); + +#endif diff --git a/src/pqm4/sqisign_lvl3/ref/pqm4_api.c b/src/pqm4/sqisign_lvl3/ref/pqm4_api.c new file mode 100644 index 0000000..5f1b121 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/pqm4_api.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include + +typedef struct { + size_t mlen; + char msg[59]; + size_t smlen; + char sm[59 + CRYPTO_BYTES]; +} SQISign_KAT_t; + +const char kat_lvl3_pk[CRYPTO_PUBLICKEYBYTES] = { + 0xBE, 0xAA, 0x01, 0xC6, 0x82, 0x45, 0xC6, 0x4B, 0x6C, 0x96, 0xED, 0xE0, 0x90, 0x89, 0xD0, 0x89, 0xBA, 0x1C, 0x42, 0x0B, 0xD3, 0xA9, 0xA0, 0xDC, 0x96, 0xBA, 0x81, 0x3F, 0x62, 0x7B, 0xA9, 0xAF, 0xB5, 0xE4, 0xEC, 0x6B, 0x70, 0x25, 0x27, 0x99, 0x2D, 0x3D, 0xFE, 0x07, 0x85, 0x0F, 0x20, 0x30, 0xFB, 0xC3, 0x17, 0x73, 0x53, 0x94, 0xBA, 0xFB, 0xA5, 
0x44, 0xFB, 0x30, 0x4F, 0x29, 0xC0, 0x4D, 0x6F, 0x97, 0x85, 0x2D, 0x4D, 0x14, 0x13, 0x4E, 0x4F, 0x7C, 0x03, 0x43, 0x84, 0x48, 0x23, 0x01, 0x1B, 0x77, 0x2C, 0xD9, 0xD3, 0x09, 0x00, 0x66, 0x1D, 0x26, 0xB8, 0x54, 0x44, 0xF2, 0x31, 0x2B, 0x02, +}; + +const SQISign_KAT_t kat_lvl3[2] = { + { + .mlen = 32, + .msg = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }, + .smlen = 32 + CRYPTO_BYTES, + .sm = { 0x24, 0x9A, 0xA4, 0x39, 0x1C, 0x30, 0x7F, 0xA3, 0xE4, 0x0A, 0x69, 0xAE, 0x23, 0xF2, 0x53, 0x15, 0x56, 0x4A, 0x53, 0xC4, 0x3F, 0x2E, 0xF8, 0x0B, 0x10, 0xE2, 0x26, 0x5E, 0x65, 0xA8, 0x1C, 0xDC, 0x67, 0xB6, 0x86, 0x11, 0x7C, 0x56, 0xC0, 0x63, 0x47, 0x20, 0x84, 0x67, 0x60, 0x8D, 0xCC, 0x02, 0x99, 0x66, 0xD0, 0x68, 0x64, 0x8F, 0xFE, 0xC5, 0x6A, 0xA8, 0x53, 0x71, 0x52, 0x57, 0x82, 0xF0, 0x98, 0x6A, 0xA0, 0xAB, 0x29, 0x53, 0x7A, 0x0B, 0xE8, 0x73, 0xE4, 0x1A, 0x99, 0x44, 0x1A, 0x5C, 0x73, 0x0C, 0x1E, 0x03, 0xEB, 0x28, 0x82, 0x93, 0x42, 0xAE, 0x14, 0xB0, 0x8D, 0x26, 0xDB, 0x03, 0x00, 0x02, 0x51, 0x9B, 0x4F, 0xF9, 0x41, 0xE2, 0xDF, 0x67, 0x34, 0xDF, 0x76, 0x3D, 0x2D, 0xD9, 0xA9, 0x48, 0xE5, 0x74, 0x08, 0x8E, 0xB7, 0x88, 0xD0, 0x5B, 0x03, 0x3E, 0x2E, 0x75, 0x78, 0x60, 0xD7, 0x88, 0x15, 0x7E, 0x6A, 0x68, 0x4F, 0x47, 0x25, 0x68, 0x80, 0x33, 0x16, 0x7C, 0x90, 0xDC, 0x91, 0x58, 0xC9, 0x00, 0x7A, 0xA2, 0x5F, 0x15, 0x91, 0xA8, 0x9C, 0x0A, 0xEA, 0x02, 0x6F, 0x23, 0xCE, 0xDE, 0xB6, 0xF9, 0x35, 0x00, 0x37, 0x2A, 0x84, 0x07, 0x34, 0xA8, 0x03, 0x48, 0x0C, 0xDC, 0x60, 0x19, 0x14, 0xE5, 0x32, 0x67, 0x28, 0xA4, 0x6F, 0x8F, 0xB8, 0x21, 0x64, 0x96, 0x11, 0x9D, 0xDE, 0xF8, 0x59, 0x59, 0x4E, 0x02, 0x5E, 0x06, 0xD7, 0x9C, 0x30, 0x6D, 0x92, 0x10, 0x85, 0x19, 0x9A, 0x90, 0xFD, 0x1B, 0xDE, 0xF9, 0x49, 0xA6, 0x72, 0xA0, 0x17, 0x56, 0x5E, 0x00, 0x08, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }, + }, + { + .mlen = 59, + .msg = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }, + .smlen = 59 + CRYPTO_BYTES, + .sm = { 0xFB, 0x52, 0xE6, 0x3A, 0x1B, 0xD2, 0x16, 0x0F, 0xFC, 0x99, 0x99, 0x7D, 0xF4, 0x83, 0xEE, 0x99, 0xC0, 0xE6, 0x92, 0x5C, 0x16, 0xBB, 0x3E, 0x6F, 0xDB, 0x86, 0x2C, 0x3A, 0xAC, 0x86, 0x37, 0x46, 0x1C, 0x5B, 0x68, 0xB4, 0xE0, 0xD1, 0x6C, 0x0B, 0x55, 0x6B, 0x8E, 0xD4, 0x57, 0xB0, 0xD0, 0x33, 0x74, 0xA4, 0xD8, 0x55, 0x82, 0x0F, 0xED, 0xE2, 0x5C, 0xF6, 0x09, 0x20, 0x53, 0x88, 0xF2, 0xB2, 0xBA, 0xEB, 0x9A, 0x8F, 0x9C, 0x6C, 0x37, 0x69, 0x52, 0xF1, 0x0E, 0xAB, 0xA7, 0x10, 0xBC, 0x26, 0x1B, 0x72, 0x78, 0xC4, 0xB9, 0x51, 0x9E, 0x0F, 0xB0, 0x1C, 0x53, 0x34, 0x4D, 0x4B, 0x2A, 0x16, 0x01, 0x01, 0x8F, 0xB7, 0x44, 0x4B, 0x16, 0x8A, 0xCC, 0x0E, 0xDE, 0x32, 0x0B, 0x0F, 0x83, 0x89, 0x01, 0xBA, 0xC6, 0x46, 0x7E, 0x70, 0x81, 0xC0, 0xC1, 0x58, 0x01, 0xBF, 0x55, 0x63, 0xA8, 0x3D, 0xFB, 0x89, 0x43, 0x83, 0x30, 0xE8, 0x14, 0x1C, 0xAB, 0xB4, 0x05, 0x80, 0x88, 0x60, 0x36, 0xFA, 0xA2, 0x3E, 0x74, 0x00, 0xEC, 0xE1, 0xEE, 0x7B, 0x38, 0x9C, 0x58, 0xE9, 0xD0, 0xA2, 0x4C, 0x04, 0xB6, 0xD2, 0x36, 0x1D, 0x20, 0xE8, 0x8E, 0x3D, 0xB8, 0x79, 0xEC, 0xF9, 0x00, 0xE2, 0x6E, 0x5D, 0xFE, 0xB0, 0x75, 0x22, 0x43, 0x37, 0xC7, 0x5C, 0x00, 0x4B, 0xE5, 0xE5, 0xDC, 0x54, 0xDA, 0xAD, 0x54, 0x7F, 0xEC, 0xB8, 0xCA, 0x00, 0xE3, 0x1D, 0x49, 0xA2, 0x67, 0xA7, 0x4B, 0x3F, 0xFC, 0x1E, 0xB3, 0x40, 0x08, 0xEA, 0x48, 0xBD, 0x81, 0x0E, 0xAE, 0xE1, 0x54, 0x68, 0xB2, 0x00, 0x02, 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }, + }, +}; + +int crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { + memcpy(pk, kat_lvl3_pk, CRYPTO_PUBLICKEYBYTES); + memset(sk, 0, CRYPTO_SECRETKEYBYTES); // We don't need the secret key + return 0; // report success; the function previously fell off the end without a value +} + +int crypto_sign(unsigned char *sm, size_t *smlen, const unsigned char *m, + size_t mlen, const unsigned char *sk) { + for (size_t i = 0; i < sizeof(kat_lvl3) / sizeof(kat_lvl3[0]); i++) { + if (mlen == kat_lvl3[i].mlen) { + memcpy(sm, kat_lvl3[i].sm, kat_lvl3[i].smlen); + *smlen = kat_lvl3[i].smlen; + return 0; + } + } + + return 1; +} + +int crypto_sign_open(unsigned char *m, size_t *mlen, const unsigned char *sm, + size_t smlen, const unsigned char *pk) { + unsigned long long mlen_ull = 0; // never read *mlen here: it is output-only and is NULL-checked below + int ret = sqisign_open(m, &mlen_ull, sm, smlen, pk); + if (mlen) { + *mlen = mlen_ull; + } + return ret; +} diff --git a/src/pqm4/sqisign_lvl3/ref/rng.h b/src/pqm4/sqisign_lvl3/ref/rng.h new file mode 100644 index 0000000..3c24d07 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/rng.h @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: Apache-2.0 + +#ifndef rng_h +#define rng_h + +#include "randombytes.h" + +#endif /* rng_h */ diff --git a/src/pqm4/sqisign_lvl3/ref/sig.h b/src/pqm4/sqisign_lvl3/ref/sig.h new file mode 100644 index 0000000..4c33510 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/sig.h @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: Apache-2.0 + +#ifndef SQISIGN_H +#define SQISIGN_H + +#include +#include + +#if defined(ENABLE_SIGN) +/** + * SQIsign keypair generation. + * + * The implementation corresponds to SQIsign.CompactKeyGen() in the SQIsign spec. + * The caller is responsible to allocate sufficient memory to hold pk and sk. 
+ * + * @param[out] pk SQIsign public key + * @param[out] sk SQIsign secret key + * @return int status code + */ +SQISIGN_API +int sqisign_keypair(unsigned char *pk, unsigned char *sk); + +/** + * SQIsign signature generation. + * + * The implementation performs SQIsign.expandSK() + SQIsign.sign() in the SQIsign spec. + * The key provided is a compacted secret key. + * The caller is responsible to allocate sufficient memory to hold sm. + * + * @param[out] sm Signature concatenated with message + * @param[out] smlen Pointer to the length of sm + * @param[in] m Message to be signed + * @param[in] mlen Message length + * @param[in] sk Compacted secret key + * @return int status code + */ +SQISIGN_API +int sqisign_sign(unsigned char *sm, + unsigned long long *smlen, + const unsigned char *m, + unsigned long long mlen, + const unsigned char *sk); +#endif + +/** + * SQIsign open signature. + * + * The implementation performs SQIsign.verify(). If the signature verification succeeded, the + * original message is stored in m. The key provided is a compact public key. The caller is responsible + * to allocate sufficient memory to hold m. + * + * @param[out] m Message stored if verification succeeds + * @param[out] mlen Pointer to the length of m + * @param[in] sm Signature concatenated with message + * @param[in] smlen Length of sm + * @param[in] pk Compacted public key + * @return int status code + */ +SQISIGN_API +int sqisign_open(unsigned char *m, + unsigned long long *mlen, + const unsigned char *sm, + unsigned long long smlen, + const unsigned char *pk); + +/** + * SQIsign verify signature. + * + * If the signature verification succeeded, returns 0, otherwise 1. + * + * @param[in] m Message whose signature is checked + * @param[in] mlen Length of m + * @param[in] sig Signature + * @param[in] siglen Length of sig + * @param[in] pk Compacted public key + * @return int 0 if verification succeeded, 1 otherwise. 
+ */ +SQISIGN_API +int sqisign_verify(const unsigned char *m, + unsigned long long mlen, + const unsigned char *sig, + unsigned long long siglen, + const unsigned char *pk); + +#endif diff --git a/src/pqm4/sqisign_lvl3/ref/sqisign.c b/src/pqm4/sqisign_lvl3/ref/sqisign.c new file mode 100644 index 0000000..57fd75d --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/sqisign.c @@ -0,0 +1,112 @@ +#include +#include +#include +#include +#if defined(ENABLE_SIGN) +#include +#endif + +#if defined(ENABLE_SIGN) +SQISIGN_API +int +sqisign_keypair(unsigned char *pk, unsigned char *sk) +{ + int ret = 0; + secret_key_t skt; + public_key_t pkt = { 0 }; + secret_key_init(&skt); + + ret = !protocols_keygen(&pkt, &skt); + + secret_key_to_bytes(sk, &skt, &pkt); + public_key_to_bytes(pk, &pkt); + secret_key_finalize(&skt); + return ret; +} + +SQISIGN_API +int +sqisign_sign(unsigned char *sm, + unsigned long long *smlen, + const unsigned char *m, + unsigned long long mlen, + const unsigned char *sk) +{ + int ret = 0; + secret_key_t skt; + public_key_t pkt = { 0 }; + signature_t sigt; + secret_key_init(&skt); + secret_key_from_bytes(&skt, &pkt, sk); + + memmove(sm + SIGNATURE_BYTES, m, mlen); + + ret = !protocols_sign(&sigt, &pkt, &skt, sm + SIGNATURE_BYTES, mlen); + if (ret != 0) { + *smlen = 0; + goto err; + } + + signature_to_bytes(sm, &sigt); + *smlen = SIGNATURE_BYTES + mlen; + +err: + secret_key_finalize(&skt); + return ret; +} +#endif + +SQISIGN_API +int +sqisign_open(unsigned char *m, + unsigned long long *mlen, + const unsigned char *sm, + unsigned long long smlen, + const unsigned char *pk) +{ + int ret = 0; + public_key_t pkt = { 0 }; + signature_t sigt; + + // sm must at least hold a signature; otherwise smlen - SIGNATURE_BYTES wraps around below + if (smlen < SIGNATURE_BYTES) { + *mlen = 0; + return 1; + } + + public_key_from_bytes(&pkt, pk); + signature_from_bytes(&sigt, sm); + + ret = !protocols_verify(&sigt, &pkt, sm + SIGNATURE_BYTES, smlen - SIGNATURE_BYTES); + + if (!ret) { + *mlen = smlen - SIGNATURE_BYTES; + memmove(m, sm + SIGNATURE_BYTES, *mlen); + } else { + *mlen = 0; + memset(m, 0, smlen - SIGNATURE_BYTES); + } + + 
return ret; +} + +SQISIGN_API +int +sqisign_verify(const unsigned char *m, + unsigned long long mlen, + const unsigned char *sig, + unsigned long long siglen, + const unsigned char *pk) +{ + + int ret = 0; + public_key_t pkt = { 0 }; + signature_t sigt; + + public_key_from_bytes(&pkt, pk); + signature_from_bytes(&sigt, sig); + + ret = !protocols_verify(&sigt, &pkt, m, mlen); + + return ret; +} diff --git a/src/pqm4/sqisign_lvl3/ref/sqisign_namespace.h b/src/pqm4/sqisign_lvl3/ref/sqisign_namespace.h new file mode 100644 index 0000000..14fd51d --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/sqisign_namespace.h @@ -0,0 +1,1022 @@ + +#ifndef SQISIGN_NAMESPACE_H +#define SQISIGN_NAMESPACE_H + +//#define DISABLE_NAMESPACING + +#if defined(_WIN32) +#define SQISIGN_API __declspec(dllexport) +#else +#define SQISIGN_API __attribute__((visibility("default"))) +#endif + +#define PARAM_JOIN3_(a, b, c) sqisign_##a##_##b##_##c +#define PARAM_JOIN3(a, b, c) PARAM_JOIN3_(a, b, c) +#define PARAM_NAME3(end, s) PARAM_JOIN3(SQISIGN_VARIANT, end, s) + +#define PARAM_JOIN2_(a, b) sqisign_##a##_##b +#define PARAM_JOIN2(a, b) PARAM_JOIN2_(a, b) +#define PARAM_NAME2(end, s) PARAM_JOIN2(end, s) + +#ifndef DISABLE_NAMESPACING +#define SQISIGN_NAMESPACE_GENERIC(s) PARAM_NAME2(gen, s) +#else +#define SQISIGN_NAMESPACE_GENERIC(s) s +#endif + +#if defined(SQISIGN_VARIANT) && !defined(DISABLE_NAMESPACING) +#if defined(SQISIGN_BUILD_TYPE_REF) +#define SQISIGN_NAMESPACE(s) PARAM_NAME3(ref, s) +#elif defined(SQISIGN_BUILD_TYPE_OPT) +#define SQISIGN_NAMESPACE(s) PARAM_NAME3(opt, s) +#elif defined(SQISIGN_BUILD_TYPE_BROADWELL) +#define SQISIGN_NAMESPACE(s) PARAM_NAME3(broadwell, s) +#elif defined(SQISIGN_BUILD_TYPE_ARM64CRYPTO) +#define SQISIGN_NAMESPACE(s) PARAM_NAME3(arm64crypto, s) +#else +#error "Build type not known" +#endif + +#else +#define SQISIGN_NAMESPACE(s) s +#endif + +// Namespacing symbols exported from algebra.c: +#undef quat_alg_add +#undef quat_alg_conj +#undef quat_alg_coord_mul +#undef 
quat_alg_elem_copy +#undef quat_alg_elem_copy_ibz +#undef quat_alg_elem_equal +#undef quat_alg_elem_is_zero +#undef quat_alg_elem_mul_by_scalar +#undef quat_alg_elem_set +#undef quat_alg_equal_denom +#undef quat_alg_init_set_ui +#undef quat_alg_make_primitive +#undef quat_alg_mul +#undef quat_alg_norm +#undef quat_alg_normalize +#undef quat_alg_scalar +#undef quat_alg_sub + +#define quat_alg_add SQISIGN_NAMESPACE_GENERIC(quat_alg_add) +#define quat_alg_conj SQISIGN_NAMESPACE_GENERIC(quat_alg_conj) +#define quat_alg_coord_mul SQISIGN_NAMESPACE_GENERIC(quat_alg_coord_mul) +#define quat_alg_elem_copy SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_copy) +#define quat_alg_elem_copy_ibz SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_copy_ibz) +#define quat_alg_elem_equal SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_equal) +#define quat_alg_elem_is_zero SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_is_zero) +#define quat_alg_elem_mul_by_scalar SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_mul_by_scalar) +#define quat_alg_elem_set SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_set) +#define quat_alg_equal_denom SQISIGN_NAMESPACE_GENERIC(quat_alg_equal_denom) +#define quat_alg_init_set_ui SQISIGN_NAMESPACE_GENERIC(quat_alg_init_set_ui) +#define quat_alg_make_primitive SQISIGN_NAMESPACE_GENERIC(quat_alg_make_primitive) +#define quat_alg_mul SQISIGN_NAMESPACE_GENERIC(quat_alg_mul) +#define quat_alg_norm SQISIGN_NAMESPACE_GENERIC(quat_alg_norm) +#define quat_alg_normalize SQISIGN_NAMESPACE_GENERIC(quat_alg_normalize) +#define quat_alg_scalar SQISIGN_NAMESPACE_GENERIC(quat_alg_scalar) +#define quat_alg_sub SQISIGN_NAMESPACE_GENERIC(quat_alg_sub) + +// Namespacing symbols exported from api.c: +#undef crypto_sign +#undef crypto_sign_keypair +#undef crypto_sign_open + +#define crypto_sign SQISIGN_NAMESPACE(crypto_sign) +#define crypto_sign_keypair SQISIGN_NAMESPACE(crypto_sign_keypair) +#define crypto_sign_open SQISIGN_NAMESPACE(crypto_sign_open) + +// Namespacing symbols exported from basis.c: +#undef 
ec_curve_to_basis_2f_from_hint +#undef ec_curve_to_basis_2f_to_hint +#undef ec_recover_y +#undef lift_basis +#undef lift_basis_normalized + +#define ec_curve_to_basis_2f_from_hint SQISIGN_NAMESPACE(ec_curve_to_basis_2f_from_hint) +#define ec_curve_to_basis_2f_to_hint SQISIGN_NAMESPACE(ec_curve_to_basis_2f_to_hint) +#define ec_recover_y SQISIGN_NAMESPACE(ec_recover_y) +#define lift_basis SQISIGN_NAMESPACE(lift_basis) +#define lift_basis_normalized SQISIGN_NAMESPACE(lift_basis_normalized) + +// Namespacing symbols exported from biextension.c: +#undef clear_cofac +#undef ec_dlog_2_tate +#undef ec_dlog_2_weil +#undef fp2_frob +#undef reduced_tate +#undef weil + +#define clear_cofac SQISIGN_NAMESPACE(clear_cofac) +#define ec_dlog_2_tate SQISIGN_NAMESPACE(ec_dlog_2_tate) +#define ec_dlog_2_weil SQISIGN_NAMESPACE(ec_dlog_2_weil) +#define fp2_frob SQISIGN_NAMESPACE(fp2_frob) +#define reduced_tate SQISIGN_NAMESPACE(reduced_tate) +#define weil SQISIGN_NAMESPACE(weil) + +// Namespacing symbols exported from common.c: +#undef hash_to_challenge +#undef public_key_finalize +#undef public_key_init + +#define hash_to_challenge SQISIGN_NAMESPACE(hash_to_challenge) +#define public_key_finalize SQISIGN_NAMESPACE(public_key_finalize) +#define public_key_init SQISIGN_NAMESPACE(public_key_init) + +// Namespacing symbols exported from dim2.c: +#undef ibz_2x2_mul_mod +#undef ibz_mat_2x2_add +#undef ibz_mat_2x2_copy +#undef ibz_mat_2x2_det_from_ibz +#undef ibz_mat_2x2_eval +#undef ibz_mat_2x2_inv_mod +#undef ibz_mat_2x2_set +#undef ibz_vec_2_set + +#define ibz_2x2_mul_mod SQISIGN_NAMESPACE_GENERIC(ibz_2x2_mul_mod) +#define ibz_mat_2x2_add SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_add) +#define ibz_mat_2x2_copy SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_copy) +#define ibz_mat_2x2_det_from_ibz SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_det_from_ibz) +#define ibz_mat_2x2_eval SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_eval) +#define ibz_mat_2x2_inv_mod SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_inv_mod) +#define 
ibz_mat_2x2_set SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_set) +#define ibz_vec_2_set SQISIGN_NAMESPACE_GENERIC(ibz_vec_2_set) + +// Namespacing symbols exported from dim2id2iso.c: +#undef dim2id2iso_arbitrary_isogeny_evaluation +#undef dim2id2iso_ideal_to_isogeny_clapotis +#undef find_uv +#undef fixed_degree_isogeny_and_eval + +#define dim2id2iso_arbitrary_isogeny_evaluation SQISIGN_NAMESPACE(dim2id2iso_arbitrary_isogeny_evaluation) +#define dim2id2iso_ideal_to_isogeny_clapotis SQISIGN_NAMESPACE(dim2id2iso_ideal_to_isogeny_clapotis) +#define find_uv SQISIGN_NAMESPACE(find_uv) +#define fixed_degree_isogeny_and_eval SQISIGN_NAMESPACE(fixed_degree_isogeny_and_eval) + +// Namespacing symbols exported from dim4.c: +#undef ibz_inv_dim4_make_coeff_mpm +#undef ibz_inv_dim4_make_coeff_pmp +#undef ibz_mat_4x4_copy +#undef ibz_mat_4x4_equal +#undef ibz_mat_4x4_eval +#undef ibz_mat_4x4_eval_t +#undef ibz_mat_4x4_gcd +#undef ibz_mat_4x4_identity +#undef ibz_mat_4x4_inv_with_det_as_denom +#undef ibz_mat_4x4_is_identity +#undef ibz_mat_4x4_mul +#undef ibz_mat_4x4_negate +#undef ibz_mat_4x4_scalar_div +#undef ibz_mat_4x4_scalar_mul +#undef ibz_mat_4x4_transpose +#undef ibz_mat_4x4_zero +#undef ibz_vec_4_add +#undef ibz_vec_4_content +#undef ibz_vec_4_copy +#undef ibz_vec_4_copy_ibz +#undef ibz_vec_4_is_zero +#undef ibz_vec_4_linear_combination +#undef ibz_vec_4_negate +#undef ibz_vec_4_scalar_div +#undef ibz_vec_4_scalar_mul +#undef ibz_vec_4_set +#undef ibz_vec_4_sub +#undef quat_qf_eval + +#define ibz_inv_dim4_make_coeff_mpm SQISIGN_NAMESPACE_GENERIC(ibz_inv_dim4_make_coeff_mpm) +#define ibz_inv_dim4_make_coeff_pmp SQISIGN_NAMESPACE_GENERIC(ibz_inv_dim4_make_coeff_pmp) +#define ibz_mat_4x4_copy SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_copy) +#define ibz_mat_4x4_equal SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_equal) +#define ibz_mat_4x4_eval SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_eval) +#define ibz_mat_4x4_eval_t SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_eval_t) +#define ibz_mat_4x4_gcd 
SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_gcd) +#define ibz_mat_4x4_identity SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_identity) +#define ibz_mat_4x4_inv_with_det_as_denom SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_inv_with_det_as_denom) +#define ibz_mat_4x4_is_identity SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_is_identity) +#define ibz_mat_4x4_mul SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_mul) +#define ibz_mat_4x4_negate SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_negate) +#define ibz_mat_4x4_scalar_div SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_scalar_div) +#define ibz_mat_4x4_scalar_mul SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_scalar_mul) +#define ibz_mat_4x4_transpose SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_transpose) +#define ibz_mat_4x4_zero SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_zero) +#define ibz_vec_4_add SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_add) +#define ibz_vec_4_content SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_content) +#define ibz_vec_4_copy SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_copy) +#define ibz_vec_4_copy_ibz SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_copy_ibz) +#define ibz_vec_4_is_zero SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_is_zero) +#define ibz_vec_4_linear_combination SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_linear_combination) +#define ibz_vec_4_negate SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_negate) +#define ibz_vec_4_scalar_div SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_scalar_div) +#define ibz_vec_4_scalar_mul SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_scalar_mul) +#define ibz_vec_4_set SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_set) +#define ibz_vec_4_sub SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_sub) +#define quat_qf_eval SQISIGN_NAMESPACE_GENERIC(quat_qf_eval) + +// Namespacing symbols exported from ec.c: +#undef cswap_points +#undef ec_biscalar_mul +#undef ec_curve_init +#undef ec_curve_init_from_A +#undef ec_curve_normalize_A24 +#undef ec_curve_verify_A +#undef ec_dbl +#undef ec_dbl_iter +#undef ec_dbl_iter_basis +#undef ec_has_zero_coordinate +#undef ec_is_basis_four_torsion +#undef ec_is_equal +#undef ec_is_four_torsion 
+#undef ec_is_two_torsion +#undef ec_is_zero +#undef ec_j_inv +#undef ec_ladder3pt +#undef ec_mul +#undef ec_normalize_curve +#undef ec_normalize_curve_and_A24 +#undef ec_normalize_point +#undef ec_point_init +#undef select_point +#undef xADD +#undef xDBL +#undef xDBLADD +#undef xDBLMUL +#undef xDBL_A24 +#undef xDBL_E0 +#undef xMUL + +#define cswap_points SQISIGN_NAMESPACE(cswap_points) +#define ec_biscalar_mul SQISIGN_NAMESPACE(ec_biscalar_mul) +#define ec_curve_init SQISIGN_NAMESPACE(ec_curve_init) +#define ec_curve_init_from_A SQISIGN_NAMESPACE(ec_curve_init_from_A) +#define ec_curve_normalize_A24 SQISIGN_NAMESPACE(ec_curve_normalize_A24) +#define ec_curve_verify_A SQISIGN_NAMESPACE(ec_curve_verify_A) +#define ec_dbl SQISIGN_NAMESPACE(ec_dbl) +#define ec_dbl_iter SQISIGN_NAMESPACE(ec_dbl_iter) +#define ec_dbl_iter_basis SQISIGN_NAMESPACE(ec_dbl_iter_basis) +#define ec_has_zero_coordinate SQISIGN_NAMESPACE(ec_has_zero_coordinate) +#define ec_is_basis_four_torsion SQISIGN_NAMESPACE(ec_is_basis_four_torsion) +#define ec_is_equal SQISIGN_NAMESPACE(ec_is_equal) +#define ec_is_four_torsion SQISIGN_NAMESPACE(ec_is_four_torsion) +#define ec_is_two_torsion SQISIGN_NAMESPACE(ec_is_two_torsion) +#define ec_is_zero SQISIGN_NAMESPACE(ec_is_zero) +#define ec_j_inv SQISIGN_NAMESPACE(ec_j_inv) +#define ec_ladder3pt SQISIGN_NAMESPACE(ec_ladder3pt) +#define ec_mul SQISIGN_NAMESPACE(ec_mul) +#define ec_normalize_curve SQISIGN_NAMESPACE(ec_normalize_curve) +#define ec_normalize_curve_and_A24 SQISIGN_NAMESPACE(ec_normalize_curve_and_A24) +#define ec_normalize_point SQISIGN_NAMESPACE(ec_normalize_point) +#define ec_point_init SQISIGN_NAMESPACE(ec_point_init) +#define select_point SQISIGN_NAMESPACE(select_point) +#define xADD SQISIGN_NAMESPACE(xADD) +#define xDBL SQISIGN_NAMESPACE(xDBL) +#define xDBLADD SQISIGN_NAMESPACE(xDBLADD) +#define xDBLMUL SQISIGN_NAMESPACE(xDBLMUL) +#define xDBL_A24 SQISIGN_NAMESPACE(xDBL_A24) +#define xDBL_E0 SQISIGN_NAMESPACE(xDBL_E0) +#define xMUL 
SQISIGN_NAMESPACE(xMUL) + +// Namespacing symbols exported from ec_jac.c: +#undef ADD +#undef DBL +#undef DBLW +#undef copy_jac_point +#undef jac_from_ws +#undef jac_init +#undef jac_is_equal +#undef jac_neg +#undef jac_to_ws +#undef jac_to_xz +#undef jac_to_xz_add_components +#undef select_jac_point + +#define ADD SQISIGN_NAMESPACE(ADD) +#define DBL SQISIGN_NAMESPACE(DBL) +#define DBLW SQISIGN_NAMESPACE(DBLW) +#define copy_jac_point SQISIGN_NAMESPACE(copy_jac_point) +#define jac_from_ws SQISIGN_NAMESPACE(jac_from_ws) +#define jac_init SQISIGN_NAMESPACE(jac_init) +#define jac_is_equal SQISIGN_NAMESPACE(jac_is_equal) +#define jac_neg SQISIGN_NAMESPACE(jac_neg) +#define jac_to_ws SQISIGN_NAMESPACE(jac_to_ws) +#define jac_to_xz SQISIGN_NAMESPACE(jac_to_xz) +#define jac_to_xz_add_components SQISIGN_NAMESPACE(jac_to_xz_add_components) +#define select_jac_point SQISIGN_NAMESPACE(select_jac_point) + +// Namespacing symbols exported from encode_signature.c: +#undef secret_key_from_bytes +#undef secret_key_to_bytes + +#define secret_key_from_bytes SQISIGN_NAMESPACE(secret_key_from_bytes) +#define secret_key_to_bytes SQISIGN_NAMESPACE(secret_key_to_bytes) + +// Namespacing symbols exported from encode_verification.c: +#undef public_key_from_bytes +#undef public_key_to_bytes +#undef signature_from_bytes +#undef signature_to_bytes + +#define public_key_from_bytes SQISIGN_NAMESPACE(public_key_from_bytes) +#define public_key_to_bytes SQISIGN_NAMESPACE(public_key_to_bytes) +#define signature_from_bytes SQISIGN_NAMESPACE(signature_from_bytes) +#define signature_to_bytes SQISIGN_NAMESPACE(signature_to_bytes) + +// Namespacing symbols exported from finit.c: +#undef ibz_mat_2x2_finalize +#undef ibz_mat_2x2_init +#undef ibz_mat_4x4_finalize +#undef ibz_mat_4x4_init +#undef ibz_vec_2_finalize +#undef ibz_vec_2_init +#undef ibz_vec_4_finalize +#undef ibz_vec_4_init +#undef quat_alg_elem_finalize +#undef quat_alg_elem_init +#undef quat_alg_finalize +#undef quat_alg_init_set +#undef 
quat_lattice_finalize +#undef quat_lattice_init +#undef quat_left_ideal_finalize +#undef quat_left_ideal_init + +#define ibz_mat_2x2_finalize SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_finalize) +#define ibz_mat_2x2_init SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_init) +#define ibz_mat_4x4_finalize SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_finalize) +#define ibz_mat_4x4_init SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_init) +#define ibz_vec_2_finalize SQISIGN_NAMESPACE_GENERIC(ibz_vec_2_finalize) +#define ibz_vec_2_init SQISIGN_NAMESPACE_GENERIC(ibz_vec_2_init) +#define ibz_vec_4_finalize SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_finalize) +#define ibz_vec_4_init SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_init) +#define quat_alg_elem_finalize SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_finalize) +#define quat_alg_elem_init SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_init) +#define quat_alg_finalize SQISIGN_NAMESPACE_GENERIC(quat_alg_finalize) +#define quat_alg_init_set SQISIGN_NAMESPACE_GENERIC(quat_alg_init_set) +#define quat_lattice_finalize SQISIGN_NAMESPACE_GENERIC(quat_lattice_finalize) +#define quat_lattice_init SQISIGN_NAMESPACE_GENERIC(quat_lattice_init) +#define quat_left_ideal_finalize SQISIGN_NAMESPACE_GENERIC(quat_left_ideal_finalize) +#define quat_left_ideal_init SQISIGN_NAMESPACE_GENERIC(quat_left_ideal_init) + +// Namespacing symbols exported from fp.c: +#undef fp_select + +#define fp_select SQISIGN_NAMESPACE(fp_select) + +// Namespacing symbols exported from fp.c, fp_p27500_64.c, fp_p5248_64.c, fp_p65376_64.c: +#undef fp_exp3div4 +#undef fp_inv +#undef fp_is_square +#undef fp_sqrt + +#define fp_exp3div4 SQISIGN_NAMESPACE(fp_exp3div4) +#define fp_inv SQISIGN_NAMESPACE(fp_inv) +#define fp_is_square SQISIGN_NAMESPACE(fp_is_square) +#define fp_sqrt SQISIGN_NAMESPACE(fp_sqrt) + +// Namespacing symbols exported from fp2.c: +#undef fp2_add +#undef fp2_add_one +#undef fp2_batched_inv +#undef fp2_copy +#undef fp2_cswap +#undef fp2_decode +#undef fp2_encode +#undef fp2_half +#undef fp2_inv +#undef 
fp2_is_equal +#undef fp2_is_one +#undef fp2_is_square +#undef fp2_is_zero +#undef fp2_mul +#undef fp2_mul_small +#undef fp2_neg +#undef fp2_pow_vartime +#undef fp2_print +#undef fp2_select +#undef fp2_set_one +#undef fp2_set_small +#undef fp2_set_zero +#undef fp2_sqr +#undef fp2_sqrt +#undef fp2_sqrt_verify +#undef fp2_sub + +#define fp2_add SQISIGN_NAMESPACE(fp2_add) +#define fp2_add_one SQISIGN_NAMESPACE(fp2_add_one) +#define fp2_batched_inv SQISIGN_NAMESPACE(fp2_batched_inv) +#define fp2_copy SQISIGN_NAMESPACE(fp2_copy) +#define fp2_cswap SQISIGN_NAMESPACE(fp2_cswap) +#define fp2_decode SQISIGN_NAMESPACE(fp2_decode) +#define fp2_encode SQISIGN_NAMESPACE(fp2_encode) +#define fp2_half SQISIGN_NAMESPACE(fp2_half) +#define fp2_inv SQISIGN_NAMESPACE(fp2_inv) +#define fp2_is_equal SQISIGN_NAMESPACE(fp2_is_equal) +#define fp2_is_one SQISIGN_NAMESPACE(fp2_is_one) +#define fp2_is_square SQISIGN_NAMESPACE(fp2_is_square) +#define fp2_is_zero SQISIGN_NAMESPACE(fp2_is_zero) +#define fp2_mul SQISIGN_NAMESPACE(fp2_mul) +#define fp2_mul_small SQISIGN_NAMESPACE(fp2_mul_small) +#define fp2_neg SQISIGN_NAMESPACE(fp2_neg) +#define fp2_pow_vartime SQISIGN_NAMESPACE(fp2_pow_vartime) +#define fp2_print SQISIGN_NAMESPACE(fp2_print) +#define fp2_select SQISIGN_NAMESPACE(fp2_select) +#define fp2_set_one SQISIGN_NAMESPACE(fp2_set_one) +#define fp2_set_small SQISIGN_NAMESPACE(fp2_set_small) +#define fp2_set_zero SQISIGN_NAMESPACE(fp2_set_zero) +#define fp2_sqr SQISIGN_NAMESPACE(fp2_sqr) +#define fp2_sqrt SQISIGN_NAMESPACE(fp2_sqrt) +#define fp2_sqrt_verify SQISIGN_NAMESPACE(fp2_sqrt_verify) +#define fp2_sub SQISIGN_NAMESPACE(fp2_sub) + +// Namespacing symbols exported from fp_p27500_64.c, fp_p5248_64.c, fp_p65376_64.c: +#undef fp_copy +#undef fp_cswap +#undef fp_decode +#undef fp_decode_reduce +#undef fp_div3 +#undef fp_encode +#undef fp_half +#undef fp_is_equal +#undef fp_is_zero +#undef fp_mul_small +#undef fp_neg +#undef fp_set_one +#undef fp_set_small +#undef fp_set_zero + +#define 
fp_copy SQISIGN_NAMESPACE(fp_copy) +#define fp_cswap SQISIGN_NAMESPACE(fp_cswap) +#define fp_decode SQISIGN_NAMESPACE(fp_decode) +#define fp_decode_reduce SQISIGN_NAMESPACE(fp_decode_reduce) +#define fp_div3 SQISIGN_NAMESPACE(fp_div3) +#define fp_encode SQISIGN_NAMESPACE(fp_encode) +#define fp_half SQISIGN_NAMESPACE(fp_half) +#define fp_is_equal SQISIGN_NAMESPACE(fp_is_equal) +#define fp_is_zero SQISIGN_NAMESPACE(fp_is_zero) +#define fp_mul_small SQISIGN_NAMESPACE(fp_mul_small) +#define fp_neg SQISIGN_NAMESPACE(fp_neg) +#define fp_set_one SQISIGN_NAMESPACE(fp_set_one) +#define fp_set_small SQISIGN_NAMESPACE(fp_set_small) +#define fp_set_zero SQISIGN_NAMESPACE(fp_set_zero) + +// Namespacing symbols exported from fp_p27500_64.c, fp_p5248_64.c, fp_p65376_64.c, gf27500.c, gf5248.c, gf65376.c: +#undef fp_add +#undef fp_mul +#undef fp_sqr +#undef fp_sub + +#define fp_add SQISIGN_NAMESPACE(fp_add) +#define fp_mul SQISIGN_NAMESPACE(fp_mul) +#define fp_sqr SQISIGN_NAMESPACE(fp_sqr) +#define fp_sub SQISIGN_NAMESPACE(fp_sub) + +// Namespacing symbols exported from gf27500.c: +#undef gf27500_decode +#undef gf27500_decode_reduce +#undef gf27500_div +#undef gf27500_div3 +#undef gf27500_encode +#undef gf27500_invert +#undef gf27500_legendre +#undef gf27500_sqrt + +#define gf27500_decode SQISIGN_NAMESPACE(gf27500_decode) +#define gf27500_decode_reduce SQISIGN_NAMESPACE(gf27500_decode_reduce) +#define gf27500_div SQISIGN_NAMESPACE(gf27500_div) +#define gf27500_div3 SQISIGN_NAMESPACE(gf27500_div3) +#define gf27500_encode SQISIGN_NAMESPACE(gf27500_encode) +#define gf27500_invert SQISIGN_NAMESPACE(gf27500_invert) +#define gf27500_legendre SQISIGN_NAMESPACE(gf27500_legendre) +#define gf27500_sqrt SQISIGN_NAMESPACE(gf27500_sqrt) + +// Namespacing symbols exported from gf27500.c, gf5248.c, gf65376.c: +#undef fp2_mul_c0 +#undef fp2_mul_c1 +#undef fp2_sq_c0 +#undef fp2_sq_c1 + +#define fp2_mul_c0 SQISIGN_NAMESPACE(fp2_mul_c0) +#define fp2_mul_c1 SQISIGN_NAMESPACE(fp2_mul_c1) +#define 
fp2_sq_c0 SQISIGN_NAMESPACE(fp2_sq_c0) +#define fp2_sq_c1 SQISIGN_NAMESPACE(fp2_sq_c1) + +// Namespacing symbols exported from gf5248.c: +#undef gf5248_decode +#undef gf5248_decode_reduce +#undef gf5248_div +#undef gf5248_div3 +#undef gf5248_encode +#undef gf5248_invert +#undef gf5248_legendre +#undef gf5248_sqrt + +#define gf5248_decode SQISIGN_NAMESPACE(gf5248_decode) +#define gf5248_decode_reduce SQISIGN_NAMESPACE(gf5248_decode_reduce) +#define gf5248_div SQISIGN_NAMESPACE(gf5248_div) +#define gf5248_div3 SQISIGN_NAMESPACE(gf5248_div3) +#define gf5248_encode SQISIGN_NAMESPACE(gf5248_encode) +#define gf5248_invert SQISIGN_NAMESPACE(gf5248_invert) +#define gf5248_legendre SQISIGN_NAMESPACE(gf5248_legendre) +#define gf5248_sqrt SQISIGN_NAMESPACE(gf5248_sqrt) + +// Namespacing symbols exported from gf65376.c: +#undef gf65376_decode +#undef gf65376_decode_reduce +#undef gf65376_div +#undef gf65376_div3 +#undef gf65376_encode +#undef gf65376_invert +#undef gf65376_legendre +#undef gf65376_sqrt + +#define gf65376_decode SQISIGN_NAMESPACE(gf65376_decode) +#define gf65376_decode_reduce SQISIGN_NAMESPACE(gf65376_decode_reduce) +#define gf65376_div SQISIGN_NAMESPACE(gf65376_div) +#define gf65376_div3 SQISIGN_NAMESPACE(gf65376_div3) +#define gf65376_encode SQISIGN_NAMESPACE(gf65376_encode) +#define gf65376_invert SQISIGN_NAMESPACE(gf65376_invert) +#define gf65376_legendre SQISIGN_NAMESPACE(gf65376_legendre) +#define gf65376_sqrt SQISIGN_NAMESPACE(gf65376_sqrt) + +// Namespacing symbols exported from hd.c: +#undef add_couple_jac_points +#undef copy_bases_to_kernel +#undef couple_jac_to_xz +#undef double_couple_jac_point +#undef double_couple_jac_point_iter +#undef double_couple_point +#undef double_couple_point_iter + +#define add_couple_jac_points SQISIGN_NAMESPACE(add_couple_jac_points) +#define copy_bases_to_kernel SQISIGN_NAMESPACE(copy_bases_to_kernel) +#define couple_jac_to_xz SQISIGN_NAMESPACE(couple_jac_to_xz) +#define double_couple_jac_point 
SQISIGN_NAMESPACE(double_couple_jac_point) +#define double_couple_jac_point_iter SQISIGN_NAMESPACE(double_couple_jac_point_iter) +#define double_couple_point SQISIGN_NAMESPACE(double_couple_point) +#define double_couple_point_iter SQISIGN_NAMESPACE(double_couple_point_iter) + +// Namespacing symbols exported from hnf.c: +#undef ibz_mat_4x4_is_hnf +#undef ibz_mat_4xn_hnf_mod_core +#undef ibz_vec_4_copy_mod +#undef ibz_vec_4_linear_combination_mod +#undef ibz_vec_4_scalar_mul_mod + +#define ibz_mat_4x4_is_hnf SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_is_hnf) +#define ibz_mat_4xn_hnf_mod_core SQISIGN_NAMESPACE_GENERIC(ibz_mat_4xn_hnf_mod_core) +#define ibz_vec_4_copy_mod SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_copy_mod) +#define ibz_vec_4_linear_combination_mod SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_linear_combination_mod) +#define ibz_vec_4_scalar_mul_mod SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_scalar_mul_mod) + +// Namespacing symbols exported from hnf_internal.c: +#undef ibz_centered_mod +#undef ibz_conditional_assign +#undef ibz_mod_not_zero +#undef ibz_xgcd_with_u_not_0 + +#define ibz_centered_mod SQISIGN_NAMESPACE_GENERIC(ibz_centered_mod) +#define ibz_conditional_assign SQISIGN_NAMESPACE_GENERIC(ibz_conditional_assign) +#define ibz_mod_not_zero SQISIGN_NAMESPACE_GENERIC(ibz_mod_not_zero) +#define ibz_xgcd_with_u_not_0 SQISIGN_NAMESPACE_GENERIC(ibz_xgcd_with_u_not_0) + +// Namespacing symbols exported from ibz_division.c: +#undef ibz_xgcd + +#define ibz_xgcd SQISIGN_NAMESPACE_GENERIC(ibz_xgcd) + +// Namespacing symbols exported from id2iso.c: +#undef change_of_basis_matrix_tate +#undef change_of_basis_matrix_tate_invert +#undef ec_biscalar_mul_ibz_vec +#undef endomorphism_application_even_basis +#undef id2iso_ideal_to_kernel_dlogs_even +#undef id2iso_kernel_dlogs_to_ideal_even +#undef matrix_application_even_basis + +#define change_of_basis_matrix_tate SQISIGN_NAMESPACE(change_of_basis_matrix_tate) +#define change_of_basis_matrix_tate_invert 
SQISIGN_NAMESPACE(change_of_basis_matrix_tate_invert) +#define ec_biscalar_mul_ibz_vec SQISIGN_NAMESPACE(ec_biscalar_mul_ibz_vec) +#define endomorphism_application_even_basis SQISIGN_NAMESPACE(endomorphism_application_even_basis) +#define id2iso_ideal_to_kernel_dlogs_even SQISIGN_NAMESPACE(id2iso_ideal_to_kernel_dlogs_even) +#define id2iso_kernel_dlogs_to_ideal_even SQISIGN_NAMESPACE(id2iso_kernel_dlogs_to_ideal_even) +#define matrix_application_even_basis SQISIGN_NAMESPACE(matrix_application_even_basis) + +// Namespacing symbols exported from ideal.c: +#undef quat_lideal_add +#undef quat_lideal_class_gram +#undef quat_lideal_conjugate_without_hnf +#undef quat_lideal_copy +#undef quat_lideal_create +#undef quat_lideal_create_principal +#undef quat_lideal_equals +#undef quat_lideal_generator +#undef quat_lideal_inter +#undef quat_lideal_inverse_lattice_without_hnf +#undef quat_lideal_mul +#undef quat_lideal_norm +#undef quat_lideal_right_order +#undef quat_lideal_right_transporter +#undef quat_order_discriminant +#undef quat_order_is_maximal + +#define quat_lideal_add SQISIGN_NAMESPACE_GENERIC(quat_lideal_add) +#define quat_lideal_class_gram SQISIGN_NAMESPACE_GENERIC(quat_lideal_class_gram) +#define quat_lideal_conjugate_without_hnf SQISIGN_NAMESPACE_GENERIC(quat_lideal_conjugate_without_hnf) +#define quat_lideal_copy SQISIGN_NAMESPACE_GENERIC(quat_lideal_copy) +#define quat_lideal_create SQISIGN_NAMESPACE_GENERIC(quat_lideal_create) +#define quat_lideal_create_principal SQISIGN_NAMESPACE_GENERIC(quat_lideal_create_principal) +#define quat_lideal_equals SQISIGN_NAMESPACE_GENERIC(quat_lideal_equals) +#define quat_lideal_generator SQISIGN_NAMESPACE_GENERIC(quat_lideal_generator) +#define quat_lideal_inter SQISIGN_NAMESPACE_GENERIC(quat_lideal_inter) +#define quat_lideal_inverse_lattice_without_hnf SQISIGN_NAMESPACE_GENERIC(quat_lideal_inverse_lattice_without_hnf) +#define quat_lideal_mul SQISIGN_NAMESPACE_GENERIC(quat_lideal_mul) +#define quat_lideal_norm 
SQISIGN_NAMESPACE_GENERIC(quat_lideal_norm) +#define quat_lideal_right_order SQISIGN_NAMESPACE_GENERIC(quat_lideal_right_order) +#define quat_lideal_right_transporter SQISIGN_NAMESPACE_GENERIC(quat_lideal_right_transporter) +#define quat_order_discriminant SQISIGN_NAMESPACE_GENERIC(quat_order_discriminant) +#define quat_order_is_maximal SQISIGN_NAMESPACE_GENERIC(quat_order_is_maximal) + +// Namespacing symbols exported from intbig.c: +#undef ibz_abs +#undef ibz_add +#undef ibz_bitsize +#undef ibz_cmp +#undef ibz_cmp_int32 +#undef ibz_convert_to_str +#undef ibz_copy +#undef ibz_copy_digits +#undef ibz_div +#undef ibz_div_2exp +#undef ibz_div_floor +#undef ibz_divides +#undef ibz_finalize +#undef ibz_gcd +#undef ibz_get +#undef ibz_init +#undef ibz_invmod +#undef ibz_is_even +#undef ibz_is_odd +#undef ibz_is_one +#undef ibz_is_zero +#undef ibz_legendre +#undef ibz_mod +#undef ibz_mod_ui +#undef ibz_mul +#undef ibz_neg +#undef ibz_pow +#undef ibz_pow_mod +#undef ibz_print +#undef ibz_probab_prime +#undef ibz_rand_interval +#undef ibz_rand_interval_bits +#undef ibz_rand_interval_i +#undef ibz_rand_interval_minm_m +#undef ibz_set +#undef ibz_set_from_str +#undef ibz_size_in_base +#undef ibz_sqrt +#undef ibz_sqrt_floor +#undef ibz_sqrt_mod_p +#undef ibz_sub +#undef ibz_swap +#undef ibz_to_digits +#undef ibz_two_adic + +#define ibz_abs SQISIGN_NAMESPACE_GENERIC(ibz_abs) +#define ibz_add SQISIGN_NAMESPACE_GENERIC(ibz_add) +#define ibz_bitsize SQISIGN_NAMESPACE_GENERIC(ibz_bitsize) +#define ibz_cmp SQISIGN_NAMESPACE_GENERIC(ibz_cmp) +#define ibz_cmp_int32 SQISIGN_NAMESPACE_GENERIC(ibz_cmp_int32) +#define ibz_convert_to_str SQISIGN_NAMESPACE_GENERIC(ibz_convert_to_str) +#define ibz_copy SQISIGN_NAMESPACE_GENERIC(ibz_copy) +#define ibz_copy_digits SQISIGN_NAMESPACE_GENERIC(ibz_copy_digits) +#define ibz_div SQISIGN_NAMESPACE_GENERIC(ibz_div) +#define ibz_div_2exp SQISIGN_NAMESPACE_GENERIC(ibz_div_2exp) +#define ibz_div_floor SQISIGN_NAMESPACE_GENERIC(ibz_div_floor) +#define 
ibz_divides SQISIGN_NAMESPACE_GENERIC(ibz_divides) +#define ibz_finalize SQISIGN_NAMESPACE_GENERIC(ibz_finalize) +#define ibz_gcd SQISIGN_NAMESPACE_GENERIC(ibz_gcd) +#define ibz_get SQISIGN_NAMESPACE_GENERIC(ibz_get) +#define ibz_init SQISIGN_NAMESPACE_GENERIC(ibz_init) +#define ibz_invmod SQISIGN_NAMESPACE_GENERIC(ibz_invmod) +#define ibz_is_even SQISIGN_NAMESPACE_GENERIC(ibz_is_even) +#define ibz_is_odd SQISIGN_NAMESPACE_GENERIC(ibz_is_odd) +#define ibz_is_one SQISIGN_NAMESPACE_GENERIC(ibz_is_one) +#define ibz_is_zero SQISIGN_NAMESPACE_GENERIC(ibz_is_zero) +#define ibz_legendre SQISIGN_NAMESPACE_GENERIC(ibz_legendre) +#define ibz_mod SQISIGN_NAMESPACE_GENERIC(ibz_mod) +#define ibz_mod_ui SQISIGN_NAMESPACE_GENERIC(ibz_mod_ui) +#define ibz_mul SQISIGN_NAMESPACE_GENERIC(ibz_mul) +#define ibz_neg SQISIGN_NAMESPACE_GENERIC(ibz_neg) +#define ibz_pow SQISIGN_NAMESPACE_GENERIC(ibz_pow) +#define ibz_pow_mod SQISIGN_NAMESPACE_GENERIC(ibz_pow_mod) +#define ibz_print SQISIGN_NAMESPACE_GENERIC(ibz_print) +#define ibz_probab_prime SQISIGN_NAMESPACE_GENERIC(ibz_probab_prime) +#define ibz_rand_interval SQISIGN_NAMESPACE_GENERIC(ibz_rand_interval) +#define ibz_rand_interval_bits SQISIGN_NAMESPACE_GENERIC(ibz_rand_interval_bits) +#define ibz_rand_interval_i SQISIGN_NAMESPACE_GENERIC(ibz_rand_interval_i) +#define ibz_rand_interval_minm_m SQISIGN_NAMESPACE_GENERIC(ibz_rand_interval_minm_m) +#define ibz_set SQISIGN_NAMESPACE_GENERIC(ibz_set) +#define ibz_set_from_str SQISIGN_NAMESPACE_GENERIC(ibz_set_from_str) +#define ibz_size_in_base SQISIGN_NAMESPACE_GENERIC(ibz_size_in_base) +#define ibz_sqrt SQISIGN_NAMESPACE_GENERIC(ibz_sqrt) +#define ibz_sqrt_floor SQISIGN_NAMESPACE_GENERIC(ibz_sqrt_floor) +#define ibz_sqrt_mod_p SQISIGN_NAMESPACE_GENERIC(ibz_sqrt_mod_p) +#define ibz_sub SQISIGN_NAMESPACE_GENERIC(ibz_sub) +#define ibz_swap SQISIGN_NAMESPACE_GENERIC(ibz_swap) +#define ibz_to_digits SQISIGN_NAMESPACE_GENERIC(ibz_to_digits) +#define ibz_two_adic 
SQISIGN_NAMESPACE_GENERIC(ibz_two_adic) + +// Namespacing symbols exported from integers.c: +#undef ibz_cornacchia_prime +#undef ibz_generate_random_prime + +#define ibz_cornacchia_prime SQISIGN_NAMESPACE_GENERIC(ibz_cornacchia_prime) +#define ibz_generate_random_prime SQISIGN_NAMESPACE_GENERIC(ibz_generate_random_prime) + +// Namespacing symbols exported from isog_chains.c: +#undef ec_eval_even +#undef ec_eval_small_chain +#undef ec_iso_eval +#undef ec_isomorphism + +#define ec_eval_even SQISIGN_NAMESPACE(ec_eval_even) +#define ec_eval_small_chain SQISIGN_NAMESPACE(ec_eval_small_chain) +#define ec_iso_eval SQISIGN_NAMESPACE(ec_iso_eval) +#define ec_isomorphism SQISIGN_NAMESPACE(ec_isomorphism) + +// Namespacing symbols exported from keygen.c: +#undef protocols_keygen +#undef secret_key_finalize +#undef secret_key_init + +#define protocols_keygen SQISIGN_NAMESPACE(protocols_keygen) +#define secret_key_finalize SQISIGN_NAMESPACE(secret_key_finalize) +#define secret_key_init SQISIGN_NAMESPACE(secret_key_init) + +// Namespacing symbols exported from l2.c: +#undef quat_lattice_lll +#undef quat_lll_core + +#define quat_lattice_lll SQISIGN_NAMESPACE_GENERIC(quat_lattice_lll) +#define quat_lll_core SQISIGN_NAMESPACE_GENERIC(quat_lll_core) + +// Namespacing symbols exported from lat_ball.c: +#undef quat_lattice_bound_parallelogram +#undef quat_lattice_sample_from_ball + +#define quat_lattice_bound_parallelogram SQISIGN_NAMESPACE_GENERIC(quat_lattice_bound_parallelogram) +#define quat_lattice_sample_from_ball SQISIGN_NAMESPACE_GENERIC(quat_lattice_sample_from_ball) + +// Namespacing symbols exported from lattice.c: +#undef quat_lattice_add +#undef quat_lattice_alg_elem_mul +#undef quat_lattice_conjugate_without_hnf +#undef quat_lattice_contains +#undef quat_lattice_dual_without_hnf +#undef quat_lattice_equal +#undef quat_lattice_gram +#undef quat_lattice_hnf +#undef quat_lattice_inclusion +#undef quat_lattice_index +#undef quat_lattice_intersect +#undef 
quat_lattice_mat_alg_coord_mul_without_hnf +#undef quat_lattice_mul +#undef quat_lattice_reduce_denom + +#define quat_lattice_add SQISIGN_NAMESPACE_GENERIC(quat_lattice_add) +#define quat_lattice_alg_elem_mul SQISIGN_NAMESPACE_GENERIC(quat_lattice_alg_elem_mul) +#define quat_lattice_conjugate_without_hnf SQISIGN_NAMESPACE_GENERIC(quat_lattice_conjugate_without_hnf) +#define quat_lattice_contains SQISIGN_NAMESPACE_GENERIC(quat_lattice_contains) +#define quat_lattice_dual_without_hnf SQISIGN_NAMESPACE_GENERIC(quat_lattice_dual_without_hnf) +#define quat_lattice_equal SQISIGN_NAMESPACE_GENERIC(quat_lattice_equal) +#define quat_lattice_gram SQISIGN_NAMESPACE_GENERIC(quat_lattice_gram) +#define quat_lattice_hnf SQISIGN_NAMESPACE_GENERIC(quat_lattice_hnf) +#define quat_lattice_inclusion SQISIGN_NAMESPACE_GENERIC(quat_lattice_inclusion) +#define quat_lattice_index SQISIGN_NAMESPACE_GENERIC(quat_lattice_index) +#define quat_lattice_intersect SQISIGN_NAMESPACE_GENERIC(quat_lattice_intersect) +#define quat_lattice_mat_alg_coord_mul_without_hnf SQISIGN_NAMESPACE_GENERIC(quat_lattice_mat_alg_coord_mul_without_hnf) +#define quat_lattice_mul SQISIGN_NAMESPACE_GENERIC(quat_lattice_mul) +#define quat_lattice_reduce_denom SQISIGN_NAMESPACE_GENERIC(quat_lattice_reduce_denom) + +// Namespacing symbols exported from lll_applications.c: +#undef quat_lideal_lideal_mul_reduced +#undef quat_lideal_prime_norm_reduced_equivalent +#undef quat_lideal_reduce_basis + +#define quat_lideal_lideal_mul_reduced SQISIGN_NAMESPACE_GENERIC(quat_lideal_lideal_mul_reduced) +#define quat_lideal_prime_norm_reduced_equivalent SQISIGN_NAMESPACE_GENERIC(quat_lideal_prime_norm_reduced_equivalent) +#define quat_lideal_reduce_basis SQISIGN_NAMESPACE_GENERIC(quat_lideal_reduce_basis) + +// Namespacing symbols exported from lll_verification.c: +#undef ibq_vec_4_copy_ibz +#undef quat_lll_bilinear +#undef quat_lll_gram_schmidt_transposed_with_ibq +#undef quat_lll_set_ibq_parameters +#undef quat_lll_verify + +#define 
ibq_vec_4_copy_ibz SQISIGN_NAMESPACE_GENERIC(ibq_vec_4_copy_ibz) +#define quat_lll_bilinear SQISIGN_NAMESPACE_GENERIC(quat_lll_bilinear) +#define quat_lll_gram_schmidt_transposed_with_ibq SQISIGN_NAMESPACE_GENERIC(quat_lll_gram_schmidt_transposed_with_ibq) +#define quat_lll_set_ibq_parameters SQISIGN_NAMESPACE_GENERIC(quat_lll_set_ibq_parameters) +#define quat_lll_verify SQISIGN_NAMESPACE_GENERIC(quat_lll_verify) + +// Namespacing symbols exported from mem.c: +#undef sqisign_secure_clear +#undef sqisign_secure_free + +#define sqisign_secure_clear SQISIGN_NAMESPACE_GENERIC(sqisign_secure_clear) +#define sqisign_secure_free SQISIGN_NAMESPACE_GENERIC(sqisign_secure_free) + +// Namespacing symbols exported from mp.c: +#undef MUL +#undef mp_add +#undef mp_compare +#undef mp_copy +#undef mp_inv_2e +#undef mp_invert_matrix +#undef mp_is_one +#undef mp_is_zero +#undef mp_mod_2exp +#undef mp_mul +#undef mp_mul2 +#undef mp_neg +#undef mp_print +#undef mp_shiftl +#undef mp_shiftr +#undef mp_sub +#undef multiple_mp_shiftl +#undef select_ct +#undef swap_ct + +#define MUL SQISIGN_NAMESPACE_GENERIC(MUL) +#define mp_add SQISIGN_NAMESPACE_GENERIC(mp_add) +#define mp_compare SQISIGN_NAMESPACE_GENERIC(mp_compare) +#define mp_copy SQISIGN_NAMESPACE_GENERIC(mp_copy) +#define mp_inv_2e SQISIGN_NAMESPACE_GENERIC(mp_inv_2e) +#define mp_invert_matrix SQISIGN_NAMESPACE_GENERIC(mp_invert_matrix) +#define mp_is_one SQISIGN_NAMESPACE_GENERIC(mp_is_one) +#define mp_is_zero SQISIGN_NAMESPACE_GENERIC(mp_is_zero) +#define mp_mod_2exp SQISIGN_NAMESPACE_GENERIC(mp_mod_2exp) +#define mp_mul SQISIGN_NAMESPACE_GENERIC(mp_mul) +#define mp_mul2 SQISIGN_NAMESPACE_GENERIC(mp_mul2) +#define mp_neg SQISIGN_NAMESPACE_GENERIC(mp_neg) +#define mp_print SQISIGN_NAMESPACE_GENERIC(mp_print) +#define mp_shiftl SQISIGN_NAMESPACE_GENERIC(mp_shiftl) +#define mp_shiftr SQISIGN_NAMESPACE_GENERIC(mp_shiftr) +#define mp_sub SQISIGN_NAMESPACE_GENERIC(mp_sub) +#define multiple_mp_shiftl 
SQISIGN_NAMESPACE_GENERIC(multiple_mp_shiftl) +#define select_ct SQISIGN_NAMESPACE_GENERIC(select_ct) +#define swap_ct SQISIGN_NAMESPACE_GENERIC(swap_ct) + +// Namespacing symbols exported from normeq.c: +#undef quat_change_to_O0_basis +#undef quat_lattice_O0_set +#undef quat_lattice_O0_set_extremal +#undef quat_order_elem_create +#undef quat_represent_integer +#undef quat_sampling_random_ideal_O0_given_norm + +#define quat_change_to_O0_basis SQISIGN_NAMESPACE_GENERIC(quat_change_to_O0_basis) +#define quat_lattice_O0_set SQISIGN_NAMESPACE_GENERIC(quat_lattice_O0_set) +#define quat_lattice_O0_set_extremal SQISIGN_NAMESPACE_GENERIC(quat_lattice_O0_set_extremal) +#define quat_order_elem_create SQISIGN_NAMESPACE_GENERIC(quat_order_elem_create) +#define quat_represent_integer SQISIGN_NAMESPACE_GENERIC(quat_represent_integer) +#define quat_sampling_random_ideal_O0_given_norm SQISIGN_NAMESPACE_GENERIC(quat_sampling_random_ideal_O0_given_norm) + +// Namespacing symbols exported from printer.c: +#undef ibz_mat_2x2_print +#undef ibz_mat_4x4_print +#undef ibz_vec_2_print +#undef ibz_vec_4_print +#undef quat_alg_elem_print +#undef quat_alg_print +#undef quat_lattice_print +#undef quat_left_ideal_print + +#define ibz_mat_2x2_print SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_print) +#define ibz_mat_4x4_print SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_print) +#define ibz_vec_2_print SQISIGN_NAMESPACE_GENERIC(ibz_vec_2_print) +#define ibz_vec_4_print SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_print) +#define quat_alg_elem_print SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_print) +#define quat_alg_print SQISIGN_NAMESPACE_GENERIC(quat_alg_print) +#define quat_lattice_print SQISIGN_NAMESPACE_GENERIC(quat_lattice_print) +#define quat_left_ideal_print SQISIGN_NAMESPACE_GENERIC(quat_left_ideal_print) + +// Namespacing symbols exported from random_input_generation.c: +#undef quat_test_input_random_ideal_generation +#undef quat_test_input_random_ideal_lattice_generation +#undef 
quat_test_input_random_lattice_generation + +#define quat_test_input_random_ideal_generation SQISIGN_NAMESPACE_GENERIC(quat_test_input_random_ideal_generation) +#define quat_test_input_random_ideal_lattice_generation SQISIGN_NAMESPACE_GENERIC(quat_test_input_random_ideal_lattice_generation) +#define quat_test_input_random_lattice_generation SQISIGN_NAMESPACE_GENERIC(quat_test_input_random_lattice_generation) + +// Namespacing symbols exported from rationals.c: +#undef ibq_abs +#undef ibq_add +#undef ibq_cmp +#undef ibq_copy +#undef ibq_finalize +#undef ibq_init +#undef ibq_inv +#undef ibq_is_ibz +#undef ibq_is_one +#undef ibq_is_zero +#undef ibq_mat_4x4_finalize +#undef ibq_mat_4x4_init +#undef ibq_mat_4x4_print +#undef ibq_mul +#undef ibq_neg +#undef ibq_reduce +#undef ibq_set +#undef ibq_sub +#undef ibq_to_ibz +#undef ibq_vec_4_finalize +#undef ibq_vec_4_init +#undef ibq_vec_4_print + +#define ibq_abs SQISIGN_NAMESPACE_GENERIC(ibq_abs) +#define ibq_add SQISIGN_NAMESPACE_GENERIC(ibq_add) +#define ibq_cmp SQISIGN_NAMESPACE_GENERIC(ibq_cmp) +#define ibq_copy SQISIGN_NAMESPACE_GENERIC(ibq_copy) +#define ibq_finalize SQISIGN_NAMESPACE_GENERIC(ibq_finalize) +#define ibq_init SQISIGN_NAMESPACE_GENERIC(ibq_init) +#define ibq_inv SQISIGN_NAMESPACE_GENERIC(ibq_inv) +#define ibq_is_ibz SQISIGN_NAMESPACE_GENERIC(ibq_is_ibz) +#define ibq_is_one SQISIGN_NAMESPACE_GENERIC(ibq_is_one) +#define ibq_is_zero SQISIGN_NAMESPACE_GENERIC(ibq_is_zero) +#define ibq_mat_4x4_finalize SQISIGN_NAMESPACE_GENERIC(ibq_mat_4x4_finalize) +#define ibq_mat_4x4_init SQISIGN_NAMESPACE_GENERIC(ibq_mat_4x4_init) +#define ibq_mat_4x4_print SQISIGN_NAMESPACE_GENERIC(ibq_mat_4x4_print) +#define ibq_mul SQISIGN_NAMESPACE_GENERIC(ibq_mul) +#define ibq_neg SQISIGN_NAMESPACE_GENERIC(ibq_neg) +#define ibq_reduce SQISIGN_NAMESPACE_GENERIC(ibq_reduce) +#define ibq_set SQISIGN_NAMESPACE_GENERIC(ibq_set) +#define ibq_sub SQISIGN_NAMESPACE_GENERIC(ibq_sub) +#define ibq_to_ibz SQISIGN_NAMESPACE_GENERIC(ibq_to_ibz) 
+#define ibq_vec_4_finalize SQISIGN_NAMESPACE_GENERIC(ibq_vec_4_finalize) +#define ibq_vec_4_init SQISIGN_NAMESPACE_GENERIC(ibq_vec_4_init) +#define ibq_vec_4_print SQISIGN_NAMESPACE_GENERIC(ibq_vec_4_print) + +// Namespacing symbols exported from sign.c: +#undef protocols_sign + +#define protocols_sign SQISIGN_NAMESPACE(protocols_sign) + +// Namespacing symbols exported from sqisign.c: +#undef sqisign_keypair +#undef sqisign_open +#undef sqisign_sign +#undef sqisign_verify + +#define sqisign_keypair SQISIGN_NAMESPACE(sqisign_keypair) +#define sqisign_open SQISIGN_NAMESPACE(sqisign_open) +#define sqisign_sign SQISIGN_NAMESPACE(sqisign_sign) +#define sqisign_verify SQISIGN_NAMESPACE(sqisign_verify) + +// Namespacing symbols exported from theta_isogenies.c: +#undef theta_chain_compute_and_eval +#undef theta_chain_compute_and_eval_randomized +#undef theta_chain_compute_and_eval_verify + +#define theta_chain_compute_and_eval SQISIGN_NAMESPACE(theta_chain_compute_and_eval) +#define theta_chain_compute_and_eval_randomized SQISIGN_NAMESPACE(theta_chain_compute_and_eval_randomized) +#define theta_chain_compute_and_eval_verify SQISIGN_NAMESPACE(theta_chain_compute_and_eval_verify) + +// Namespacing symbols exported from theta_structure.c: +#undef double_iter +#undef double_point +#undef is_product_theta_point +#undef theta_precomputation + +#define double_iter SQISIGN_NAMESPACE(double_iter) +#define double_point SQISIGN_NAMESPACE(double_point) +#define is_product_theta_point SQISIGN_NAMESPACE(is_product_theta_point) +#define theta_precomputation SQISIGN_NAMESPACE(theta_precomputation) + +// Namespacing symbols exported from verify.c: +#undef protocols_verify + +#define protocols_verify SQISIGN_NAMESPACE(protocols_verify) + +// Namespacing symbols exported from xeval.c: +#undef xeval_2 +#undef xeval_2_singular +#undef xeval_4 + +#define xeval_2 SQISIGN_NAMESPACE(xeval_2) +#define xeval_2_singular SQISIGN_NAMESPACE(xeval_2_singular) +#define xeval_4 SQISIGN_NAMESPACE(xeval_4) 
+ +// Namespacing symbols exported from xisog.c: +#undef xisog_2 +#undef xisog_2_singular +#undef xisog_4 + +#define xisog_2 SQISIGN_NAMESPACE(xisog_2) +#define xisog_2_singular SQISIGN_NAMESPACE(xisog_2_singular) +#define xisog_4 SQISIGN_NAMESPACE(xisog_4) + + +#endif + diff --git a/src/pqm4/sqisign_lvl3/ref/theta_isogenies.c b/src/pqm4/sqisign_lvl3/ref/theta_isogenies.c new file mode 100644 index 0000000..478a9ab --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/theta_isogenies.c @@ -0,0 +1,1283 @@ +#include "theta_isogenies.h" +#include +#include +#include +#include +#include + +// Select a base change matrix in constant time, with M1 a regular +// base change matrix and M2 a precomputed base change matrix +// If option = 0 then M <- M1, else if option = 0xFF...FF then M <- M2 +static inline void +select_base_change_matrix(basis_change_matrix_t *M, + const basis_change_matrix_t *M1, + const precomp_basis_change_matrix_t *M2, + const uint32_t option) +{ + for (int i = 0; i < 4; i++) + for (int j = 0; j < 4; j++) + fp2_select(&M->m[i][j], &M1->m[i][j], &FP2_CONSTANTS[M2->m[i][j]], option); +} + +// Set a regular base change matrix from a precomputed one +static inline void +set_base_change_matrix_from_precomp(basis_change_matrix_t *res, const precomp_basis_change_matrix_t *M) +{ + for (int i = 0; i < 4; i++) + for (int j = 0; j < 4; j++) + res->m[i][j] = FP2_CONSTANTS[M->m[i][j]]; +} + +static inline void +choose_index_theta_point(fp2_t *res, int ind, const theta_point_t *T) +{ + const fp2_t *src = NULL; + switch (ind % 4) { + case 0: + src = &T->x; + break; + case 1: + src = &T->y; + break; + case 2: + src = &T->z; + break; + case 3: + src = &T->t; + break; + default: + assert(0); + } + fp2_copy(res, src); +} + +// same as apply_isomorphism method but more efficient when the t component of P is zero. 
+static void
+apply_isomorphism_general(theta_point_t *res,
+                          const basis_change_matrix_t *M,
+                          const theta_point_t *P,
+                          const bool Pt_not_zero)
+{
+    // row_c holds the four coefficients that produce output coordinate c
+    const fp2_t *row_x = M->m[0];
+    const fp2_t *row_y = M->m[1];
+    const fp2_t *row_z = M->m[2];
+    const fp2_t *row_t = M->m[3];
+    // The result is built in a scratch point and copied out at the end,
+    // so res is only written once every read of P is done.
+    theta_point_t acc;
+    fp2_t term;
+
+    // Contributions of P.x, P.y and P.z to each output coordinate
+    fp2_mul(&acc.x, &P->x, &row_x[0]);
+    fp2_mul(&term, &P->y, &row_x[1]);
+    fp2_add(&acc.x, &acc.x, &term);
+    fp2_mul(&term, &P->z, &row_x[2]);
+    fp2_add(&acc.x, &acc.x, &term);
+
+    fp2_mul(&acc.y, &P->x, &row_y[0]);
+    fp2_mul(&term, &P->y, &row_y[1]);
+    fp2_add(&acc.y, &acc.y, &term);
+    fp2_mul(&term, &P->z, &row_y[2]);
+    fp2_add(&acc.y, &acc.y, &term);
+
+    fp2_mul(&acc.z, &P->x, &row_z[0]);
+    fp2_mul(&term, &P->y, &row_z[1]);
+    fp2_add(&acc.z, &acc.z, &term);
+    fp2_mul(&term, &P->z, &row_z[2]);
+    fp2_add(&acc.z, &acc.z, &term);
+
+    fp2_mul(&acc.t, &P->x, &row_t[0]);
+    fp2_mul(&term, &P->y, &row_t[1]);
+    fp2_add(&acc.t, &acc.t, &term);
+    fp2_mul(&term, &P->z, &row_t[2]);
+    fp2_add(&acc.t, &acc.t, &term);
+
+    // The last column of M is only used when the caller flags P.t as non-zero
+    if (Pt_not_zero) {
+        fp2_mul(&term, &P->t, &row_x[3]);
+        fp2_add(&acc.x, &acc.x, &term);
+
+        fp2_mul(&term, &P->t, &row_y[3]);
+        fp2_add(&acc.y, &acc.y, &term);
+
+        fp2_mul(&term, &P->t, &row_z[3]);
+        fp2_add(&acc.z, &acc.z, &term);
+
+        fp2_mul(&term, &P->t, &row_t[3]);
+        fp2_add(&acc.t, &acc.t, &term);
+    }
+
+    fp2_copy(&res->x, &acc.x);
+    fp2_copy(&res->y, &acc.y);
+    fp2_copy(&res->z, &acc.z);
+    fp2_copy(&res->t, &acc.t);
+}
+
+// Full matrix-vector product res = M * P: all four coordinates of P,
+// including t, take part.
+static void
+apply_isomorphism(theta_point_t *res, const basis_change_matrix_t *M, const theta_point_t *P)
+{
+    apply_isomorphism_general(res, M, P, true);
+}
+
+// set res = M1 * M2 with matrix multiplication
+// The product is accumulated in a local matrix and assigned to *res last,
+// so res may be the same object as M1 or M2.
+static void
+base_change_matrix_multiplication(basis_change_matrix_t *res,
+                                  const basis_change_matrix_t *M1,
+                                  const basis_change_matrix_t *M2)
+{
+    basis_change_matrix_t product;
+    fp2_t entry, term;
+
+    for (int row = 0; row < 4; row++) {
+        for (int col = 0; col < 4; col++) {
+            // entry = sum over k of M1[row][k] * M2[k][col]
+            fp2_set_zero(&entry);
+            for (int k = 0; k < 4; k++) {
+                fp2_mul(&term, &M1->m[row][k], &M2->m[k][col]);
+                fp2_add(&entry, &entry, &term);
+            }
+            product.m[row][col] = entry;
+        }
+    }
+    *res = product;
+}
+
+// compute the theta_point corresponding to the couple of point T on an elliptic product
+static void
+base_change(theta_point_t *out, const theta_gluing_t *phi, const theta_couple_point_t *T)
+{
+    // Pairwise products of the (X : Z) coordinates of T->P1 and T->P2:
+    //   (P1.x P2.x : P1.x P2.z : P1.z P2.x : P1.z P2.z)
+    theta_point_t product_coords;
+
+    fp2_mul(&product_coords.t, &T->P1.z, &T->P2.z);
+    fp2_mul(&product_coords.z, &T->P2.x, &T->P1.z);
+    fp2_mul(&product_coords.y, &T->P1.x, &T->P2.z);
+    fp2_mul(&product_coords.x, &T->P1.x, &T->P2.x);
+
+    // Map the product coordinates through the basis change stored in phi
+    apply_isomorphism(out, &phi->M, &product_coords);
+}
+
+// Collects the two values that must be inverted for one (P4, P2) pair:
+// the Z-coordinate of P4 and the determinant P4.x * P2.z - P4.z * P2.x.
+// Despite the output names, nothing is inverted here.
+static void
+action_by_translation_z_and_det(fp2_t *z_inv, fp2_t *det_inv, const ec_point_t *P4, const ec_point_t *P2)
+{
+    fp2_t cross;
+
+    // det = P4.x * P2.z - P4.z * P2.x
+    fp2_mul(&cross, &P4->z, &P2->x);
+    fp2_mul(det_inv, &P4->x, &P2->z);
+    fp2_sub(det_inv, det_inv, &cross);
+
+    // Z-coordinate of P4
+    fp2_copy(z_inv, &P4->z);
+}
+
+// Fills the 2x2 translation matrix G for one (P4, P2) pair from the values
+// passed in z_inv and det_inv.
+static void
+action_by_translation_compute_matrix(translation_matrix_t *G,
+                                     const ec_point_t *P4,
+                                     const ec_point_t *P2,
+                                     const fp2_t *z_inv,
+                                     const fp2_t *det_inv)
+{
+    fp2_t x_times_zinv;
+
+    // g11 = P2.x * det_inv * P4.z
+    fp2_mul(&G->g11, &P2->x, det_inv);
+    fp2_mul(&G->g11, &G->g11, &P4->z);
+
+    // g00 = -g11
+    fp2_neg(&G->g00, &G->g11);
+
+    // g01 = -(P2.z * det_inv * P4.z)
+    fp2_mul(&G->g01, &P2->z, det_inv);
+    fp2_mul(&G->g01, &G->g01, &P4->z);
+    fp2_neg(&G->g01, &G->g01);
+
+    // g10 = P4.x * P2.x * det_inv - P4.x * z_inv
+    fp2_mul(&x_times_zinv, &P4->x, z_inv);
+    fp2_mul(&G->g10, &P4->x, &P2->x);
+    fp2_mul(&G->g10, &G->g10, det_inv);
+    fp2_sub(&G->g10, &G->g10, &x_times_zinv);
+}
+
+// Returns 1 if the basis is as expected and 0 otherwise
+// We only expect this to fail for malformed signatures, so
+// do not require this to run in constant time.
+static int
+verify_two_torsion(const theta_couple_point_t *K1_2, const theta_couple_point_t *K2_2, const theta_couple_curve_t *E12)
+{
+    theta_couple_point_t dbl_K1, dbl_K2;
+
+    // Reject if any component of K1_2 or K2_2 is zero: in that case the
+    // points did not have order 8 when we started gluing
+    if (ec_is_zero(&K1_2->P1) | ec_is_zero(&K1_2->P2) | ec_is_zero(&K2_2->P1) | ec_is_zero(&K2_2->P2)) {
+        return 0;
+    }
+
+    // P1, Q1 and P2, Q2 must be independent. For points of order two this
+    // means that they're not the same
+    if (ec_is_equal(&K1_2->P1, &K2_2->P1) | ec_is_equal(&K1_2->P2, &K2_2->P2)) {
+        return 0;
+    }
+
+    // Finally, double the points: every double must be zero for the points to
+    // have order exactly 2. If one is not, the points had order 2*f for some f
+    // and the kernel is malformed.
+    double_couple_point(&dbl_K1, K1_2, E12);
+    double_couple_point(&dbl_K2, K2_2, E12);
+
+    return (ec_is_zero(&dbl_K1.P1) & ec_is_zero(&dbl_K1.P2) & ec_is_zero(&dbl_K2.P1) & ec_is_zero(&dbl_K2.P2)) ? 1
+                                                                                                               : 0;
+}
+
+// Computes the action by translation for four points
+// (P1, P2) and (Q1, Q2) on E1 x E2 simultaneously to
+// save on inversions.
+// Returns 0 if the doubles of the input points fail verify_two_torsion
+// (or the batched inversion leaves a zero in its first slot) and 1 otherwise
+static int
+action_by_translation(translation_matrix_t *Gi,
+                      const theta_couple_point_t *K1_4,
+                      const theta_couple_point_t *K2_4,
+                      const theta_couple_curve_t *E12)
+{
+    // Points of order 2 obtained by doubling Ki_4
+    theta_couple_point_t K1_2, K2_2;
+    double_couple_point(&K1_2, K1_4, E12);
+    double_couple_point(&K2_2, K2_4, E12);
+
+    if (!verify_two_torsion(&K1_2, &K2_2, E12)) {
+        return 0;
+    }
+
+    // Pair i is (pts4[i], pts2[i]), taken in the order
+    // K1.P1, K1.P2, K2.P1, K2.P2; Gi[i] is built from pair i.
+    const ec_point_t *const pts4[4] = { &K1_4->P1, &K1_4->P2, &K2_4->P1, &K2_4->P2 };
+    const ec_point_t *const pts2[4] = { &K1_2.P1, &K1_2.P2, &K2_2.P1, &K2_2.P2 };
+
+    // inverses[i] receives the Z coordinate and inverses[4 + i] the
+    // determinant of pair i; all eight values are then inverted with a
+    // single batched inversion
+    fp2_t inverses[8];
+    for (int i = 0; i < 4; i++) {
+        action_by_translation_z_and_det(&inverses[i], &inverses[4 + i], pts4[i], pts2[i]);
+    }
+
+    fp2_batched_inv(inverses, 8);
+    if (fp2_is_zero(&inverses[0])) {
+        // something was wrong with our input (which somehow was not caught by
+        // verify_two_torsion)
+        return 0;
+    }
+
+    for (int i = 0; i < 4; i++) {
+        action_by_translation_compute_matrix(&Gi[i], pts4[i], pts2[i], &inverses[i], &inverses[4 + i]);
+    }
+
+    return 1;
+}
+
+// Given the appropriate four torsion, computes the
+// change of basis to compute the correct theta null
+// point.
+// Returns 0 if the order of K1_4 or K2_4 is not 4 +static int +gluing_change_of_basis(basis_change_matrix_t *M, + const theta_couple_point_t *K1_4, + const theta_couple_point_t *K2_4, + const theta_couple_curve_t *E12) +{ + // Compute the four 2x2 matrices for the action by translation + // on the four points: + translation_matrix_t Gi[4]; + if (!action_by_translation(Gi, K1_4, K2_4, E12)) + return 0; + + // Computation of the 4x4 matrix from Mij + // t001, t101 (resp t002, t102) first column of M11 * M21 (resp M12 * M22) + fp2_t t001, t101, t002, t102, tmp; + + fp2_mul(&t001, &Gi[0].g00, &Gi[2].g00); + fp2_mul(&tmp, &Gi[0].g01, &Gi[2].g10); + fp2_add(&t001, &t001, &tmp); + + fp2_mul(&t101, &Gi[0].g10, &Gi[2].g00); + fp2_mul(&tmp, &Gi[0].g11, &Gi[2].g10); + fp2_add(&t101, &t101, &tmp); + + fp2_mul(&t002, &Gi[1].g00, &Gi[3].g00); + fp2_mul(&tmp, &Gi[1].g01, &Gi[3].g10); + fp2_add(&t002, &t002, &tmp); + + fp2_mul(&t102, &Gi[1].g10, &Gi[3].g00); + fp2_mul(&tmp, &Gi[1].g11, &Gi[3].g10); + fp2_add(&t102, &t102, &tmp); + + // trace for the first row + fp2_set_one(&M->m[0][0]); + fp2_mul(&tmp, &t001, &t002); + fp2_add(&M->m[0][0], &M->m[0][0], &tmp); + fp2_mul(&tmp, &Gi[2].g00, &Gi[3].g00); + fp2_add(&M->m[0][0], &M->m[0][0], &tmp); + fp2_mul(&tmp, &Gi[0].g00, &Gi[1].g00); + fp2_add(&M->m[0][0], &M->m[0][0], &tmp); + + fp2_mul(&M->m[0][1], &t001, &t102); + fp2_mul(&tmp, &Gi[2].g00, &Gi[3].g10); + fp2_add(&M->m[0][1], &M->m[0][1], &tmp); + fp2_mul(&tmp, &Gi[0].g00, &Gi[1].g10); + fp2_add(&M->m[0][1], &M->m[0][1], &tmp); + + fp2_mul(&M->m[0][2], &t101, &t002); + fp2_mul(&tmp, &Gi[2].g10, &Gi[3].g00); + fp2_add(&M->m[0][2], &M->m[0][2], &tmp); + fp2_mul(&tmp, &Gi[0].g10, &Gi[1].g00); + fp2_add(&M->m[0][2], &M->m[0][2], &tmp); + + fp2_mul(&M->m[0][3], &t101, &t102); + fp2_mul(&tmp, &Gi[2].g10, &Gi[3].g10); + fp2_add(&M->m[0][3], &M->m[0][3], &tmp); + fp2_mul(&tmp, &Gi[0].g10, &Gi[1].g10); + fp2_add(&M->m[0][3], &M->m[0][3], &tmp); + + // Compute the action of 
(0,out.K2_4.P2) for the second row + fp2_mul(&tmp, &Gi[3].g01, &M->m[0][1]); + fp2_mul(&M->m[1][0], &Gi[3].g00, &M->m[0][0]); + fp2_add(&M->m[1][0], &M->m[1][0], &tmp); + + fp2_mul(&tmp, &Gi[3].g11, &M->m[0][1]); + fp2_mul(&M->m[1][1], &Gi[3].g10, &M->m[0][0]); + fp2_add(&M->m[1][1], &M->m[1][1], &tmp); + + fp2_mul(&tmp, &Gi[3].g01, &M->m[0][3]); + fp2_mul(&M->m[1][2], &Gi[3].g00, &M->m[0][2]); + fp2_add(&M->m[1][2], &M->m[1][2], &tmp); + + fp2_mul(&tmp, &Gi[3].g11, &M->m[0][3]); + fp2_mul(&M->m[1][3], &Gi[3].g10, &M->m[0][2]); + fp2_add(&M->m[1][3], &M->m[1][3], &tmp); + + // compute the action of (K1_4.P1,0) for the third row + fp2_mul(&tmp, &Gi[0].g01, &M->m[0][2]); + fp2_mul(&M->m[2][0], &Gi[0].g00, &M->m[0][0]); + fp2_add(&M->m[2][0], &M->m[2][0], &tmp); + + fp2_mul(&tmp, &Gi[0].g01, &M->m[0][3]); + fp2_mul(&M->m[2][1], &Gi[0].g00, &M->m[0][1]); + fp2_add(&M->m[2][1], &M->m[2][1], &tmp); + + fp2_mul(&tmp, &Gi[0].g11, &M->m[0][2]); + fp2_mul(&M->m[2][2], &Gi[0].g10, &M->m[0][0]); + fp2_add(&M->m[2][2], &M->m[2][2], &tmp); + + fp2_mul(&tmp, &Gi[0].g11, &M->m[0][3]); + fp2_mul(&M->m[2][3], &Gi[0].g10, &M->m[0][1]); + fp2_add(&M->m[2][3], &M->m[2][3], &tmp); + + // compute the action of (K1_4.P1,K2_4.P2) for the final row + fp2_mul(&tmp, &Gi[0].g01, &M->m[1][2]); + fp2_mul(&M->m[3][0], &Gi[0].g00, &M->m[1][0]); + fp2_add(&M->m[3][0], &M->m[3][0], &tmp); + + fp2_mul(&tmp, &Gi[0].g01, &M->m[1][3]); + fp2_mul(&M->m[3][1], &Gi[0].g00, &M->m[1][1]); + fp2_add(&M->m[3][1], &M->m[3][1], &tmp); + + fp2_mul(&tmp, &Gi[0].g11, &M->m[1][2]); + fp2_mul(&M->m[3][2], &Gi[0].g10, &M->m[1][0]); + fp2_add(&M->m[3][2], &M->m[3][2], &tmp); + + fp2_mul(&tmp, &Gi[0].g11, &M->m[1][3]); + fp2_mul(&M->m[3][3], &Gi[0].g10, &M->m[1][1]); + fp2_add(&M->m[3][3], &M->m[3][3], &tmp); + + return 1; +} + +/** + * @brief Compute the gluing isogeny from an elliptic product + * + * @param out Output: the theta_gluing + * @param K1_8 a couple point + * @param E12 an elliptic curve product + * @param 
K2_8 a point in E2[8]
+ * @param verify when true, run the extra consistency checks in the
+ *               `if (verify)` block near the end and return 0 if one fails
+ *
+ * out : E1xE2 -> A of kernel [4](K1_8,K2_8)
+ * if the kernel supplied has the incorrect order, or gluing seems malformed,
+ * returns 0, otherwise returns 1.
+ */
+static int
+gluing_compute(theta_gluing_t *out,
+               const theta_couple_curve_t *E12,
+               const theta_couple_jac_point_t *xyK1_8,
+               const theta_couple_jac_point_t *xyK2_8,
+               bool verify)
+{
+    // Ensure that we have been given the eight torsion
+    // (debug builds only: a failed check is reported but does not abort)
+#ifndef NDEBUG
+    {
+        int check = test_jac_order_twof(&xyK1_8->P1, &E12->E1, 3);
+        if (!check)
+            debug_print("xyK1_8->P1 does not have order 8");
+        check = test_jac_order_twof(&xyK2_8->P1, &E12->E1, 3);
+        if (!check)
+            debug_print("xyK2_8->P1 does not have order 8");
+        check = test_jac_order_twof(&xyK1_8->P2, &E12->E2, 3);
+        if (!check)
+            debug_print("xyK1_8->P2 does not have order 8");
+        check = test_jac_order_twof(&xyK2_8->P2, &E12->E2, 3);
+        if (!check)
+            debug_print("xyK2_8->P2 does not have order 8");
+    }
+#endif
+
+    out->xyK1_8 = *xyK1_8;
+    out->domain = *E12;
+
+    // Given points in E[8] x E[8] we need the four torsion below
+    theta_couple_jac_point_t xyK1_4, xyK2_4;
+
+    double_couple_jac_point(&xyK1_4, xyK1_8, E12);
+    double_couple_jac_point(&xyK2_4, xyK2_8, E12);
+
+    // Convert from (X:Y:Z) coordinates to (X:Z)
+    theta_couple_point_t K1_8, K2_8;
+    theta_couple_point_t K1_4, K2_4;
+
+    couple_jac_to_xz(&K1_8, xyK1_8);
+    couple_jac_to_xz(&K2_8, xyK2_8);
+    couple_jac_to_xz(&K1_4, &xyK1_4);
+    couple_jac_to_xz(&K2_4, &xyK2_4);
+
+    // Set the basis change matrix, if we have not been given a valid K[8] for this computation
+    // gluing_change_of_basis will detect this and return 0
+    if (!gluing_change_of_basis(&out->M, &K1_4, &K2_4, E12)) {
+        debug_print("gluing failed as kernel does not have correct order");
+        return 0;
+    }
+
+    // apply the base change to the kernel
+    theta_point_t TT1, TT2;
+
+    base_change(&TT1, out, &K1_8);
+    base_change(&TT2, out, &K2_8);
+
+    // compute the codomain
+    to_squared_theta(&TT1, &TT1);
+    to_squared_theta(&TT2,
&TT2); + + // If the kernel is well formed then TT1.t and TT2.t are zero + // if they are not, we exit early as the signature we are validating + // is probably malformed + if (!(fp2_is_zero(&TT1.t) & fp2_is_zero(&TT2.t))) { + debug_print("gluing failed TT1.t or TT2.t is not zero"); + return 0; + } + // Test our projective factors are non zero + if (fp2_is_zero(&TT1.x) | fp2_is_zero(&TT2.x) | fp2_is_zero(&TT1.y) | fp2_is_zero(&TT2.z) | fp2_is_zero(&TT1.z)) + return 0; // invalid input + + // Projective factor: Ax + fp2_mul(&out->codomain.x, &TT1.x, &TT2.x); + fp2_mul(&out->codomain.y, &TT1.y, &TT2.x); + fp2_mul(&out->codomain.z, &TT1.x, &TT2.z); + fp2_set_zero(&out->codomain.t); + // Projective factor: ABCxz + fp2_mul(&out->precomputation.x, &TT1.y, &TT2.z); + fp2_copy(&out->precomputation.y, &out->codomain.z); + fp2_copy(&out->precomputation.z, &out->codomain.y); + fp2_set_zero(&out->precomputation.t); + + // Compute the two components of phi(K1_8) = (x:x:y:y). + fp2_mul(&out->imageK1_8.x, &TT1.x, &out->precomputation.x); + fp2_mul(&out->imageK1_8.y, &TT1.z, &out->precomputation.z); + + // If K1_8 and K2_8 are our 8-torsion points, this ensures that the + // 4-torsion points [2]K1_8 and [2]K2_8 are isotropic. 
+    if (verify) {
+        fp2_t t1, t2;
+        // imageK1_8.x (= TT1.x * precomputation.x) must equal TT1.y * precomputation.y
+        fp2_mul(&t1, &TT1.y, &out->precomputation.y);
+        if (!fp2_is_equal(&out->imageK1_8.x, &t1))
+            return 0;
+        // TT2.x * precomputation.x must equal TT2.z * precomputation.z
+        fp2_mul(&t1, &TT2.x, &out->precomputation.x);
+        fp2_mul(&t2, &TT2.z, &out->precomputation.z);
+        if (!fp2_is_equal(&t2, &t1))
+            return 0;
+    }
+
+    // compute the final codomain
+    hadamard(&out->codomain, &out->codomain);
+    return 1;
+}
+
+// sub routine of the gluing eval
+// Writes to *image the theta point obtained from the couple point P
+// (Jacobian coordinates) and the data stored in the gluing phi.
+static void
+gluing_eval_point(theta_point_t *image, const theta_couple_jac_point_t *P, const theta_gluing_t *phi)
+{
+    theta_point_t T1, T2;
+    add_components_t add_comp1, add_comp2;
+
+    // Compute the cross addition components of P1+Q1 and P2+Q2
+    // (Q is the kernel point phi->xyK1_8, taken component-wise on E1 and E2)
+    jac_to_xz_add_components(&add_comp1, &P->P1, &phi->xyK1_8.P1, &phi->domain.E1);
+    jac_to_xz_add_components(&add_comp2, &P->P2, &phi->xyK1_8.P2, &phi->domain.E2);
+
+    // Compute T1 and T2 derived from the cross addition components.
+    fp2_mul(&T1.x, &add_comp1.u, &add_comp2.u); // T1x = u1u2
+    fp2_mul(&T2.t, &add_comp1.v, &add_comp2.v); // T2t = v1v2
+    fp2_add(&T1.x, &T1.x, &T2.t);               // T1x = u1u2 + v1v2
+    fp2_mul(&T1.y, &add_comp1.u, &add_comp2.w); // T1y = u1w2
+    fp2_mul(&T1.z, &add_comp1.w, &add_comp2.u); // T1z = w1u2
+    fp2_mul(&T1.t, &add_comp1.w, &add_comp2.w); // T1t = w1w2
+    fp2_add(&T2.x, &add_comp1.u, &add_comp1.v); // T2x = (u1+v1)
+    fp2_add(&T2.y, &add_comp2.u, &add_comp2.v); // T2y = (u2+v2)
+    fp2_mul(&T2.x, &T2.x, &T2.y);               // T2x = (u1+v1)(u2+v2)
+    fp2_sub(&T2.x, &T2.x, &T1.x);               // T2x = v1u2 + u1v2
+    fp2_mul(&T2.y, &add_comp1.v, &add_comp2.w); // T2y = v1w2
+    fp2_mul(&T2.z, &add_comp1.w, &add_comp2.v); // T2z = w1v2
+    fp2_set_zero(&T2.t);                        // T2t = 0
+
+    // Apply the basis change and compute their respective square
+    // theta(P+Q) = M.T1 - M.T2 and theta(P-Q) = M.T1 + M.T2
+    // (T2.t was just set to zero, so the t column of M is skipped for T2)
+    apply_isomorphism_general(&T1, &phi->M, &T1, true);
+    apply_isomorphism_general(&T2, &phi->M, &T2, false);
+    pointwise_square(&T1, &T1);
+    pointwise_square(&T2, &T2);
+
+    // the difference between the two is therefore theta(P+Q)theta(P-Q)
+ // whose hadamard transform is then the product of the dual + // theta_points of phi(P) and phi(Q). + fp2_sub(&T1.x, &T1.x, &T2.x); + fp2_sub(&T1.y, &T1.y, &T2.y); + fp2_sub(&T1.z, &T1.z, &T2.z); + fp2_sub(&T1.t, &T1.t, &T2.t); + hadamard(&T1, &T1); + + // Compute (x, y, z, t) + // As imageK1_8 = (x:x:y:y), its inverse is (y:y:x:x). + fp2_mul(&image->x, &T1.x, &phi->imageK1_8.y); + fp2_mul(&image->y, &T1.y, &phi->imageK1_8.y); + fp2_mul(&image->z, &T1.z, &phi->imageK1_8.x); + fp2_mul(&image->t, &T1.t, &phi->imageK1_8.x); + + hadamard(image, image); +} + +// Same as gluing_eval_point but in the very special case where we already know that the point will +// have a zero coordinate at the place where the zero coordinate of the dual_theta_nullpoint would +// have made the computation difficult +static int +gluing_eval_point_special_case(theta_point_t *image, const theta_couple_point_t *P, const theta_gluing_t *phi) +{ + theta_point_t T; + + // Apply the basis change + base_change(&T, phi, P); + + // Apply the to_squared_theta transform + to_squared_theta(&T, &T); + + // This coordinate should always be 0 in a gluing because D=0. 
+ // If this is not the case, something went very wrong, so reject + if (!fp2_is_zero(&T.t)) + return 0; + + // Compute (x, y, z, t) + fp2_mul(&image->x, &T.x, &phi->precomputation.x); + fp2_mul(&image->y, &T.y, &phi->precomputation.y); + fp2_mul(&image->z, &T.z, &phi->precomputation.z); + fp2_set_zero(&image->t); + + hadamard(image, image); + return 1; +} + +/** + * @brief Evaluate a gluing isogeny from an elliptic product on a basis + * + * @param image1 Output: the theta_point of the image of the first couple of points + * @param image2 Output : the theta point of the image of the second couple of points + * @param xyT1: A pair of points (X : Y : Z) on E1E2 to glue using phi + * @param xyT2: A pair of points (X : Y : Z) on E1E2 to glue using phi + * @param phi : a gluing isogeny E1 x E2 -> A + * + **/ +static void +gluing_eval_basis(theta_point_t *image1, + theta_point_t *image2, + const theta_couple_jac_point_t *xyT1, + const theta_couple_jac_point_t *xyT2, + const theta_gluing_t *phi) +{ + gluing_eval_point(image1, xyT1, phi); + gluing_eval_point(image2, xyT2, phi); +} + +/** + * @brief Compute a (2,2) isogeny in dimension 2 in the theta_model + * + * @param out Output: the theta_isogeny + * @param A a theta null point for the domain + * @param T1_8 a point in A[8] + * @param T2_8 a point in A[8] + * @param hadamard_bool_1 a boolean used for the last two steps of the chain + * @param hadamard_bool_2 a boolean used for the last two steps of the chain + * + * out : A -> B of kernel [4](T1_8,T2_8) + * hadamard_bool_1 controls if the domain is in standard or dual coordinates + * hadamard_bool_2 controls if the codomain is in standard or dual coordinates + * verify: add extra sanity check to ensure our 8-torsion points are coherent with the isogeny + * + */ +static int +theta_isogeny_compute(theta_isogeny_t *out, + const theta_structure_t *A, + const theta_point_t *T1_8, + const theta_point_t *T2_8, + bool hadamard_bool_1, + bool hadamard_bool_2, + bool verify) +{ 
+ out->hadamard_bool_1 = hadamard_bool_1; + out->hadamard_bool_2 = hadamard_bool_2; + out->domain = *A; + out->T1_8 = *T1_8; + out->T2_8 = *T2_8; + out->codomain.precomputation = false; + + theta_point_t TT1, TT2; + + if (hadamard_bool_1) { + hadamard(&TT1, T1_8); + to_squared_theta(&TT1, &TT1); + hadamard(&TT2, T2_8); + to_squared_theta(&TT2, &TT2); + } else { + to_squared_theta(&TT1, T1_8); + to_squared_theta(&TT2, T2_8); + } + + fp2_t t1, t2; + + // Test that our projective factor ABCDxzw is non zero, where + // TT1=(Ax, Bx, Cy, Dy), TT2=(Az, Bw, Cz, Dw) + // But ABCDxzw=0 can only happen if we had an unexpected splitting in + // the isogeny chain. + // In either case reject + // (this is not strictly necessary, we could just return (0:0:0:0)) + if (fp2_is_zero(&TT2.x) | fp2_is_zero(&TT2.y) | fp2_is_zero(&TT2.z) | fp2_is_zero(&TT2.t) | fp2_is_zero(&TT1.x) | + fp2_is_zero(&TT1.y)) + return 0; + + fp2_mul(&t1, &TT1.x, &TT2.y); + fp2_mul(&t2, &TT1.y, &TT2.x); + fp2_mul(&out->codomain.null_point.x, &TT2.x, &t1); + fp2_mul(&out->codomain.null_point.y, &TT2.y, &t2); + fp2_mul(&out->codomain.null_point.z, &TT2.z, &t1); + fp2_mul(&out->codomain.null_point.t, &TT2.t, &t2); + fp2_t t3; + fp2_mul(&t3, &TT2.z, &TT2.t); + fp2_mul(&out->precomputation.x, &t3, &TT1.y); + fp2_mul(&out->precomputation.y, &t3, &TT1.x); + fp2_copy(&out->precomputation.z, &out->codomain.null_point.t); + fp2_copy(&out->precomputation.t, &out->codomain.null_point.z); + + // If T1_8 and T2_8 are our 8-torsion points, this ensures that the + // 4-torsion points 2T1_8 and 2T2_8 are isotropic. 
+ if (verify) { + fp2_mul(&t1, &TT1.x, &out->precomputation.x); + fp2_mul(&t2, &TT1.y, &out->precomputation.y); + if (!fp2_is_equal(&t1, &t2)) + return 0; + fp2_mul(&t1, &TT1.z, &out->precomputation.z); + fp2_mul(&t2, &TT1.t, &out->precomputation.t); + if (!fp2_is_equal(&t1, &t2)) + return 0; + fp2_mul(&t1, &TT2.x, &out->precomputation.x); + fp2_mul(&t2, &TT2.z, &out->precomputation.z); + if (!fp2_is_equal(&t1, &t2)) + return 0; + fp2_mul(&t1, &TT2.y, &out->precomputation.y); + fp2_mul(&t2, &TT2.t, &out->precomputation.t); + if (!fp2_is_equal(&t1, &t2)) + return 0; + } + + if (hadamard_bool_2) { + hadamard(&out->codomain.null_point, &out->codomain.null_point); + } + return 1; +} + +/** + * @brief Compute a (2,2) isogeny when only the 4 torsion above the kernel is known and not the 8 + * torsion + * + * @param out Output: the theta_isogeny + * @param A a theta null point for the domain + * @param T1_4 a point in A[4] + * @param T2_4 a point in A[4] + * @param hadamard_bool_1 a boolean + * @param hadamard_bool_2 a boolean + * + * out : A -> B of kernel [2](T1_4,T2_4) + * hadamard_bool_1 controls if the domain is in standard or dual coordinates + * hadamard_bool_2 controls if the codomain is in standard or dual coordinates + * + */ +static void +theta_isogeny_compute_4(theta_isogeny_t *out, + const theta_structure_t *A, + const theta_point_t *T1_4, + const theta_point_t *T2_4, + bool hadamard_bool_1, + bool hadamard_bool_2) +{ + out->hadamard_bool_1 = hadamard_bool_1; + out->hadamard_bool_2 = hadamard_bool_2; + out->domain = *A; + out->T1_8 = *T1_4; + out->T2_8 = *T2_4; + out->codomain.precomputation = false; + + theta_point_t TT1, TT2; + // we will compute: + // TT1 = (xAB, _ , xCD, _) + // TT2 = (AA,BB,CC,DD) + + // fp2_t xA_inv,zA_inv,tB_inv; + + if (hadamard_bool_1) { + hadamard(&TT1, T1_4); + to_squared_theta(&TT1, &TT1); + + hadamard(&TT2, &A->null_point); + to_squared_theta(&TT2, &TT2); + } else { + to_squared_theta(&TT1, T1_4); + to_squared_theta(&TT2, 
&A->null_point); + } + + fp2_t sqaabb, sqaacc; + fp2_mul(&sqaabb, &TT2.x, &TT2.y); + fp2_mul(&sqaacc, &TT2.x, &TT2.z); + // No need to check the square roots, only used for signing. + // sqaabb = sqrt(AA*BB) + fp2_sqrt(&sqaabb); + // sqaacc = sqrt(AA*CC) + fp2_sqrt(&sqaacc); + + // we compute out->codomain.null_point = (xAB * sqaacc * AA, xAB *sqaabb *sqaacc, xCD*sqaabb * + // AA) out->precomputation = (xAB * BB * CC *DD , sqaabb * CC * DD * xAB , sqaacc * BB* DD * xAB + // , xCD * sqaabb *sqaacc * BB) + + fp2_mul(&out->codomain.null_point.y, &sqaabb, &sqaacc); + fp2_mul(&out->precomputation.t, &out->codomain.null_point.y, &TT1.z); + fp2_mul(&out->codomain.null_point.y, &out->codomain.null_point.y, + &TT1.x); // done for out->codomain.null_point.y + + fp2_mul(&out->codomain.null_point.t, &TT1.z, &sqaabb); + fp2_mul(&out->codomain.null_point.t, &out->codomain.null_point.t, + &TT2.x); // done for out->codomain.null_point.t + + fp2_mul(&out->codomain.null_point.x, &TT1.x, &TT2.x); + fp2_mul(&out->codomain.null_point.z, &out->codomain.null_point.x, + &TT2.z); // done for out->codomain.null_point.z + fp2_mul(&out->codomain.null_point.x, &out->codomain.null_point.x, + &sqaacc); // done for out->codomain.null_point.x + + fp2_mul(&out->precomputation.x, &TT1.x, &TT2.t); + fp2_mul(&out->precomputation.z, &out->precomputation.x, &TT2.y); + fp2_mul(&out->precomputation.x, &out->precomputation.x, &TT2.z); + fp2_mul(&out->precomputation.y, &out->precomputation.x, &sqaabb); // done for out->precomputation.y + fp2_mul(&out->precomputation.x, &out->precomputation.x, &TT2.y); // done for out->precomputation.x + fp2_mul(&out->precomputation.z, &out->precomputation.z, &sqaacc); // done for out->precomputation.z + fp2_mul(&out->precomputation.t, &out->precomputation.t, &TT2.y); // done for out->precomputation.t + + if (hadamard_bool_2) { + hadamard(&out->codomain.null_point, &out->codomain.null_point); + } +} + +/** + * @brief Compute a (2,2) isogeny when only the kernel is known and 
not the 8 or 4 torsion above
+ *
+ * @param out Output: the theta_isogeny
+ * @param A a theta null point for the domain
+ * @param T1_2 a point in A[2]
+ * @param T2_2 a point in A[2]
+ * @param hadamard_bool_1 a boolean
+ * @param hadamard_bool_2 a boolean
+ *
+ * out : A -> B of kernel (T1_2,T2_2)
+ * hadamard_bool_1 controls if the domain is in standard or dual coordinates
+ * hadamard_bool_2 controls if the codomain is in standard or dual coordinates
+ *
+ */
+static void
+theta_isogeny_compute_2(theta_isogeny_t *out,
+                        const theta_structure_t *A,
+                        const theta_point_t *T1_2,
+                        const theta_point_t *T2_2,
+                        bool hadamard_bool_1,
+                        bool hadamard_bool_2)
+{
+    out->hadamard_bool_1 = hadamard_bool_1;
+    out->hadamard_bool_2 = hadamard_bool_2;
+    out->domain = *A;
+    // The kernel points are stored in the T1_8/T2_8 slots of out; the rest of
+    // this function only reads the null point of A.
+    out->T1_8 = *T1_2;
+    out->T2_8 = *T2_2;
+    out->codomain.precomputation = false;
+
+    theta_point_t TT2;
+    // we will compute:
+    // TT2 = (AA,BB,CC,DD)
+
+    if (hadamard_bool_1) {
+        hadamard(&TT2, &A->null_point);
+        to_squared_theta(&TT2, &TT2);
+    } else {
+        to_squared_theta(&TT2, &A->null_point);
+    }
+
+    // we compute out->codomain.null_point = (AA,sqaabb, sqaacc, sqaadd)
+    // out->precomputation = ( BB * CC *DD , sqaabb * CC * DD , sqaacc * BB* DD , sqaadd * BB * CC)
+    // (sqaabb, sqaacc, sqaadd: square roots of AA*BB, AA*CC, AA*DD, taken just below)
+    fp2_copy(&out->codomain.null_point.x, &TT2.x);
+    fp2_mul(&out->codomain.null_point.y, &TT2.x, &TT2.y);
+    fp2_mul(&out->codomain.null_point.z, &TT2.x, &TT2.z);
+    fp2_mul(&out->codomain.null_point.t, &TT2.x, &TT2.t);
+    // No need to check the square roots, only used for signing.
+ fp2_sqrt(&out->codomain.null_point.y); + fp2_sqrt(&out->codomain.null_point.z); + fp2_sqrt(&out->codomain.null_point.t); + + fp2_mul(&out->precomputation.x, &TT2.z, &TT2.t); + fp2_mul(&out->precomputation.y, + &out->precomputation.x, + &out->codomain.null_point.y); // done for out->precomputation.y + fp2_mul(&out->precomputation.x, &out->precomputation.x, &TT2.y); // done for out->precomputation.x + fp2_mul(&out->precomputation.z, &TT2.t, &out->codomain.null_point.z); + fp2_mul(&out->precomputation.z, &out->precomputation.z, &TT2.y); // done for out->precomputation.z + fp2_mul(&out->precomputation.t, &TT2.z, &out->codomain.null_point.t); + fp2_mul(&out->precomputation.t, &out->precomputation.t, &TT2.y); // done for out->precomputation.t + + if (hadamard_bool_2) { + hadamard(&out->codomain.null_point, &out->codomain.null_point); + } +} + +static void +theta_isogeny_eval(theta_point_t *out, const theta_isogeny_t *phi, const theta_point_t *P) +{ + if (phi->hadamard_bool_1) { + hadamard(out, P); + to_squared_theta(out, out); + } else { + to_squared_theta(out, P); + } + fp2_mul(&out->x, &out->x, &phi->precomputation.x); + fp2_mul(&out->y, &out->y, &phi->precomputation.y); + fp2_mul(&out->z, &out->z, &phi->precomputation.z); + fp2_mul(&out->t, &out->t, &phi->precomputation.t); + + if (phi->hadamard_bool_2) { + hadamard(out, out); + } +} + +#if defined(ENABLE_SIGN) +// Sample a random secret index in [0, 5] to select one of the 6 normalisation +// matrices for the normalisation of the output of the (2,2)-chain during +// splitting +static unsigned char +sample_random_index(void) +{ + // To avoid bias in reduction we should only consider integers smaller + // than 2^32 which are a multiple of 6, so we only reduce bytes with a + // value in [0, 4294967292-1]. + // We have 4294967292/2^32 = ~99.9999999% chance that the first try is "good". 
+    unsigned char seed_arr[4];
+    uint32_t seed;
+
+    do {
+        randombytes(seed_arr, 4);
+        // Widen every byte to uint32_t before shifting: an unsigned char is
+        // promoted to (signed) int, and shifting a byte >= 0x80 left by 24
+        // would overflow that int, which is undefined behaviour.
+        seed = (uint32_t)seed_arr[0] | ((uint32_t)seed_arr[1] << 8) | ((uint32_t)seed_arr[2] << 16) |
+               ((uint32_t)seed_arr[3] << 24);
+    } while (seed >= 4294967292U);
+
+    uint32_t secret_index = seed - (((uint64_t)seed * 2863311531U) >> 34) * 6;
+    assert(secret_index == seed % 6); // ensure the constant time trick above works
+    return (unsigned char)secret_index;
+}
+#endif
+
+static bool
+splitting_compute(theta_splitting_t *out, const theta_structure_t *A, int zero_index, bool randomize)
+
+{
+    // init
+    uint32_t ctl;
+    uint32_t count = 0;
+    fp2_t U_cst, t1, t2;
+
+    memset(&out->M, 0, sizeof(basis_change_matrix_t));
+
+    // enumerate through all indices
+    for (int i = 0; i < 10; i++) {
+        fp2_set_zero(&U_cst);
+        for (int t = 0; t < 4; t++) {
+            // Iterate through the null point
+            choose_index_theta_point(&t2, t, &A->null_point);
+            choose_index_theta_point(&t1, t ^ EVEN_INDEX[i][1], &A->null_point);
+
+            // Compute t1 * t2
+            fp2_mul(&t1, &t1, &t2);
+            // If CHI_EVAL(i,t) is +1 we want ctl to be 0 and
+            // If CHI_EVAL(i,t) is -1 we want ctl to be 0xFF..FF
+            ctl = (uint32_t)(CHI_EVAL[EVEN_INDEX[i][0]][t] >> 1);
+            assert(ctl == 0 || ctl == 0xffffffff);
+
+            fp2_neg(&t2, &t1);
+            fp2_select(&t1, &t1, &t2, ctl);
+
+            // Then we compute U_cst ± (t1 * t2)
+            fp2_add(&U_cst, &U_cst, &t1);
+        }
+
+        // If U_cst is 0 then update the splitting matrix
+        ctl = fp2_is_zero(&U_cst);
+        // NOTE(review): fp2_is_zero presumably returns an all-ones mask on
+        // zero, so this subtraction bumps count by one per zero - confirm
+        count -= ctl;
+        select_base_change_matrix(&out->M, &out->M, &SPLITTING_TRANSFORMS[i], ctl);
+        if (zero_index != -1 && i == zero_index &&
+            !ctl) { // extra checks if we know exactly where the 0 index should be
+            return 0;
+        }
+    }
+
+#if defined(ENABLE_SIGN)
+    // Pick a random normalization matrix
+    if (randomize) {
+        unsigned char secret_index = sample_random_index();
+        basis_change_matrix_t Mrandom;
+
+        set_base_change_matrix_from_precomp(&Mrandom, &NORMALIZATION_TRANSFORMS[0]);
+
+        // Use a constant time selection to pick the index we want
+        for (unsigned char i = 1; i
< 6; i++) { + // When i == secret_index, mask == 0 and 0xFF..FF otherwise + int32_t mask = i - secret_index; + mask = (mask | -mask) >> 31; + select_base_change_matrix(&Mrandom, &Mrandom, &NORMALIZATION_TRANSFORMS[i], ~mask); + } + base_change_matrix_multiplication(&out->M, &Mrandom, &out->M); + } +#else + assert(!randomize); +#endif + + // apply the isomorphism to ensure the null point is compatible with splitting + apply_isomorphism(&out->B.null_point, &out->M, &A->null_point); + + // splitting was successful only if exactly one zero was identified + return count == 1; +} + +static int +theta_product_structure_to_elliptic_product(theta_couple_curve_t *E12, theta_structure_t *A) +{ + fp2_t xx, yy; + + // This should be true from our computations in splitting_compute + // but still check this for sanity + if (!is_product_theta_point(&A->null_point)) + return 0; + + ec_curve_init(&(E12->E1)); + ec_curve_init(&(E12->E2)); + + // A valid elliptic theta null point has no zero coordinate + if (fp2_is_zero(&A->null_point.x) | fp2_is_zero(&A->null_point.y) | fp2_is_zero(&A->null_point.z)) + return 0; + + // xx = x², yy = y² + fp2_sqr(&xx, &A->null_point.x); + fp2_sqr(&yy, &A->null_point.y); + // xx = x^4, yy = y^4 + fp2_sqr(&xx, &xx); + fp2_sqr(&yy, &yy); + + // A2 = -2(x^4+y^4)/(x^4-y^4) + fp2_add(&E12->E2.A, &xx, &yy); + fp2_sub(&E12->E2.C, &xx, &yy); + fp2_add(&E12->E2.A, &E12->E2.A, &E12->E2.A); + fp2_neg(&E12->E2.A, &E12->E2.A); + + // same with x,z + fp2_sqr(&xx, &A->null_point.x); + fp2_sqr(&yy, &A->null_point.z); + fp2_sqr(&xx, &xx); + fp2_sqr(&yy, &yy); + + // A1 = -2(x^4+z^4)/(x^4-z^4) + fp2_add(&E12->E1.A, &xx, &yy); + fp2_sub(&E12->E1.C, &xx, &yy); + fp2_add(&E12->E1.A, &E12->E1.A, &E12->E1.A); + fp2_neg(&E12->E1.A, &E12->E1.A); + + if (fp2_is_zero(&E12->E1.C) | fp2_is_zero(&E12->E2.C)) + return 0; + + return 1; +} + +static int +theta_point_to_montgomery_point(theta_couple_point_t *P12, const theta_point_t *P, const theta_structure_t *A) +{ + fp2_t temp; + 
const fp2_t *x, *z; + + if (!is_product_theta_point(P)) + return 0; + + x = &P->x; + z = &P->y; + if (fp2_is_zero(x) & fp2_is_zero(z)) { + x = &P->z; + z = &P->t; + } + if (fp2_is_zero(x) & fp2_is_zero(z)) { + return 0; // at this point P=(0:0:0:0) so is invalid + } + // P2.X = A.null_point.y * P.x + A.null_point.x * P.y + // P2.Z = - A.null_point.y * P.x + A.null_point.x * P.y + fp2_mul(&P12->P2.x, &A->null_point.y, x); + fp2_mul(&temp, &A->null_point.x, z); + fp2_sub(&P12->P2.z, &temp, &P12->P2.x); + fp2_add(&P12->P2.x, &P12->P2.x, &temp); + + x = &P->x; + z = &P->z; + if (fp2_is_zero(x) & fp2_is_zero(z)) { + x = &P->y; + z = &P->t; + } + // P1.X = A.null_point.z * P.x + A.null_point.x * P.z + // P1.Z = -A.null_point.z * P.x + A.null_point.x * P.z + fp2_mul(&P12->P1.x, &A->null_point.z, x); + fp2_mul(&temp, &A->null_point.x, z); + fp2_sub(&P12->P1.z, &temp, &P12->P1.x); + fp2_add(&P12->P1.x, &P12->P1.x, &temp); + return 1; +} + +static int +_theta_chain_compute_impl(unsigned n, + theta_couple_curve_t *E12, + const theta_kernel_couple_points_t *ker, + bool extra_torsion, + theta_couple_curve_t *E34, + theta_couple_point_t *P12, + size_t numP, + bool verify, + bool randomize) +{ + theta_structure_t theta; + + // lift the basis + theta_couple_jac_point_t xyT1, xyT2; + + ec_basis_t bas1 = { .P = ker->T1.P1, .Q = ker->T2.P1, .PmQ = ker->T1m2.P1 }; + ec_basis_t bas2 = { .P = ker->T1.P2, .Q = ker->T2.P2, .PmQ = ker->T1m2.P2 }; + if (!lift_basis(&xyT1.P1, &xyT2.P1, &bas1, &E12->E1)) + return 0; + if (!lift_basis(&xyT1.P2, &xyT2.P2, &bas2, &E12->E2)) + return 0; + + const unsigned extra = HD_extra_torsion * extra_torsion; + +#ifndef NDEBUG + assert(extra == 0 || extra == 2); // only cases implemented + if (!test_point_order_twof(&bas2.P, &E12->E2, n + extra)) + debug_print("bas2.P does not have correct order"); + + if (!test_jac_order_twof(&xyT2.P2, &E12->E2, n + extra)) + debug_print("xyT2.P2 does not have correct order"); +#endif + + theta_point_t pts[numP ? 
numP : 1]; + + int space = 1; + for (unsigned i = 1; i < n; i *= 2) + ++space; + + uint16_t todo[space]; + todo[0] = n - 2 + extra; + + int current = 0; + + // kernel points for the gluing isogeny + theta_couple_jac_point_t jacQ1[space], jacQ2[space]; + jacQ1[0] = xyT1; + jacQ2[0] = xyT2; + while (todo[current] != 1) { + assert(todo[current] >= 2); + ++current; + assert(current < space); + // the gluing isogeny is quite a bit more expensive than the others, + // so we adjust the usual splitting rule here a little bit: towards + // the end of the doubling chain it will be cheaper to recompute the + // doublings after evaluation than to push the intermediate points. + const unsigned num_dbls = todo[current - 1] >= 16 ? todo[current - 1] / 2 : todo[current - 1] - 1; + assert(num_dbls && num_dbls < todo[current - 1]); + double_couple_jac_point_iter(&jacQ1[current], num_dbls, &jacQ1[current - 1], E12); + double_couple_jac_point_iter(&jacQ2[current], num_dbls, &jacQ2[current - 1], E12); + todo[current] = todo[current - 1] - num_dbls; + } + + // kernel points for the remaining isogeny steps + theta_point_t thetaQ1[space], thetaQ2[space]; + + // the gluing step + theta_gluing_t first_step; + { + assert(todo[current] == 1); + + // compute the gluing isogeny + if (!gluing_compute(&first_step, E12, &jacQ1[current], &jacQ2[current], verify)) + return 0; + + // evaluate + for (unsigned j = 0; j < numP; ++j) { + assert(ec_is_zero(&P12[j].P1) || ec_is_zero(&P12[j].P2)); + if (!gluing_eval_point_special_case(&pts[j], &P12[j], &first_step)) + return 0; + } + + // push kernel points through gluing isogeny + for (int j = 0; j < current; ++j) { + gluing_eval_basis(&thetaQ1[j], &thetaQ2[j], &jacQ1[j], &jacQ2[j], &first_step); + --todo[j]; + } + + --current; + } + + // set-up the theta_structure for the first codomain + theta.null_point = first_step.codomain; + theta.precomputation = 0; + theta_precomputation(&theta); + + theta_isogeny_t step; + + // and now we do the remaining steps + 
for (unsigned i = 1; current >= 0 && todo[current]; ++i) { + assert(current < space); + while (todo[current] != 1) { + assert(todo[current] >= 2); + ++current; + assert(current < space); + const unsigned num_dbls = todo[current - 1] / 2; + assert(num_dbls && num_dbls < todo[current - 1]); + double_iter(&thetaQ1[current], &theta, &thetaQ1[current - 1], num_dbls); + double_iter(&thetaQ2[current], &theta, &thetaQ2[current - 1], num_dbls); + todo[current] = todo[current - 1] - num_dbls; + } + + // computing the next step + int ret; + if (i == n - 2) // penultimate step + ret = theta_isogeny_compute(&step, &theta, &thetaQ1[current], &thetaQ2[current], 0, 0, verify); + else if (i == n - 1) // ultimate step + ret = theta_isogeny_compute(&step, &theta, &thetaQ1[current], &thetaQ2[current], 1, 0, false); + else + ret = theta_isogeny_compute(&step, &theta, &thetaQ1[current], &thetaQ2[current], 0, 1, verify); + if (!ret) + return 0; + + for (unsigned j = 0; j < numP; ++j) + theta_isogeny_eval(&pts[j], &step, &pts[j]); + + // updating the codomain + theta = step.codomain; + + // pushing the kernel + assert(todo[current] == 1); + for (int j = 0; j < current; ++j) { + theta_isogeny_eval(&thetaQ1[j], &step, &thetaQ1[j]); + theta_isogeny_eval(&thetaQ2[j], &step, &thetaQ2[j]); + assert(todo[j]); + --todo[j]; + } + + --current; + } + + assert(current == -1); + + if (!extra_torsion) { + if (n >= 3) { + // in the last step we've skipped pushing the kernel since current was == 0, let's do it now + theta_isogeny_eval(&thetaQ1[0], &step, &thetaQ1[0]); + theta_isogeny_eval(&thetaQ2[0], &step, &thetaQ2[0]); + } + + // penultimate step + theta_isogeny_compute_4(&step, &theta, &thetaQ1[0], &thetaQ2[0], 0, 0); + for (unsigned j = 0; j < numP; ++j) + theta_isogeny_eval(&pts[j], &step, &pts[j]); + theta = step.codomain; + theta_isogeny_eval(&thetaQ1[0], &step, &thetaQ1[0]); + theta_isogeny_eval(&thetaQ2[0], &step, &thetaQ2[0]); + + // ultimate step + theta_isogeny_compute_2(&step, &theta, 
&thetaQ1[0], &thetaQ2[0], 1, 0); + for (unsigned j = 0; j < numP; ++j) + theta_isogeny_eval(&pts[j], &step, &pts[j]); + theta = step.codomain; + } + + // final splitting step + theta_splitting_t last_step; + + bool is_split = splitting_compute(&last_step, &theta, extra_torsion ? 8 : -1, randomize); + + if (!is_split) { + debug_print("kernel did not generate an isogeny between elliptic products"); + return 0; + } + + if (!theta_product_structure_to_elliptic_product(E34, &last_step.B)) + return 0; + + // evaluate + for (size_t j = 0; j < numP; ++j) { + apply_isomorphism(&pts[j], &last_step.M, &pts[j]); + if (!theta_point_to_montgomery_point(&P12[j], &pts[j], &last_step.B)) + return 0; + } + + return 1; +} + +int +theta_chain_compute_and_eval(unsigned n, + /*const*/ theta_couple_curve_t *E12, + const theta_kernel_couple_points_t *ker, + bool extra_torsion, + theta_couple_curve_t *E34, + theta_couple_point_t *P12, + size_t numP) +{ + return _theta_chain_compute_impl(n, E12, ker, extra_torsion, E34, P12, numP, false, false); +} + +// Like theta_chain_compute_and_eval, adding extra verification checks; +// used in the signature verification +int +theta_chain_compute_and_eval_verify(unsigned n, + /*const*/ theta_couple_curve_t *E12, + const theta_kernel_couple_points_t *ker, + bool extra_torsion, + theta_couple_curve_t *E34, + theta_couple_point_t *P12, + size_t numP) +{ + return _theta_chain_compute_impl(n, E12, ker, extra_torsion, E34, P12, numP, true, false); +} + +int +theta_chain_compute_and_eval_randomized(unsigned n, + /*const*/ theta_couple_curve_t *E12, + const theta_kernel_couple_points_t *ker, + bool extra_torsion, + theta_couple_curve_t *E34, + theta_couple_point_t *P12, + size_t numP) +{ + return _theta_chain_compute_impl(n, E12, ker, extra_torsion, E34, P12, numP, false, true); +} diff --git a/src/pqm4/sqisign_lvl3/ref/theta_isogenies.h b/src/pqm4/sqisign_lvl3/ref/theta_isogenies.h new file mode 100644 index 0000000..d151811 --- /dev/null +++ 
b/src/pqm4/sqisign_lvl3/ref/theta_isogenies.h @@ -0,0 +1,18 @@ +/** @file + * + * @authors Antonin Leroux + * + * @brief the theta isogeny header + */ + +#ifndef THETA_ISOGENY_H +#define THETA_ISOGENY_H + +#include +#include +#include +#include "theta_structure.h" +#include +#include + +#endif diff --git a/src/pqm4/sqisign_lvl3/ref/theta_structure.c b/src/pqm4/sqisign_lvl3/ref/theta_structure.c new file mode 100644 index 0000000..ce97ac6 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/theta_structure.c @@ -0,0 +1,78 @@ +#include "theta_structure.h" +#include + +void +theta_precomputation(theta_structure_t *A) +{ + + if (A->precomputation) { + return; + } + + theta_point_t A_dual; + to_squared_theta(&A_dual, &A->null_point); + + fp2_t t1, t2; + fp2_mul(&t1, &A_dual.x, &A_dual.y); + fp2_mul(&t2, &A_dual.z, &A_dual.t); + fp2_mul(&A->XYZ0, &t1, &A_dual.z); + fp2_mul(&A->XYT0, &t1, &A_dual.t); + fp2_mul(&A->YZT0, &t2, &A_dual.y); + fp2_mul(&A->XZT0, &t2, &A_dual.x); + + fp2_mul(&t1, &A->null_point.x, &A->null_point.y); + fp2_mul(&t2, &A->null_point.z, &A->null_point.t); + fp2_mul(&A->xyz0, &t1, &A->null_point.z); + fp2_mul(&A->xyt0, &t1, &A->null_point.t); + fp2_mul(&A->yzt0, &t2, &A->null_point.y); + fp2_mul(&A->xzt0, &t2, &A->null_point.x); + + A->precomputation = true; +} + +void +double_point(theta_point_t *out, theta_structure_t *A, const theta_point_t *in) +{ + to_squared_theta(out, in); + fp2_sqr(&out->x, &out->x); + fp2_sqr(&out->y, &out->y); + fp2_sqr(&out->z, &out->z); + fp2_sqr(&out->t, &out->t); + + if (!A->precomputation) { + theta_precomputation(A); + } + fp2_mul(&out->x, &out->x, &A->YZT0); + fp2_mul(&out->y, &out->y, &A->XZT0); + fp2_mul(&out->z, &out->z, &A->XYT0); + fp2_mul(&out->t, &out->t, &A->XYZ0); + + hadamard(out, out); + + fp2_mul(&out->x, &out->x, &A->yzt0); + fp2_mul(&out->y, &out->y, &A->xzt0); + fp2_mul(&out->z, &out->z, &A->xyt0); + fp2_mul(&out->t, &out->t, &A->xyz0); +} + +void +double_iter(theta_point_t *out, theta_structure_t *A, const 
theta_point_t *in, int exp) +{ + if (exp == 0) { + *out = *in; + } else { + double_point(out, A, in); + for (int i = 1; i < exp; i++) { + double_point(out, A, out); + } + } +} + +uint32_t +is_product_theta_point(const theta_point_t *P) +{ + fp2_t t1, t2; + fp2_mul(&t1, &P->x, &P->t); + fp2_mul(&t2, &P->y, &P->z); + return fp2_is_equal(&t1, &t2); +} diff --git a/src/pqm4/sqisign_lvl3/ref/theta_structure.h b/src/pqm4/sqisign_lvl3/ref/theta_structure.h new file mode 100644 index 0000000..fc630b7 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/theta_structure.h @@ -0,0 +1,135 @@ +/** @file + * + * @authors Antonin Leroux + * + * @brief the theta structure header + */ + +#ifndef THETA_STRUCTURE_H +#define THETA_STRUCTURE_H + +#include +#include +#include + +/** @internal + * @ingroup hd_module + * @defgroup hd_theta Functions for theta structures + * @{ + */ + +/** + * @brief Perform the hadamard transform on a theta point + * + * @param out Output: the theta_point + * @param in a theta point* + * in = (x,y,z,t) + * out = (x+y+z+t, x-y+z-t, x+y-z-t, x-y-z+t) + * + */ +static inline void +hadamard(theta_point_t *out, const theta_point_t *in) +{ + fp2_t t1, t2, t3, t4; + + // t1 = x + y + fp2_add(&t1, &in->x, &in->y); + // t2 = x - y + fp2_sub(&t2, &in->x, &in->y); + // t3 = z + t + fp2_add(&t3, &in->z, &in->t); + // t4 = z - t + fp2_sub(&t4, &in->z, &in->t); + + fp2_add(&out->x, &t1, &t3); + fp2_add(&out->y, &t2, &t4); + fp2_sub(&out->z, &t1, &t3); + fp2_sub(&out->t, &t2, &t4); +} + +/** + * @brief Square the coordinates of a theta point + * @param out Output: the theta_point + * @param in a theta point* + * in = (x,y,z,t) + * out = (x^2, y^2, z^2, t^2) + * + */ +static inline void +pointwise_square(theta_point_t *out, const theta_point_t *in) +{ + fp2_sqr(&out->x, &in->x); + fp2_sqr(&out->y, &in->y); + fp2_sqr(&out->z, &in->z); + fp2_sqr(&out->t, &in->t); +} + +/** + * @brief Square the coordinates and then perform the hadamard transform + * + * @param out Output: the 
theta_point
+ * @param in a theta point*
+ * in = (x,y,z,t)
+ * out = (x^2+y^2+z^2+t^2, x^2-y^2+z^2-t^2, x^2+y^2-z^2-t^2, x^2-y^2-z^2+t^2)
+ *
+ */
+static inline void
+to_squared_theta(theta_point_t *out, const theta_point_t *in)
+{
+    pointwise_square(out, in);
+    // in-place call: hadamard() reads all four coordinates into temporaries before writing
+    hadamard(out, out);
+}
+
+/**
+ * @brief Perform the theta structure precomputation
+ *
+ * @param A Output: the theta_structure
+ *
+ * if A.null_point = (x,y,z,t)
+ * if (xx,yy,zz,tt) = to_squared_theta(A.null_point)
+ * Computes the triple products
+ *   xyz0, xyt0, yzt0, xzt0 = x*y*z, x*y*t, y*z*t, x*z*t
+ *   XYZ0, XYT0, YZT0, XZT0 = xx*yy*zz, xx*yy*tt, yy*zz*tt, xx*zz*tt
+ * and then sets A.precomputation; returns immediately if A.precomputation is already set
+ *
+ */
+void theta_precomputation(theta_structure_t *A);
+
+/**
+ * @brief Compute the double of the theta point in on the theta struc A
+ *
+ * @param out Output: the theta_point
+ * @param A a theta structure (not const: its precomputation may be filled in by this call)
+ * @param in a theta point in the theta structure A
+ * in = (x,y,z,t)
+ * out = [2] (x,y,z,t)
+ * /!\ assumes that no coordinate is zero; the precomputation of A is performed first
+ * if it has not been done yet
+ *
+ */
+void double_point(theta_point_t *out, theta_structure_t *A, const theta_point_t *in);
+
+/**
+ * @brief Compute the iterated double of the theta point in on the theta struc A
+ *
+ * @param out Output: the theta_point
+ * @param A a theta structure (not const: its precomputation may be filled in by this call)
+ * @param in a theta point in the theta structure A
+ * @param exp the exponent (number of doublings)
+ * in = (x,y,z,t)
+ * out = [2^exp] (x,y,z,t); for exp == 0, out is a plain copy of in
+ * /!\ assumes that no coordinate is zero; the precomputation of A is performed by the
+ * first doubling if it has not been done yet
+ *
+ */
+void double_iter(theta_point_t *out, theta_structure_t *A, const theta_point_t *in, int exp);
+
+/*
+ * @brief Check if a theta point is a product theta point
+ *
+ * The test performed is x*t == y*z on the coordinates (x,y,z,t) of P.
+ *
+ * @param P a theta point
+ * @return 0xFFFFFFFF if true, zero otherwise
+ */
+uint32_t is_product_theta_point(const theta_point_t *P);
+
+// end hd_theta
+/**
+ * @}
+ */
+
+#endif
diff --git a/src/pqm4/sqisign_lvl3/ref/tools.h b/src/pqm4/sqisign_lvl3/ref/tools.h
new file mode 100644
index 0000000..5a6a505
--- /dev/null
+++ b/src/pqm4/sqisign_lvl3/ref/tools.h
@@ -0,0 +1,49 @@
+
+#ifndef TOOLS_H
+#define TOOLS_H + +#include + +// Debug printing: +// https://stackoverflow.com/questions/1644868/define-macro-for-debug-printing-in-c +#ifndef NDEBUG +#define DEBUG_PRINT 1 +#else +#define DEBUG_PRINT 0 +#endif + +#ifndef __FILE_NAME__ +#define __FILE_NAME__ "NA" +#endif + +#ifndef __LINE__ +#define __LINE__ 0 +#endif + +#ifndef __func__ +#define __func__ "NA" +#endif + +#define debug_print(fmt) \ + do { \ + if (DEBUG_PRINT) \ + printf("warning: %s, file %s, line %d, function %s().\n", \ + fmt, \ + __FILE_NAME__, \ + __LINE__, \ + __func__); \ + } while (0) + + +clock_t tic(void); +float tac(void); /* time in ms since last tic */ +float TAC(const char *str); /* same, but prints it with label 'str' */ +float toc(const clock_t t); /* time in ms since t */ +float TOC(const clock_t t, const char *str); /* same, but prints it with label 'str' */ +float TOC_clock(const clock_t t, const char *str); + +clock_t dclock(const clock_t t); // return the clock cycle diff between now and t +float clock_to_time(const clock_t t, + const char *str); // convert the number of clock cycles t to time +float clock_print(const clock_t t, const char *str); +#endif diff --git a/src/pqm4/sqisign_lvl3/ref/tutil.h b/src/pqm4/sqisign_lvl3/ref/tutil.h new file mode 100644 index 0000000..59f1620 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/tutil.h @@ -0,0 +1,36 @@ +#ifndef TUTIL_H +#define TUTIL_H + +#include +#include + +#if defined(__GNUC__) || defined(__clang__) +#define BSWAP16(i) __builtin_bswap16((i)) +#define BSWAP32(i) __builtin_bswap32((i)) +#define BSWAP64(i) __builtin_bswap64((i)) +#define UNUSED __attribute__((unused)) +#else +#define BSWAP16(i) ((((i) >> 8) & 0xff) | (((i) & 0xff00) << 8)) +#define BSWAP32(i) \ + ((((i) >> 24) & 0xff) | (((i) >> 8) & 0xff00) | (((i) & 0xff00) << 8) | ((i) << 24)) +#define BSWAP64(i) ((BSWAP32((i) >> 32) & 0xffffffff) | (BSWAP32(i) << 32) +#define UNUSED +#endif + +#if defined(RADIX_64) +#define digit_t uint64_t +#define sdigit_t int64_t +#define 
RADIX 64 +#define LOG2RADIX 6 +#define BSWAP_DIGIT(i) BSWAP64(i) +#elif defined(RADIX_32) +#define digit_t uint32_t +#define sdigit_t int32_t +#define RADIX 32 +#define LOG2RADIX 5 +#define BSWAP_DIGIT(i) BSWAP32(i) +#else +#error "Radix must be 32bit or 64 bit" +#endif + +#endif diff --git a/src/pqm4/sqisign_lvl3/ref/verification.h b/src/pqm4/sqisign_lvl3/ref/verification.h new file mode 100644 index 0000000..af67469 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/verification.h @@ -0,0 +1,123 @@ +/** @file + * + * @brief The verification protocol + */ + +#ifndef VERIFICATION_H +#define VERIFICATION_H + +#include +#include + +/** @defgroup verification SQIsignHD verification protocol + * @{ + */ + +/** @defgroup verification_t Types for SQIsignHD verification protocol + * @{ + */ + +typedef digit_t scalar_t[NWORDS_ORDER]; +typedef scalar_t scalar_mtx_2x2_t[2][2]; + +/** @brief Type for the signature + * + * @typedef signature_t + * + * @struct signature + * + */ +typedef struct signature +{ + fp2_t E_aux_A; // the Montgomery A-coefficient for the auxiliary curve + uint8_t backtracking; + uint8_t two_resp_length; + scalar_mtx_2x2_t mat_Bchall_can_to_B_chall; // the matrix of the desired basis + scalar_t chall_coeff; + uint8_t hint_aux; + uint8_t hint_chall; +} signature_t; + +/** @brief Type for the public keys + * + * @typedef public_key_t + * + * @struct public_key + * + */ +typedef struct public_key +{ + ec_curve_t curve; // the normalized A-coefficient of the Montgomery curve + uint8_t hint_pk; +} public_key_t; + +/** @} + */ + +/*************************** Functions *****************************/ + +void public_key_init(public_key_t *pk); +void public_key_finalize(public_key_t *pk); + +void hash_to_challenge(scalar_t *scalar, + const public_key_t *pk, + const ec_curve_t *com_curve, + const unsigned char *message, + size_t length); + +/** + * @brief Verification + * + * @param sig signature + * @param pk public key + * @param m message + * @param l size + * 
@returns 1 if the signature verifies, 0 otherwise + */ +int protocols_verify(signature_t *sig, const public_key_t *pk, const unsigned char *m, size_t l); + +/*************************** Encoding *****************************/ + +/** @defgroup encoding Encoding and decoding functions + * @{ + */ + +/** + * @brief Encodes a signature as a byte array + * + * @param enc : Byte array to encode the signature in + * @param sig : Signature to encode + */ +void signature_to_bytes(unsigned char *enc, const signature_t *sig); + +/** + * @brief Decodes a signature from a byte array + * + * @param sig : Structure to decode the signature in + * @param enc : Byte array to decode + */ +void signature_from_bytes(signature_t *sig, const unsigned char *enc); + +/** + * @brief Encodes a public key as a byte array + * + * @param enc : Byte array to encode the public key in + * @param pk : Public key to encode + */ +unsigned char *public_key_to_bytes(unsigned char *enc, const public_key_t *pk); + +/** + * @brief Decodes a public key from a byte array + * + * @param pk : Structure to decode the public key in + * @param enc : Byte array to decode + */ +const unsigned char *public_key_from_bytes(public_key_t *pk, const unsigned char *enc); + +/** @} + */ + +/** @} + */ + +#endif diff --git a/src/pqm4/sqisign_lvl3/ref/verify.c b/src/pqm4/sqisign_lvl3/ref/verify.c new file mode 100644 index 0000000..b5f78ad --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/verify.c @@ -0,0 +1,309 @@ +#include +#include +#include +#include +#include + +// Check that the basis change matrix elements are canonical +// representatives modulo 2^(SQIsign_response_length + 2). +static int +check_canonical_basis_change_matrix(const signature_t *sig) +{ + // This works as long as all values in sig->mat_Bchall_can_to_B_chall are + // positive integers. 
+    int ret = 1;
+    scalar_t aux;
+
+    // aux starts as 1 and is shifted left by
+    // (SQIsign_response_length + HD_extra_torsion - backtracking) bits
+    // (presumably; multiple_mp_shiftl is defined elsewhere), giving the
+    // power of two that every matrix entry is compared against.
+    memset(aux, 0, NWORDS_ORDER * sizeof(digit_t));
+    aux[0] = 0x1;
+    multiple_mp_shiftl(aux, SQIsign_response_length + HD_extra_torsion - (int)sig->backtracking, NWORDS_ORDER);
+
+    // All four entries are always inspected (no early exit); any entry for
+    // which mp_compare(aux, entry, ...) <= 0 makes the check fail.
+    for (int i = 0; i < 2; i++) {
+        for (int j = 0; j < 2; j++) {
+            if (mp_compare(aux, sig->mat_Bchall_can_to_B_chall[i][j], NWORDS_ORDER) <= 0) {
+                ret = 0;
+            }
+        }
+    }
+
+    return ret;
+}
+
+// Compute the 2^n isogeny from the signature with kernel
+// P + [chall_coeff]Q and store the codomain in E_chall
+//
+// E_chall  Output: codomain curve of the challenge isogeny
+// sig      signature; chall_coeff and backtracking are read
+// Epk      public-key curve, used as the domain
+// hint_pk  hint used to rebuild the basis (P, Q, P-Q) on Epk
+//
+// Returns 1 on success, 0 if the basis, the ladder or the isogeny
+// evaluation reports a failure.
+static int
+compute_challenge_verify(ec_curve_t *E_chall, const signature_t *sig, const ec_curve_t *Epk, const uint8_t hint_pk)
+{
+    ec_basis_t bas_EA;
+    ec_isog_even_t phi_chall;
+
+    // Set domain and length of 2^n isogeny
+    copy_curve(&phi_chall.curve, Epk);
+    phi_chall.length = TORSION_EVEN_POWER - sig->backtracking;
+
+    // Compute the basis from the supplied hint
+    if (!ec_curve_to_basis_2f_from_hint(&bas_EA, &phi_chall.curve, TORSION_EVEN_POWER, hint_pk)) // canonical
+        return 0;
+
+    // recovering the exact challenge
+    {
+        if (!ec_ladder3pt(&phi_chall.kernel, sig->chall_coeff, &bas_EA.P, &bas_EA.Q, &bas_EA.PmQ, &phi_chall.curve)) {
+            return 0;
+        };
+    }
+
+    // Double the kernel until it has the correct order
+    ec_dbl_iter(&phi_chall.kernel, sig->backtracking, &phi_chall.kernel, &phi_chall.curve);
+
+    // Compute the codomain
+    copy_curve(E_chall, &phi_chall.curve);
+    // note the inverted convention: a non-zero result from ec_eval_even is treated as failure
+    if (ec_eval_even(E_chall, &phi_chall, NULL, 0))
+        return 0;
+    return 1;
+}
+
+// same as matrix_application_even_basis() in id2iso.c, with some modifications:
+// - this version works with a matrix of scalars (not ibz_t).
+// - reduction modulo 2^f of matrix elements is removed here, because it is
+//   assumed that the elements are already canonical representatives modulo
+//   2^f; this is ensured by calling check_canonical_basis_change_matrix() at
+//   the beginning of protocols_verify().
+static int
+matrix_scalar_application_even_basis(ec_basis_t *bas, const ec_curve_t *E, scalar_mtx_2x2_t *mat, int f)
+{
+    // bas is overwritten in place, so every product below is computed from a
+    // snapshot of the input basis (P, Q, P - Q).
+    ec_basis_t src;
+    copy_basis(&src, bas);
+
+    // Reading the matrix as [[a, c], [b, d]]:
+    //
+    // new P is R = [a]P + [b]Q
+    if (!ec_biscalar_mul(&bas->P, (*mat)[0][0], (*mat)[1][0], f, &src, E))
+        return 0;
+    // new Q is S = [c]P + [d]Q
+    if (!ec_biscalar_mul(&bas->Q, (*mat)[0][1], (*mat)[1][1], f, &src, E))
+        return 0;
+
+    // new P - Q is R - S = [a - c]P + [b - d]Q; both coefficients are taken
+    // modulo 2^f before use.
+    scalar_t coeff_P = { 0 };
+    scalar_t coeff_Q = { 0 };
+    mp_sub(coeff_P, (*mat)[0][0], (*mat)[0][1], NWORDS_ORDER);
+    mp_mod_2exp(coeff_P, f, NWORDS_ORDER);
+    mp_sub(coeff_Q, (*mat)[1][0], (*mat)[1][1], NWORDS_ORDER);
+    mp_mod_2exp(coeff_Q, f, NWORDS_ORDER);
+
+    return ec_biscalar_mul(&bas->PmQ, coeff_P, coeff_Q, f, &src, E);
+}
+
+// Compute the bases for the challenge and auxiliary curve from
+// the canonical bases. Challenge basis is reconstructed from the
+// compressed scalars within the challenge.
+static int
+challenge_and_aux_basis_verify(ec_basis_t *B_chall_can,
+                               ec_basis_t *B_aux_can,
+                               ec_curve_t *E_chall,
+                               ec_curve_t *E_aux,
+                               signature_t *sig,
+                               const int pow_dim2_deg_resp)
+{
+    // Target orders, as exponents of two, for the two bases.
+    const int aux_order = pow_dim2_deg_resp + HD_extra_torsion;
+    const int chall_order = aux_order + sig->two_resp_length;
+
+    // Each canonical basis is first recovered at TORSION_EVEN_POWER, for
+    // consistency with signing, and then doubled to set it to the right order.
+    if (!ec_curve_to_basis_2f_from_hint(B_chall_can, E_chall, TORSION_EVEN_POWER, sig->hint_chall))
+        return 0;
+    ec_dbl_iter_basis(B_chall_can, TORSION_EVEN_POWER - chall_order, B_chall_can, E_chall);
+
+    if (!ec_curve_to_basis_2f_from_hint(B_aux_can, E_aux, TORSION_EVEN_POWER, sig->hint_aux))
+        return 0;
+    ec_dbl_iter_basis(B_aux_can, TORSION_EVEN_POWER - aux_order, B_aux_can, E_aux);
+
+#ifndef NDEBUG
+    if (!test_basis_order_twof(B_chall_can, E_chall, chall_order))
+        debug_print("canonical basis has wrong order, expect something to fail");
+#endif
+
+    // Last step: apply the change matrix from the signature to the basis of E_chall.
+    return matrix_scalar_application_even_basis(B_chall_can, E_chall, &sig->mat_Bchall_can_to_B_chall, chall_order);
+}
+
+// When two_resp_length is non-zero, we must compute a small 2^n-isogeny
+// updating E_chall as the codomain as well as push the basis on E_chall
+// through this isogeny
+static int
+two_response_isogeny_verify(ec_curve_t *E_chall, ec_basis_t *B_chall_can, const signature_t *sig, int pow_dim2_deg_resp)
+{
+    ec_point_t kernel;
+    ec_point_t images[3];
+
+    // Kernel generator for the small isogeny: Q when the matrix entries
+    // [0][0] and [1][0] are both even, P otherwise.
+    const bool use_Q = mp_is_even(sig->mat_Bchall_can_to_B_chall[0][0], NWORDS_ORDER) &&
+                       mp_is_even(sig->mat_Bchall_can_to_B_chall[1][0], NWORDS_ORDER);
+    copy_point(&kernel, use_Q ? &B_chall_can->Q : &B_chall_can->P);
+
+    // The three basis points are the ones evaluated through the isogeny.
+    copy_point(&images[0], &B_chall_can->P);
+    copy_point(&images[1], &B_chall_can->Q);
+    copy_point(&images[2], &B_chall_can->PmQ);
+
+    ec_dbl_iter(&kernel, pow_dim2_deg_resp + HD_extra_torsion, &kernel, E_chall);
+
+#ifndef NDEBUG
+    if (!test_point_order_twof(&kernel, E_chall, sig->two_resp_length))
+        debug_print("kernel does not have order 2^(two_resp_length");
+#endif
+
+    // a non-zero result from ec_eval_small_chain is treated as failure
+    if (ec_eval_small_chain(E_chall, &kernel, sig->two_resp_length, images, 3, false))
+        return 0;
+
+#ifndef NDEBUG
+    if (!test_point_order_twof(&images[0], E_chall, HD_extra_torsion + pow_dim2_deg_resp))
+        debug_print("points[0] does not have order 2^(HD_extra_torsion + pow_dim2_deg_resp");
+    if (!test_point_order_twof(&images[1], E_chall, HD_extra_torsion + pow_dim2_deg_resp))
+        debug_print("points[1] does not have order 2^(HD_extra_torsion + pow_dim2_deg_resp");
+    if (!test_point_order_twof(&images[2], E_chall, HD_extra_torsion + pow_dim2_deg_resp))
+        debug_print("points[2] does not have order 2^(HD_extra_torsion + pow_dim2_deg_resp");
+#endif
+
+    // Store the evaluated points back as the basis on the updated E_chall.
+    copy_point(&B_chall_can->P, &images[0]);
+    copy_point(&B_chall_can->Q, &images[1]);
+    copy_point(&B_chall_can->PmQ, &images[2]);
+    return 1;
+}
+
+// The commitment curve can be recovered from the codomain of the 2D
+// isogeny built from the bases computed during verification.
+static int +compute_commitment_curve_verify(ec_curve_t *E_com, + const ec_basis_t *B_chall_can, + const ec_basis_t *B_aux_can, + const ec_curve_t *E_chall, + const ec_curve_t *E_aux, + int pow_dim2_deg_resp) + +{ +#ifndef NDEBUG + // Check all the points are the correct order + if (!test_basis_order_twof(B_chall_can, E_chall, HD_extra_torsion + pow_dim2_deg_resp)) + debug_print("B_chall_can does not have order 2^(HD_extra_torsion + pow_dim2_deg_resp"); + + if (!test_basis_order_twof(B_aux_can, E_aux, HD_extra_torsion + pow_dim2_deg_resp)) + debug_print("B_aux_can does not have order 2^(HD_extra_torsion + pow_dim2_deg_resp"); +#endif + + // now compute the dim2 isogeny from Echall x E_aux -> E_com x E_aux' + // of kernel B_chall_can x B_aux_can + + // first we set-up the kernel + theta_couple_curve_t EchallxEaux; + copy_curve(&EchallxEaux.E1, E_chall); + copy_curve(&EchallxEaux.E2, E_aux); + + theta_kernel_couple_points_t dim_two_ker; + copy_bases_to_kernel(&dim_two_ker, B_chall_can, B_aux_can); + + // computing the isogeny + theta_couple_curve_t codomain; + int codomain_splits; + ec_curve_init(&codomain.E1); + ec_curve_init(&codomain.E2); + // handling the special case where we don't need to perform any dim2 computation + if (pow_dim2_deg_resp == 0) { + codomain_splits = 1; + copy_curve(&codomain.E1, &EchallxEaux.E1); + copy_curve(&codomain.E2, &EchallxEaux.E2); + // We still need to check that E_chall is supersingular + // This assumes that HD_extra_torsion == 2 + if (!ec_is_basis_four_torsion(B_chall_can, E_chall)) { + return 0; + } + } else { + codomain_splits = theta_chain_compute_and_eval_verify( + pow_dim2_deg_resp, &EchallxEaux, &dim_two_ker, true, &codomain, NULL, 0); + } + + // computing the commitment curve + // its always the first one because of our (2^n,2^n)-isogeny formulae + copy_curve(E_com, &codomain.E1); + + return codomain_splits; +} + +// SQIsign verification +int +protocols_verify(signature_t *sig, const public_key_t *pk, const unsigned char 
*m, size_t l) +{ + int verify; + + if (!check_canonical_basis_change_matrix(sig)) + return 0; + + // Computation of the length of the dim 2 2^n isogeny + int pow_dim2_deg_resp = SQIsign_response_length - (int)sig->two_resp_length - (int)sig->backtracking; + + // basic sanity test: checking that the response is not too long + if (pow_dim2_deg_resp < 0) + return 0; + // The dim 2 isogeny embeds a dim 1 isogeny of odd degree, so it can + // never be of length 2. + if (pow_dim2_deg_resp == 1) + return 0; + + // check the public curve is valid + if (!ec_curve_verify_A(&(pk->curve).A)) + return 0; + + // Set auxiliary curve from the A-coefficient within the signature + ec_curve_t E_aux; + if (!ec_curve_init_from_A(&E_aux, &sig->E_aux_A)) + return 0; // invalid curve + + // checking that we are given A-coefficients and no precomputation + assert(fp2_is_one(&pk->curve.C) == 0xFFFFFFFF && !pk->curve.is_A24_computed_and_normalized); + + // computation of the challenge + ec_curve_t E_chall; + if (!compute_challenge_verify(&E_chall, sig, &pk->curve, pk->hint_pk)) { + return 0; + } + + // Computation of the canonical bases for the challenge and aux curve + ec_basis_t B_chall_can, B_aux_can; + + if (!challenge_and_aux_basis_verify(&B_chall_can, &B_aux_can, &E_chall, &E_aux, sig, pow_dim2_deg_resp)) { + return 0; + } + + // When two_resp_length != 0 we need to compute a second, short 2^r-isogeny + if (sig->two_resp_length > 0) { + if (!two_response_isogeny_verify(&E_chall, &B_chall_can, sig, pow_dim2_deg_resp)) { + return 0; + } + } + + // We can recover the commitment curve with a 2D isogeny + // The supplied signature did not compute an isogeny between eliptic products + // and so definitely is an invalid signature. 
+ ec_curve_t E_com; + if (!compute_commitment_curve_verify(&E_com, &B_chall_can, &B_aux_can, &E_chall, &E_aux, pow_dim2_deg_resp)) + return 0; + + scalar_t chk_chall; + + // recomputing the challenge vector + hash_to_challenge(&chk_chall, pk, &E_com, m, l); + + // performing the final check + verify = mp_compare(sig->chall_coeff, chk_chall, NWORDS_ORDER) == 0; + + return verify; +} diff --git a/src/pqm4/sqisign_lvl3/ref/xeval.c b/src/pqm4/sqisign_lvl3/ref/xeval.c new file mode 100644 index 0000000..7fc7170 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/xeval.c @@ -0,0 +1,64 @@ +#include "isog.h" +#include "ec.h" +#include + +// ----------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------------------- + +// Degree-2 isogeny evaluation with kenerl generated by P != (0, 0) +void +xeval_2(ec_point_t *R, ec_point_t *const Q, const int lenQ, const ec_kps2_t *kps) +{ + fp2_t t0, t1, t2; + for (int j = 0; j < lenQ; j++) { + fp2_add(&t0, &Q[j].x, &Q[j].z); + fp2_sub(&t1, &Q[j].x, &Q[j].z); + fp2_mul(&t2, &kps->K.x, &t1); + fp2_mul(&t1, &kps->K.z, &t0); + fp2_add(&t0, &t2, &t1); + fp2_sub(&t1, &t2, &t1); + fp2_mul(&R[j].x, &Q[j].x, &t0); + fp2_mul(&R[j].z, &Q[j].z, &t1); + } +} + +void +xeval_2_singular(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_kps2_t *kps) +{ + fp2_t t0, t1; + for (int i = 0; i < lenQ; i++) { + fp2_mul(&t0, &Q[i].x, &Q[i].z); + fp2_mul(&t1, &kps->K.x, &Q[i].z); + fp2_add(&t1, &t1, &Q[i].x); + fp2_mul(&t1, &t1, &Q[i].x); + fp2_sqr(&R[i].x, &Q[i].z); + fp2_add(&R[i].x, &R[i].x, &t1); + fp2_mul(&R[i].z, &t0, &kps->K.z); + } +} + +// Degree-4 isogeny evaluation with kenerl generated by P such that [2]P != (0, 0) +void +xeval_4(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_kps4_t *kps) +{ + const ec_point_t *K = kps->K; + + fp2_t t0, t1; + + for (int i = 0; i < lenQ; i++) { + fp2_add(&t0, &Q[i].x, &Q[i].z); + fp2_sub(&t1, 
&Q[i].x, &Q[i].z); + fp2_mul(&(R[i].x), &t0, &K[1].x); + fp2_mul(&(R[i].z), &t1, &K[2].x); + fp2_mul(&t0, &t0, &t1); + fp2_mul(&t0, &t0, &K[0].x); + fp2_add(&t1, &(R[i].x), &(R[i].z)); + fp2_sub(&(R[i].z), &(R[i].x), &(R[i].z)); + fp2_sqr(&t1, &t1); + fp2_sqr(&(R[i].z), &(R[i].z)); + fp2_add(&(R[i].x), &t0, &t1); + fp2_sub(&t0, &t0, &(R[i].z)); + fp2_mul(&(R[i].x), &(R[i].x), &t1); + fp2_mul(&(R[i].z), &(R[i].z), &t0); + } +} diff --git a/src/pqm4/sqisign_lvl3/ref/xisog.c b/src/pqm4/sqisign_lvl3/ref/xisog.c new file mode 100644 index 0000000..7242d29 --- /dev/null +++ b/src/pqm4/sqisign_lvl3/ref/xisog.c @@ -0,0 +1,61 @@ +#include "isog.h" +#include "ec.h" +#include + +// ------------------------------------------------------------------------- +// ------------------------------------------------------------------------- + +// Degree-2 isogeny with kernel generated by P != (0 ,0) +// Outputs the curve coefficient in the form A24=(A+2C:4C) +void +xisog_2(ec_kps2_t *kps, ec_point_t *B, const ec_point_t P) +{ + fp2_sqr(&B->x, &P.x); + fp2_sqr(&B->z, &P.z); + fp2_sub(&B->x, &B->z, &B->x); + fp2_add(&kps->K.x, &P.x, &P.z); + fp2_sub(&kps->K.z, &P.x, &P.z); +} + +void +xisog_2_singular(ec_kps2_t *kps, ec_point_t *B24, ec_point_t A24) +{ + // No need to check the square root, only used for signing. 
+ fp2_t t0, four; + fp2_set_small(&four, 4); + fp2_add(&t0, &A24.x, &A24.x); + fp2_sub(&t0, &t0, &A24.z); + fp2_add(&t0, &t0, &t0); + fp2_inv(&A24.z); + fp2_mul(&t0, &t0, &A24.z); + fp2_copy(&kps->K.x, &t0); + fp2_add(&B24->x, &t0, &t0); + fp2_sqr(&t0, &t0); + fp2_sub(&t0, &t0, &four); + fp2_sqrt(&t0); + fp2_neg(&kps->K.z, &t0); + fp2_add(&B24->z, &t0, &t0); + fp2_add(&B24->x, &B24->x, &B24->z); + fp2_add(&B24->z, &B24->z, &B24->z); +} + +// Degree-4 isogeny with kernel generated by P such that [2]P != (0 ,0) +// Outputs the curve coefficient in the form A24=(A+2C:4C) +void +xisog_4(ec_kps4_t *kps, ec_point_t *B, const ec_point_t P) +{ + ec_point_t *K = kps->K; + + fp2_sqr(&K[0].x, &P.x); + fp2_sqr(&K[0].z, &P.z); + fp2_add(&K[1].x, &K[0].z, &K[0].x); + fp2_sub(&K[1].z, &K[0].z, &K[0].x); + fp2_mul(&B->x, &K[1].x, &K[1].z); + fp2_sqr(&B->z, &K[0].z); + + // Constants for xeval_4 + fp2_add(&K[2].x, &P.x, &P.z); + fp2_sub(&K[1].x, &P.x, &P.z); + fp2_add(&K[0].x, &K[0].z, &K[0].z); + fp2_add(&K[0].x, &K[0].x, &K[0].x); +} diff --git a/src/pqm4/sqisign_lvl5/ref/api.h b/src/pqm4/sqisign_lvl5/ref/api.h new file mode 100644 index 0000000..cf96baf --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/api.h @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: Apache-2.0 + +#ifndef api_h +#define api_h + +#include +#include + +#define CRYPTO_SECRETKEYBYTES 701 +#define CRYPTO_PUBLICKEYBYTES 129 +#define CRYPTO_BYTES 292 + +#define CRYPTO_ALGNAME "SQIsign_lvl5" + +SQISIGN_API +int +crypto_sign_keypair(unsigned char *pk, unsigned char *sk); + +SQISIGN_API +int +crypto_sign(unsigned char *sm, size_t *smlen, + const unsigned char *m, size_t mlen, + const unsigned char *sk); + +SQISIGN_API +int +crypto_sign_open(unsigned char *m, size_t *mlen, + const unsigned char *sm, size_t smlen, + const unsigned char *pk); + +#endif /* api_h */ diff --git a/src/pqm4/sqisign_lvl5/ref/basis.c b/src/pqm4/sqisign_lvl5/ref/basis.c new file mode 100644 index 0000000..94cb7fc --- /dev/null +++ 
b/src/pqm4/sqisign_lvl5/ref/basis.c @@ -0,0 +1,416 @@ +#include "ec.h" +#include "fp2.h" +#include "e0_basis.h" +#include + +uint32_t +ec_recover_y(fp2_t *y, const fp2_t *Px, const ec_curve_t *curve) +{ // Recover y-coordinate of a point on the Montgomery curve y^2 = x^3 + Ax^2 + x + fp2_t t0; + + fp2_sqr(&t0, Px); + fp2_mul(y, &t0, &curve->A); // Ax^2 + fp2_add(y, y, Px); // Ax^2 + x + fp2_mul(&t0, &t0, Px); + fp2_add(y, y, &t0); // x^3 + Ax^2 + x + // This is required, because we do not yet know that our curves are + // supersingular so our points live on the twist with B = 1. + return fp2_sqrt_verify(y); +} + +static void +difference_point(ec_point_t *PQ, const ec_point_t *P, const ec_point_t *Q, const ec_curve_t *curve) +{ + // Given P,Q in projective x-only, computes a deterministic choice for (P-Q) + // Based on Proposition 3 of https://eprint.iacr.org/2017/518.pdf + + fp2_t Bxx, Bxz, Bzz, t0, t1; + + fp2_mul(&t0, &P->x, &Q->x); + fp2_mul(&t1, &P->z, &Q->z); + fp2_sub(&Bxx, &t0, &t1); + fp2_sqr(&Bxx, &Bxx); + fp2_mul(&Bxx, &Bxx, &curve->C); // C*(P.x*Q.x-P.z*Q.z)^2 + fp2_add(&Bxz, &t0, &t1); + fp2_mul(&t0, &P->x, &Q->z); + fp2_mul(&t1, &P->z, &Q->x); + fp2_add(&Bzz, &t0, &t1); + fp2_mul(&Bxz, &Bxz, &Bzz); // (P.x*Q.x+P.z*Q.z)(P.x*Q.z+P.z*Q.x) + fp2_sub(&Bzz, &t0, &t1); + fp2_sqr(&Bzz, &Bzz); + fp2_mul(&Bzz, &Bzz, &curve->C); // C*(P.x*Q.z-P.z*Q.x)^2 + fp2_mul(&Bxz, &Bxz, &curve->C); // C*(P.x*Q.x+P.z*Q.z)(P.x*Q.z+P.z*Q.x) + fp2_mul(&t0, &t0, &t1); + fp2_mul(&t0, &t0, &curve->A); + fp2_add(&t0, &t0, &t0); + fp2_add(&Bxz, &Bxz, &t0); // C*(P.x*Q.x+P.z*Q.z)(P.x*Q.z+P.z*Q.x) + 2*A*P.x*Q.z*P.z*Q.x + + // To ensure that the denominator is a fourth power in Fp, we normalize by + // C*C_bar^2*(P.z)_bar^2*(Q.z)_bar^2 + fp_copy(&t0.re, &curve->C.re); + fp_neg(&t0.im, &curve->C.im); + fp2_sqr(&t0, &t0); + fp2_mul(&t0, &t0, &curve->C); + fp_copy(&t1.re, &P->z.re); + fp_neg(&t1.im, &P->z.im); + fp2_sqr(&t1, &t1); + fp2_mul(&t0, &t0, &t1); + fp_copy(&t1.re, &Q->z.re); + 
fp_neg(&t1.im, &Q->z.im); + fp2_sqr(&t1, &t1); + fp2_mul(&t0, &t0, &t1); + fp2_mul(&Bxx, &Bxx, &t0); + fp2_mul(&Bxz, &Bxz, &t0); + fp2_mul(&Bzz, &Bzz, &t0); + + // Solving quadratic equation + fp2_sqr(&t0, &Bxz); + fp2_mul(&t1, &Bxx, &Bzz); + fp2_sub(&t0, &t0, &t1); + // No need to check if t0 is square, as per the entangled basis algorithm. + fp2_sqrt(&t0); + fp2_add(&PQ->x, &Bxz, &t0); + fp2_copy(&PQ->z, &Bzz); +} + +// Lifts a basis x(P), x(Q), x(P-Q) assuming the curve has (A/C : 1) and the point +// P = (X/Z : 1). For generic implementation see lift_basis() +uint32_t +lift_basis_normalized(jac_point_t *P, jac_point_t *Q, ec_basis_t *B, ec_curve_t *E) +{ + assert(fp2_is_one(&B->P.z)); + assert(fp2_is_one(&E->C)); + + fp2_copy(&P->x, &B->P.x); + fp2_copy(&Q->x, &B->Q.x); + fp2_copy(&Q->z, &B->Q.z); + fp2_set_one(&P->z); + uint32_t ret = ec_recover_y(&P->y, &P->x, E); + + // Algorithm of Okeya-Sakurai to recover y.Q in the montgomery model + fp2_t v1, v2, v3, v4; + fp2_mul(&v1, &P->x, &Q->z); + fp2_add(&v2, &Q->x, &v1); + fp2_sub(&v3, &Q->x, &v1); + fp2_sqr(&v3, &v3); + fp2_mul(&v3, &v3, &B->PmQ.x); + fp2_add(&v1, &E->A, &E->A); + fp2_mul(&v1, &v1, &Q->z); + fp2_add(&v2, &v2, &v1); + fp2_mul(&v4, &P->x, &Q->x); + fp2_add(&v4, &v4, &Q->z); + fp2_mul(&v2, &v2, &v4); + fp2_mul(&v1, &v1, &Q->z); + fp2_sub(&v2, &v2, &v1); + fp2_mul(&v2, &v2, &B->PmQ.z); + fp2_sub(&Q->y, &v3, &v2); + fp2_add(&v1, &P->y, &P->y); + fp2_mul(&v1, &v1, &Q->z); + fp2_mul(&v1, &v1, &B->PmQ.z); + fp2_mul(&Q->x, &Q->x, &v1); + fp2_mul(&Q->z, &Q->z, &v1); + + // Transforming to a jacobian coordinate + fp2_sqr(&v1, &Q->z); + fp2_mul(&Q->y, &Q->y, &v1); + fp2_mul(&Q->x, &Q->x, &Q->z); + return ret; +} + +uint32_t +lift_basis(jac_point_t *P, jac_point_t *Q, ec_basis_t *B, ec_curve_t *E) +{ + // Normalise the curve E such that (A : C) is (A/C : 1) + // and the point x(P) = (X/Z : 1). 
+ fp2_t inverses[2]; + fp2_copy(&inverses[0], &B->P.z); + fp2_copy(&inverses[1], &E->C); + + fp2_batched_inv(inverses, 2); + fp2_set_one(&B->P.z); + fp2_set_one(&E->C); + + fp2_mul(&B->P.x, &B->P.x, &inverses[0]); + fp2_mul(&E->A, &E->A, &inverses[1]); + + // Lift the basis to Jacobian points P, Q + return lift_basis_normalized(P, Q, B, E); +} + +// Given an x-coordinate, determines if this is a valid +// point on the curve. Assumes C=1. +static uint32_t +is_on_curve(const fp2_t *x, const ec_curve_t *curve) +{ + assert(fp2_is_one(&curve->C)); + fp2_t t0; + + fp2_add(&t0, x, &curve->A); // x + (A/C) + fp2_mul(&t0, &t0, x); // x^2 + (A/C)*x + fp2_add_one(&t0, &t0); // x^2 + (A/C)*x + 1 + fp2_mul(&t0, &t0, x); // x^3 + (A/C)*x^2 + x + + return fp2_is_square(&t0); +} + +// Helper function which given a point of order k*2^n with n maximal +// and k odd, computes a point of order 2^f +static inline void +clear_cofactor_for_maximal_even_order(ec_point_t *P, ec_curve_t *curve, int f) +{ + // clear out the odd cofactor to get a point of order 2^n + ec_mul(P, p_cofactor_for_2f, P_COFACTOR_FOR_2F_BITLENGTH, P, curve); + + // clear the power of two to get a point of order 2^f + for (int i = 0; i < TORSION_EVEN_POWER - f; i++) { + xDBL_A24(P, P, &curve->A24, curve->is_A24_computed_and_normalized); + } +} + +// Helper function which finds an NQR -1 / (1 + i*b) for entangled basis generation +static uint8_t +find_nqr_factor(fp2_t *x, ec_curve_t *curve, const uint8_t start) +{ + // factor = -1/(1 + i*b) for b in Fp will be NQR whenever 1 + b^2 is NQR + // in Fp, so we find one of these and then invert (1 + i*b). We store b + // as a u8 hint to save time in verification. + + // We return the hint as a u8, but use (uint16_t)n to give 2^16 - 1 + // to make failure cryptographically negligible, with a fallback when + // n > 128 is required. 
+ uint8_t hint; + uint32_t found = 0; + uint16_t n = start; + + bool qr_b = 1; + fp_t b, tmp; + fp2_t z, t0, t1; + + do { + while (qr_b) { + // find b with 1 + b^2 a non-quadratic residue + fp_set_small(&tmp, (uint32_t)n * n + 1); + qr_b = fp_is_square(&tmp); + n++; // keeps track of b = n - 1 + } + + // for Px := -A/(1 + i*b) to be on the curve + // is equivalent to A^2*(z-1) - z^2 NQR for z = 1 + i*b + // thus prevents unnecessary inversion pre-check + + // t0 = z - 1 = i*b + // t1 = z = 1 + i*b + fp_set_small(&b, (uint32_t)n - 1); + fp2_set_zero(&t0); + fp2_set_one(&z); + fp_copy(&z.im, &b); + fp_copy(&t0.im, &b); + + // A^2*(z-1) - z^2 + fp2_sqr(&t1, &curve->A); + fp2_mul(&t0, &t0, &t1); // A^2 * (z - 1) + fp2_sqr(&t1, &z); + fp2_sub(&t0, &t0, &t1); // A^2 * (z - 1) - z^2 + found = !fp2_is_square(&t0); + + qr_b = 1; + } while (!found); + + // set Px to -A/(1 + i*b) + fp2_copy(x, &z); + fp2_inv(x); + fp2_mul(x, x, &curve->A); + fp2_neg(x, x); + + /* + * With very low probability n will not fit in 7 bits. + * We set hint = 0 which signals failure and the need + * to generate a value on the fly during verification + */ + hint = n <= 128 ? n - 1 : 0; + + return hint; +} + +// Helper function which finds a point x(P) = n * A +static uint8_t +find_nA_x_coord(fp2_t *x, ec_curve_t *curve, const uint8_t start) +{ + assert(!fp2_is_square(&curve->A)); // Only to be called when A is a NQR + + // when A is NQR we allow x(P) to be a multiple n*A of A + uint8_t n = start; + if (n == 1) { + fp2_copy(x, &curve->A); + } else { + fp2_mul_small(x, &curve->A, n); + } + + while (!is_on_curve(x, curve)) { + fp2_add(x, x, &curve->A); + n++; + } + + /* + * With very low probability (1/2^128), n will not fit in 7 bits. + * In this case, we set hint = 0 which signals failure and the need + * to generate a value on the fly during verification + */ + uint8_t hint = n < 128 ? 
n : 0; + return hint; +} + +// The entangled basis generation does not allow A = 0 +// so we simply return the one we have already precomputed +static void +ec_basis_E0_2f(ec_basis_t *PQ2, ec_curve_t *curve, int f) +{ + assert(fp2_is_zero(&curve->A)); + ec_point_t P, Q; + + // Set P, Q to precomputed (X : 1) values + fp2_copy(&P.x, &BASIS_E0_PX); + fp2_copy(&Q.x, &BASIS_E0_QX); + fp2_set_one(&P.z); + fp2_set_one(&Q.z); + + // double both points (TORSION_EVEN_POWER - f) times to get points of order 2^f + for (int i = 0; i < TORSION_EVEN_POWER - f; i++) { + xDBL_E0(&P, &P); + xDBL_E0(&Q, &Q); + } + + // Set P, Q in the basis and compute x(P - Q) + copy_point(&PQ2->P, &P); + copy_point(&PQ2->Q, &Q); + difference_point(&PQ2->PmQ, &P, &Q, curve); +} + +// Computes a basis E[2^f] = <P, Q> where the point Q is above (0 : 0) +// and returns a one-byte hint for faster recomputation at a later point (0 when A = 0) +uint8_t +ec_curve_to_basis_2f_to_hint(ec_basis_t *PQ2, ec_curve_t *curve, int f) +{ + // Normalise (A/C : 1) and ((A + 2)/4 : 1) + ec_normalize_curve_and_A24(curve); + + if (fp2_is_zero(&curve->A)) { + ec_basis_E0_2f(PQ2, curve, f); + return 0; + } + + uint8_t hint; + bool hint_A = fp2_is_square(&curve->A); + + // Compute the points P, Q + ec_point_t P, Q; + + if (!hint_A) { + // when A is NQR we allow x(P) to be a multiple n*A of A + hint = find_nA_x_coord(&P.x, curve, 1); + } else { + // when A is QR we instead have to find (1 + b^2) a NQR + // such that x(P) = -A / (1 + i*b) + hint = find_nqr_factor(&P.x, curve, 1); + } + + fp2_set_one(&P.z); + fp2_add(&Q.x, &curve->A, &P.x); + fp2_neg(&Q.x, &Q.x); // x(Q) = -x(P) - A + fp2_set_one(&Q.z); + + // clear out the odd cofactor to get a point of order 2^f + clear_cofactor_for_maximal_even_order(&P, curve, f); + clear_cofactor_for_maximal_even_order(&Q, curve, f); + + // compute x(P - Q) and store it as the basis point Q, while the computed Q is stored as PmQ, to ensure Q above (0,0) + difference_point(&PQ2->Q, &P, &Q, curve); + copy_point(&PQ2->P, &P); + copy_point(&PQ2->PmQ, &Q); + + // Finally, we compress hint_A and hint into a single byte.
+ // We choose to set the LSB of hint to hint_A + assert(hint < 128); // We expect hint to be 7-bits in size + return (hint << 1) | hint_A; +} + +// Computes a basis E[2^f] = where the point Q is above (0 : 0) +// given the hints as an array for faster basis computation +int +ec_curve_to_basis_2f_from_hint(ec_basis_t *PQ2, ec_curve_t *curve, int f, const uint8_t hint) +{ + // Normalise (A/C : 1) and ((A + 2)/4 : 1) + ec_normalize_curve_and_A24(curve); + + if (fp2_is_zero(&curve->A)) { + ec_basis_E0_2f(PQ2, curve, f); + return 1; + } + + // The LSB of hint encodes whether A is a QR + // The remaining 7-bits are used to find a valid x(P) + bool hint_A = hint & 1; + uint8_t hint_P = hint >> 1; + + // Compute the points P, Q + ec_point_t P, Q; + + if (!hint_P) { + // When hint_P = 0 it means we did not find a point in 128 attempts + // this is very rare and we almost never expect to need this fallback + // In either case, we can start with b = 128 to skip testing the known + // values which will not work + if (!hint_A) { + find_nA_x_coord(&P.x, curve, 128); + } else { + find_nqr_factor(&P.x, curve, 128); + } + } else { + // Otherwise we use the hint to directly find x(P) based on hint_A + if (!hint_A) { + // when A is NQR, we have found n such that x(P) = n*A + fp2_mul_small(&P.x, &curve->A, hint_P); + } else { + // when A is QR we have found b such that (1 + b^2) is a NQR in + // Fp, so we must compute x(P) = -A / (1 + i*b) + fp_set_one(&P.x.re); + fp_set_small(&P.x.im, hint_P); + fp2_inv(&P.x); + fp2_mul(&P.x, &P.x, &curve->A); + fp2_neg(&P.x, &P.x); + } + } + fp2_set_one(&P.z); + +#ifndef NDEBUG + int passed = 1; + passed = is_on_curve(&P.x, curve); + passed &= !fp2_is_square(&P.x); + + if (!passed) + return 0; +#endif + + // set xQ to -xP - A + fp2_add(&Q.x, &curve->A, &P.x); + fp2_neg(&Q.x, &Q.x); + fp2_set_one(&Q.z); + + // clear out the odd cofactor to get a point of order 2^f + clear_cofactor_for_maximal_even_order(&P, curve, f); + 
clear_cofactor_for_maximal_even_order(&Q, curve, f); + + // compute PmQ, set PmQ to Q to ensure Q above (0,0) + difference_point(&PQ2->Q, &P, &Q, curve); + copy_point(&PQ2->P, &P); + copy_point(&PQ2->PmQ, &Q); + +#ifndef NDEBUG + passed &= test_basis_order_twof(PQ2, curve, f); + + if (!passed) + return 0; +#endif + + return 1; +} diff --git a/src/pqm4/sqisign_lvl5/ref/common.c b/src/pqm4/sqisign_lvl5/ref/common.c new file mode 100644 index 0000000..d393e9c --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/common.c @@ -0,0 +1,88 @@ +#include +#include +#include +#include +#include +#include + +void +public_key_init(public_key_t *pk) +{ + ec_curve_init(&pk->curve); +} + +void +public_key_finalize(public_key_t *pk) +{ +} + +// compute the challenge as the hash of the message and the commitment curve and public key +void +hash_to_challenge(scalar_t *scalar, + const public_key_t *pk, + const ec_curve_t *com_curve, + const unsigned char *message, + size_t length) +{ + unsigned char buf[2 * FP2_ENCODED_BYTES]; + { + fp2_t j1, j2; + ec_j_inv(&j1, &pk->curve); + ec_j_inv(&j2, com_curve); + fp2_encode(buf, &j1); + fp2_encode(buf + FP2_ENCODED_BYTES, &j2); + } + + { + // The type scalar_t represents an element of GF(p), which is about + // 2*lambda bits, where lambda = 128, 192 or 256, according to the + // security level. Thus, the variable scalar should have enough memory + // for the values produced by SHAKE256 in the intermediate iterations. 
+ + shake256incctx ctx; + + size_t hash_bytes = ((2 * SECURITY_BITS) + 7) / 8; + size_t limbs = (hash_bytes + sizeof(digit_t) - 1) / sizeof(digit_t); + size_t bits = (2 * SECURITY_BITS) % RADIX; + digit_t mask = ((digit_t)-1) >> ((RADIX - bits) % RADIX); +#ifdef TARGET_BIG_ENDIAN + mask = BSWAP_DIGIT(mask); +#endif + + shake256_inc_init(&ctx); + shake256_inc_absorb(&ctx, buf, 2 * FP2_ENCODED_BYTES); + shake256_inc_absorb(&ctx, message, length); + shake256_inc_finalize(&ctx); + shake256_inc_squeeze((void *)(*scalar), hash_bytes, &ctx); + (*scalar)[limbs - 1] &= mask; + for (int i = 2; i < HASH_ITERATIONS; i++) { + shake256_inc_init(&ctx); + shake256_inc_absorb(&ctx, (void *)(*scalar), hash_bytes); + shake256_inc_finalize(&ctx); + shake256_inc_squeeze((void *)(*scalar), hash_bytes, &ctx); + (*scalar)[limbs - 1] &= mask; + } + shake256_inc_init(&ctx); + shake256_inc_absorb(&ctx, (void *)(*scalar), hash_bytes); + shake256_inc_finalize(&ctx); + + hash_bytes = ((TORSION_EVEN_POWER - SQIsign_response_length) + 7) / 8; + limbs = (hash_bytes + sizeof(digit_t) - 1) / sizeof(digit_t); + bits = (TORSION_EVEN_POWER - SQIsign_response_length) % RADIX; + mask = ((digit_t)-1) >> ((RADIX - bits) % RADIX); +#ifdef TARGET_BIG_ENDIAN + mask = BSWAP_DIGIT(mask); +#endif + + memset(*scalar, 0, NWORDS_ORDER * sizeof(digit_t)); + shake256_inc_squeeze((void *)(*scalar), hash_bytes, &ctx); + (*scalar)[limbs - 1] &= mask; + +#ifdef TARGET_BIG_ENDIAN + for (int i = 0; i < NWORDS_ORDER; i++) + (*scalar)[i] = BSWAP_DIGIT((*scalar)[i]); +#endif + + mp_mod_2exp(*scalar, SECURITY_BITS, NWORDS_ORDER); + } +} diff --git a/src/pqm4/sqisign_lvl5/ref/config.mk b/src/pqm4/sqisign_lvl5/ref/config.mk new file mode 100644 index 0000000..7eead5b --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/config.mk @@ -0,0 +1,2 @@ +elf/crypto_sign_sqisign_lvl5_ref_%.elf: CPPFLAGS+=-DRADIX_32 -DSQISIGN_BUILD_TYPE_REF -DSQISIGN_GF_IMPL_REF -DSQISIGN_VARIANT=lvl5 -DTARGET_ARM -DTARGET_OS_OTHER -DNDEBUG -DDISABLE_NAMESPACING 
-DBIG_PUBLIC_KEY_TESTS +obj/libcrypto_sign_sqisign_lvl5_ref.a: CPPFLAGS+=-DRADIX_32 -DSQISIGN_BUILD_TYPE_REF -DSQISIGN_GF_IMPL_REF -DSQISIGN_VARIANT=lvl5 -DTARGET_ARM -DTARGET_OS_OTHER -DNDEBUG -DDISABLE_NAMESPACING -DBIG_PUBLIC_KEY_TESTS diff --git a/src/pqm4/sqisign_lvl5/ref/e0_basis.c b/src/pqm4/sqisign_lvl5/ref/e0_basis.c new file mode 100644 index 0000000..a7148e4 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/e0_basis.c @@ -0,0 +1,55 @@ +#include +const fp2_t BASIS_E0_PX = { +#if 0 +#elif RADIX == 16 +{0x1099, 0xa9f, 0x14f8, 0x1537, 0x1a13, 0x97e, 0x1095, 0xc8b, 0xdd2, 0x1c5f, 0xbdf, 0x1344, 0x1330, 0x1733, 0x185d, 0x1b08, 0x464, 0x76f, 0xe44, 0x3fc, 0x1dc0, 0x1c62, 0x88, 0x972, 0x13f4, 0x18c8, 0x6bd, 0x804, 0x1269, 0x19e0, 0x14bd, 0x10a1, 0xe5e, 0x1af2, 0x156c, 0x3f7, 0x16a1, 0x47d, 0x314} +#elif RADIX == 32 +{0x184cba61, 0xf4f854f, 0x1fb42753, 0x45c2552, 0x1c5f6e93, 0x2688bdf, 0xedcce66, 0x64d8461, 0x1c8876f2, 0x177007f8, 0x12044718, 0x1913f44b, 0x10d7b8, 0x1cf049a5, 0x1d0a1a5e, 0x1b35e4e5, 0x1508fdea, 0x66d} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x27537a7c2a7e132e, 0x7dba4c8b84aa5fb4, 0xedcce6613445eff1, 0xc7221dbc8c9b08c2, 0x8972044718bb803f, 0x24d280435ee3227e, 0x6bc9cbd0a1a5ee78, 0x1011f6d423f7ab6} +#else +{0xa6f4f854fc265d, 0x4c8b84aa5fb427, 0x1309a22f7f8bedd, 0x12326c230bb7339, 0x1177007f8e443b7, 0x3227e897204471, 0x173c12694021af7, 0xd9af272f428697, 0x523eda847ef5} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0x4b1, 0x178f, 0x107b, 0x6f6, 0x75e, 0x1b27, 0x4db, 0x1e1b, 0xd78, 0x15b6, 0x1130, 0x8cc, 0x1ac0, 0x9b7, 0x692, 0x1e07, 0x1f4, 0xfd7, 0x2ab, 0x7b5, 0x1040, 0xa43, 0xb6d, 0x13a1, 0x1422, 0x10c9, 0x10b0, 0x1540, 0x827, 0xa69, 0x1761, 0x1f25, 0x1d16, 0x16f2, 0x1fcb, 0x92, 0xcba, 0x1c03, 0x3c7} +#elif RADIX == 32 +{0x1258c7b1, 0xd07bbc7, 0x9cebc6f, 0x10d936f6, 0x15b66bc7, 0x1199130, 0x926df58, 0x1f4f039a, 0x556fd70, 0x1c100f6a, 0x15b6a90, 0x1934229d, 0x15021610, 0x1534a09e, 0xdf25bb0, 0x12ede5d1, 0x5d024bf, 0xa9b} +#elif 
RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0xbc6f683dde3c9631, 0xd9af1e1b26dec9ce, 0x926df5808cc89856, 0x5155bf5c3e9e0734, 0x53a15b6a90e0807b, 0x504f540858432684, 0xdbcba2df25bb0a9a, 0x18f00d974092fe5} +#else +{0xded07bbc792c63, 0x11e1b26dec9cebc, 0xc046644c2b6cd7, 0x10fa781cd249b7d, 0x1c100f6a2ab7eb, 0x3268453a15b6a9, 0x54d2827aa042c2, 0x1976f2e8b7c96ec, 0x16e01b2e8125f} +#endif +#endif +}; +const fp2_t BASIS_E0_QX = { +#if 0 +#elif RADIX == 16 +{0x15c, 0x865, 0x1af6, 0x17b9, 0x6a2, 0x1c22, 0x17c5, 0x1149, 0xa7, 0x151e, 0xe57, 0x4c2, 0x18cd, 0xbd2, 0x7a4, 0x7c6, 0x74a, 0xd2, 0x902, 0x68c, 0x21e, 0x1e44, 0x1f5a, 0x1d4c, 0x115b, 0x1777, 0x16d4, 0x503, 0x3af, 0x7e4, 0x1aa7, 0x3dd, 0x827, 0x186b, 0x765, 0x1fc5, 0xc78, 0x9bd, 0xfe} +#elif RADIX == 32 +{0x10ae12d6, 0x13af6432, 0x88d457b, 0xa4df178, 0x151e053c, 0x14984e57, 0x122f4b19, 0x14a3e31e, 0x12040d23, 0x878d18, 0xcfad791, 0xef15bea, 0x140eda97, 0x13f20ebc, 0xe3ddd53, 0x1970d682, 0x3c7f14e, 0x4eb} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x457b9d7b21942b84, 0x7814f149be2f088d, 0x22f4b19a4c272bd4, 0xc4810348e947c63d, 0x7d4cfad791043c68, 0x75e503b6a5dde2b, 0xe1ad04e3ddd539f9, 0x1326f58f1fc53b2} +#else +{0xf73af643285709, 0xf149be2f088d45, 0xcd261395ea3c0a, 0x3a51f18f48bd2c, 0x20878d18902069, 0x1dde2b7d4cfad79, 0x1cfc83af281db52, 0xcb86b4138f7754, 0xb4deb1e3f8a7} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0x6ac, 0x25e, 0xc7a, 0x1492, 0xd01, 0xbc0, 0x118, 0x376, 0x3e0, 0x7ae, 0x573, 0x171f, 0x35a, 0x1725, 0x48f, 0xc94, 0x133c, 0x16a4, 0x10a8, 0x178d, 0xdd7, 0x798, 0x1d05, 0x39f, 0xc2a, 0x179c, 0x407, 0xd3, 0x118a, 0x1c9f, 0xeac, 0x145b, 0xc35, 0x11a2, 0x58b, 0xe4, 0x5e3, 0xae7, 0x330} +#elif RADIX == 32 +{0x3563c78, 0x4c7a12f, 0x101a0349, 0x1bb04617, 0x7ae1f00, 0xae3e573, 0x7dc946b, 0x13c64a12, 0x1516a49, 0x375ef1b, 0x1fe829e6, 0x138c2a1c, 0x34c80f7, 0xe4fc628, 0xb45b756, 0x2e344c3, 0xf18390b, 0x339} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x349263d0978d58f, 
0xb87c037608c2f01a, 0x7dc946b571f2b99e, 0xd8545a92678c9424, 0x439fe829e61baf78, 0xe3140d3203de7185, 0xc68986b45b756727, 0x32b9cbc60e42c5} +#else +{0x924c7a12f1ab1e, 0x37608c2f01a03, 0x15ab8f95ccf5c3e, 0x99e325091f7251, 0xc375ef1b0a8b52, 0x1e7185439fe829e, 0x1393f18a069901e, 0x1171a261ad16dd5, 0x6573978c1c85} +#endif +#endif +}; diff --git a/src/pqm4/sqisign_lvl5/ref/e0_basis.h b/src/pqm4/sqisign_lvl5/ref/e0_basis.h new file mode 100644 index 0000000..05cafb8 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/e0_basis.h @@ -0,0 +1,3 @@ +#include +extern const fp2_t BASIS_E0_PX; +extern const fp2_t BASIS_E0_QX; diff --git a/src/pqm4/sqisign_lvl5/ref/ec.c b/src/pqm4/sqisign_lvl5/ref/ec.c new file mode 100644 index 0000000..be4e4e5 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/ec.c @@ -0,0 +1,665 @@ +#include +#include +#include +#include + +void +ec_point_init(ec_point_t *P) +{ // Initialize point as identity element (1:0) + fp2_set_one(&(P->x)); + fp2_set_zero(&(P->z)); +} + +void +ec_curve_init(ec_curve_t *E) +{ // Initialize the curve struct + // Initialize the constants + fp2_set_zero(&(E->A)); + fp2_set_one(&(E->C)); + + // Initialize the point (A+2 : 4C) + ec_point_init(&(E->A24)); + + // Set the bool to be false by default + E->is_A24_computed_and_normalized = false; +} + +void +select_point(ec_point_t *Q, const ec_point_t *P1, const ec_point_t *P2, const digit_t option) +{ // Select points in constant time + // If option = 0 then Q <- P1, else if option = 0xFF...FF then Q <- P2 + fp2_select(&(Q->x), &(P1->x), &(P2->x), option); + fp2_select(&(Q->z), &(P1->z), &(P2->z), option); +} + +void +cswap_points(ec_point_t *P, ec_point_t *Q, const digit_t option) +{ // Swap points in constant time + // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P + fp2_cswap(&(P->x), &(Q->x), option); + fp2_cswap(&(P->z), &(Q->z), option); +} + +void +ec_normalize_point(ec_point_t *P) +{ + fp2_inv(&P->z); + fp2_mul(&P->x, &P->x, &P->z); + 
fp2_set_one(&(P->z)); +} + +void +ec_normalize_curve(ec_curve_t *E) +{ + fp2_inv(&E->C); + fp2_mul(&E->A, &E->A, &E->C); + fp2_set_one(&E->C); +} + +void +ec_curve_normalize_A24(ec_curve_t *E) +{ + if (!E->is_A24_computed_and_normalized) { + AC_to_A24(&E->A24, E); + ec_normalize_point(&E->A24); + E->is_A24_computed_and_normalized = true; + } + assert(fp2_is_one(&E->A24.z)); +} + +void +ec_normalize_curve_and_A24(ec_curve_t *E) +{ // Neither the curve nor A24 is guaranteed to be normalized. + // First we normalize (A/C : 1) and conditionally compute + if (!fp2_is_one(&E->C)) { + ec_normalize_curve(E); + } + + if (!E->is_A24_computed_and_normalized) { + // Now compute A24 = ((A + 2) / 4 : 1) + fp2_add_one(&E->A24.x, &E->A); // re(A24.x) = re(A) + 1 + fp2_add_one(&E->A24.x, &E->A24.x); // re(A24.x) = re(A) + 2 + fp_copy(&E->A24.x.im, &E->A.im); // im(A24.x) = im(A) + + fp2_half(&E->A24.x, &E->A24.x); // (A + 2) / 2 + fp2_half(&E->A24.x, &E->A24.x); // (A + 2) / 4 + fp2_set_one(&E->A24.z); + + E->is_A24_computed_and_normalized = true; + } +} + +uint32_t +ec_is_zero(const ec_point_t *P) +{ + return fp2_is_zero(&P->z); +} + +uint32_t +ec_has_zero_coordinate(const ec_point_t *P) +{ + return fp2_is_zero(&P->x) | fp2_is_zero(&P->z); +} + +uint32_t +ec_is_equal(const ec_point_t *P, const ec_point_t *Q) +{ // Evaluate if two points in Montgomery coordinates (X:Z) are equal + // Returns 0xFFFFFFFF (true) if P=Q, 0 (false) otherwise + fp2_t t0, t1; + + // Check if P, Q are the points at infinity + uint32_t l_zero = ec_is_zero(P); + uint32_t r_zero = ec_is_zero(Q); + + // Check if PX * QZ = QX * PZ + fp2_mul(&t0, &P->x, &Q->z); + fp2_mul(&t1, &P->z, &Q->x); + uint32_t lr_equal = fp2_is_equal(&t0, &t1); + + // Points are equal if + // - Both are zero, or + // - neither is zero AND PX * QZ = QX * PZ (all three terms combined with bitwise AND so the result stays an all-ones/zero mask; assumes the fp2 predicates return such masks) + return (l_zero & r_zero) | (~l_zero & ~r_zero & lr_equal); +} + +uint32_t +ec_is_two_torsion(const ec_point_t *P, const ec_curve_t *E) +{ + if (ec_is_zero(P)) + return 0; + +
uint32_t x_is_zero, tmp_is_zero; + fp2_t t0, t1, t2; + fp2_add(&t0, &P->x, &P->z); + fp2_sqr(&t0, &t0); + fp2_sub(&t1, &P->x, &P->z); + fp2_sqr(&t1, &t1); + fp2_sub(&t2, &t0, &t1); + fp2_add(&t1, &t0, &t1); + fp2_mul(&t2, &t2, &E->A); + fp2_mul(&t1, &t1, &E->C); + fp2_add(&t1, &t1, &t1); + fp2_add(&t0, &t1, &t2); // 4 (CX^2+CZ^2+AXZ) + + x_is_zero = fp2_is_zero(&P->x); + tmp_is_zero = fp2_is_zero(&t0); + + // two torsion if X is zero or CX^2 + AXZ + CZ^2 is zero (x^2 + Ax + 1 = 0 in affine form with C = 1) + return x_is_zero | tmp_is_zero; +} + +uint32_t +ec_is_four_torsion(const ec_point_t *P, const ec_curve_t *E) +{ // P is reported as four-torsion when [2]P passes ec_is_two_torsion + ec_point_t test; + xDBL_A24(&test, P, &E->A24, E->is_A24_computed_and_normalized); + return ec_is_two_torsion(&test, E); +} + +uint32_t +ec_is_basis_four_torsion(const ec_basis_t *B, const ec_curve_t *E) +{ // Check that [2]P and [2]Q both pass ec_is_two_torsion and are distinct, i.e. (P, Q) behaves as a basis of the 4-torsion + ec_point_t P2, Q2; + xDBL_A24(&P2, &B->P, &E->A24, E->is_A24_computed_and_normalized); + xDBL_A24(&Q2, &B->Q, &E->A24, E->is_A24_computed_and_normalized); + return (ec_is_two_torsion(&P2, E) & ec_is_two_torsion(&Q2, E) & ~ec_is_equal(&P2, &Q2)); +} + +int +ec_curve_verify_A(const fp2_t *A) +{ // Verify the Montgomery coefficient A is valid (A^2-4 \ne 0) + // Return 1 if curve is valid, 0 otherwise + fp2_t t; + fp2_set_one(&t); + fp_add(&t.re, &t.re, &t.re); // t=2 + if (fp2_is_equal(A, &t)) + return 0; + fp_neg(&t.re, &t.re); // t=-2 + if (fp2_is_equal(A, &t)) + return 0; + return 1; +} + +int +ec_curve_init_from_A(ec_curve_t *E, const fp2_t *A) +{ // Initialize the curve from the A coefficient and check it is valid + // Return 1 if curve is valid, 0 otherwise + ec_curve_init(E); + fp2_copy(&E->A, A); // Set A + return ec_curve_verify_A(A); +} + +void +ec_j_inv(fp2_t *j_inv, const ec_curve_t *curve) +{ // j-invariant of the Montgomery curve with projective coefficient (A:C): j = 256*(A^2-3C^2)^3 / (C^4*(A^2-4C^2)) + fp2_t t0, t1; + + fp2_sqr(&t1, &curve->C); // t1 = C^2 + fp2_sqr(j_inv, &curve->A); // j_inv = A^2 + fp2_add(&t0, &t1, &t1); // t0 = 2C^2 + fp2_sub(&t0, j_inv, &t0); // t0 = A^2 - 2C^2 + fp2_sub(&t0, &t0, &t1); // t0 = A^2 - 3C^2 + fp2_sub(j_inv, &t0, &t1); // j_inv = A^2 - 4C^2
+ fp2_sqr(&t1, &t1); + fp2_mul(j_inv, j_inv, &t1); + fp2_add(&t0, &t0, &t0); + fp2_add(&t0, &t0, &t0); + fp2_sqr(&t1, &t0); + fp2_mul(&t0, &t0, &t1); + fp2_add(&t0, &t0, &t0); + fp2_add(&t0, &t0, &t0); + fp2_inv(j_inv); + fp2_mul(j_inv, &t0, j_inv); +} + +void +xDBL_E0(ec_point_t *Q, const ec_point_t *P) +{ // Doubling of a Montgomery point in projective coordinates (X:Z) on the curve E0 with (A:C) = (0:1). + // Input: projective Montgomery x-coordinates P = (XP:ZP), where xP=XP/ZP, and Montgomery curve constants (A:C) = (0:1). + // Output: projective Montgomery x-coordinates Q <- 2*P = (XQ:ZQ) such that x(2P)=XQ/ZQ. + fp2_t t0, t1, t2; + + fp2_add(&t0, &P->x, &P->z); + fp2_sqr(&t0, &t0); + fp2_sub(&t1, &P->x, &P->z); + fp2_sqr(&t1, &t1); + fp2_sub(&t2, &t0, &t1); + fp2_add(&t1, &t1, &t1); + fp2_mul(&Q->x, &t0, &t1); + fp2_add(&Q->z, &t1, &t2); + fp2_mul(&Q->z, &Q->z, &t2); +} + +void +xDBL(ec_point_t *Q, const ec_point_t *P, const ec_point_t *AC) +{ // Doubling of a Montgomery point in projective coordinates (X:Z). Computation of coefficient values A+2C and 4C + // on-the-fly. + // Input: projective Montgomery x-coordinates P = (XP:ZP), where xP=XP/ZP, and Montgomery curve constants (A:C). + // Output: projective Montgomery x-coordinates Q <- 2*P = (XQ:ZQ) such that x(2P)=XQ/ZQ. + fp2_t t0, t1, t2, t3; + + fp2_add(&t0, &P->x, &P->z); + fp2_sqr(&t0, &t0); + fp2_sub(&t1, &P->x, &P->z); + fp2_sqr(&t1, &t1); + fp2_sub(&t2, &t0, &t1); + fp2_add(&t3, &AC->z, &AC->z); + fp2_mul(&t1, &t1, &t3); + fp2_add(&t1, &t1, &t1); + fp2_mul(&Q->x, &t0, &t1); + fp2_add(&t0, &t3, &AC->x); + fp2_mul(&t0, &t0, &t2); + fp2_add(&t0, &t0, &t1); + fp2_mul(&Q->z, &t0, &t2); +} + +void +xDBL_A24(ec_point_t *Q, const ec_point_t *P, const ec_point_t *A24, const bool A24_normalized) +{ // Doubling of a Montgomery point in projective coordinates (X:Z). 
+ // Input: projective Montgomery x-coordinates P = (XP:ZP), where xP=XP/ZP, and + // the Montgomery curve constants A24 = (A+2C:4C) (or A24 = (A+2C/4C:1) if normalized). + // Output: projective Montgomery x-coordinates Q <- 2*P = (XQ:ZQ) such that x(2P)=XQ/ZQ. + fp2_t t0, t1, t2; + + fp2_add(&t0, &P->x, &P->z); + fp2_sqr(&t0, &t0); + fp2_sub(&t1, &P->x, &P->z); + fp2_sqr(&t1, &t1); + fp2_sub(&t2, &t0, &t1); + if (!A24_normalized) + fp2_mul(&t1, &t1, &A24->z); + fp2_mul(&Q->x, &t0, &t1); + fp2_mul(&t0, &t2, &A24->x); + fp2_add(&t0, &t0, &t1); + fp2_mul(&Q->z, &t0, &t2); +} + +void +xADD(ec_point_t *R, const ec_point_t *P, const ec_point_t *Q, const ec_point_t *PQ) +{ // Differential addition of Montgomery points in projective coordinates (X:Z). + // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, and difference + // PQ=P-Q=(XPQ:ZPQ). + // Output: projective Montgomery point R <- P+Q = (XR:ZR) such that x(P+Q)=XR/ZR. + fp2_t t0, t1, t2, t3; + + fp2_add(&t0, &P->x, &P->z); + fp2_sub(&t1, &P->x, &P->z); + fp2_add(&t2, &Q->x, &Q->z); + fp2_sub(&t3, &Q->x, &Q->z); + fp2_mul(&t0, &t0, &t3); + fp2_mul(&t1, &t1, &t2); + fp2_add(&t2, &t0, &t1); + fp2_sub(&t3, &t0, &t1); + fp2_sqr(&t2, &t2); + fp2_sqr(&t3, &t3); + fp2_mul(&t2, &PQ->z, &t2); + fp2_mul(&R->z, &PQ->x, &t3); + fp2_copy(&R->x, &t2); +} + +void +xDBLADD(ec_point_t *R, + ec_point_t *S, + const ec_point_t *P, + const ec_point_t *Q, + const ec_point_t *PQ, + const ec_point_t *A24, + const bool A24_normalized) +{ // Simultaneous doubling and differential addition. + // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, the difference + // PQ=P-Q=(XPQ:ZPQ), and the Montgomery curve constants A24 = (A+2C:4C) (or A24 = (A+2C/4C:1) if normalized). + // Output: projective Montgomery points R <- 2*P = (XR:ZR) such that x(2P)=XR/ZR, and S <- P+Q = (XS:ZS) such that = + // x(Q+P)=XS/ZS. 
+ fp2_t t0, t1, t2; + + fp2_add(&t0, &P->x, &P->z); + fp2_sub(&t1, &P->x, &P->z); + fp2_sqr(&R->x, &t0); + fp2_sub(&t2, &Q->x, &Q->z); + fp2_add(&S->x, &Q->x, &Q->z); + fp2_mul(&t0, &t0, &t2); + fp2_sqr(&R->z, &t1); + fp2_mul(&t1, &t1, &S->x); + fp2_sub(&t2, &R->x, &R->z); + if (!A24_normalized) + fp2_mul(&R->z, &R->z, &A24->z); + fp2_mul(&R->x, &R->x, &R->z); + fp2_mul(&S->x, &A24->x, &t2); + fp2_sub(&S->z, &t0, &t1); + fp2_add(&R->z, &R->z, &S->x); + fp2_add(&S->x, &t0, &t1); + fp2_mul(&R->z, &R->z, &t2); + fp2_sqr(&S->z, &S->z); + fp2_sqr(&S->x, &S->x); + fp2_mul(&S->z, &S->z, &PQ->x); + fp2_mul(&S->x, &S->x, &PQ->z); +} + +void +xMUL(ec_point_t *Q, const ec_point_t *P, const digit_t *k, const int kbits, const ec_curve_t *curve) +{ // The Montgomery ladder + // Input: projective Montgomery point P=(XP:ZP) such that xP=XP/ZP, a scalar k of bitlength kbits, and + // the Montgomery curve constants (A:C) (or A24 = (A+2C/4C:1) if normalized). + // Output: projective Montgomery points Q <- k*P = (XQ:ZQ) such that x(k*P)=XQ/ZQ. 
+ ec_point_t R0, R1, A24; + digit_t mask; + unsigned int bit, prevbit = 0, swap; + + if (!curve->is_A24_computed_and_normalized) { + // Computation of A24=(A+2C:4C); this projective form is NOT normalized (A24.z = 4C) + fp2_add(&A24.x, &curve->C, &curve->C); + fp2_add(&A24.z, &A24.x, &A24.x); + fp2_add(&A24.x, &A24.x, &curve->A); + } else { + fp2_copy(&A24.x, &curve->A24.x); + fp2_copy(&A24.z, &curve->A24.z); + // Assert A24 has been normalised + assert(fp2_is_one(&A24.z)); + } + + // R0 <- (1:0), R1 <- P + ec_point_init(&R0); + fp2_copy(&R1.x, &P->x); + fp2_copy(&R1.z, &P->z); + + // Main loop: scan the scalar from the most significant bit, swapping (R0, R1) only when the bit changes + for (int i = kbits - 1; i >= 0; i--) { + bit = (k[i >> LOG2RADIX] >> (i & (RADIX - 1))) & 1; + swap = bit ^ prevbit; + prevbit = bit; + mask = 0 - (digit_t)swap; + + cswap_points(&R0, &R1, mask); + xDBLADD(&R0, &R1, &R0, &R1, P, &A24, curve->is_A24_computed_and_normalized); // only skip the A24.z multiplication when A24.z is known to be one + } + swap = 0 ^ prevbit; + mask = 0 - (digit_t)swap; + cswap_points(&R0, &R1, mask); + + fp2_copy(&Q->x, &R0.x); + fp2_copy(&Q->z, &R0.z); +} + +int +xDBLMUL(ec_point_t *S, + const ec_point_t *P, + const digit_t *k, + const ec_point_t *Q, + const digit_t *l, + const ec_point_t *PQ, + const int kbits, + const ec_curve_t *curve) +{ // The Montgomery biladder + // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, scalars k and l of + // bitlength kbits, the difference PQ=P-Q=(XPQ:ZPQ), and the Montgomery curve constants (A:C). + // Output: projective Montgomery point S <- k*P + l*Q = (XS:ZS) such that x(k*P + l*Q)=XS/ZS.
+ + int i, A_is_zero; + digit_t evens, mevens, bitk0, bitl0, maskk, maskl, temp, bs1_ip1, bs2_ip1, bs1_i, bs2_i, h; + digit_t sigma[2] = { 0 }, pre_sigma = 0; + digit_t k_t[NWORDS_ORDER], l_t[NWORDS_ORDER], one[NWORDS_ORDER] = { 0 }, r[2 * BITS] = { 0 }; + ec_point_t DIFF1a, DIFF1b, DIFF2a, DIFF2b, R[3] = { 0 }, T[3]; + + // differential additions formulas are invalid in this case + if (ec_has_zero_coordinate(P) | ec_has_zero_coordinate(Q) | ec_has_zero_coordinate(PQ)) + return 0; + + // Derive sigma according to parity + bitk0 = (k[0] & 1); + bitl0 = (l[0] & 1); + maskk = 0 - bitk0; // Parity masks: 0 if even, otherwise 1...1 + maskl = 0 - bitl0; + sigma[0] = (bitk0 ^ 1); + sigma[1] = (bitl0 ^ 1); + evens = sigma[0] + sigma[1]; // Count number of even scalars + mevens = 0 - (evens & 1); // Mask mevens <- 0 if # even of scalars = 0 or 2, otherwise mevens = 1...1 + + // If k and l are both even or both odd, pick sigma = (0,1) + sigma[0] = (sigma[0] & mevens); + sigma[1] = (sigma[1] & mevens) | (1 & ~mevens); + + // Convert even scalars to odd + one[0] = 1; + mp_sub(k_t, k, one, NWORDS_ORDER); + mp_sub(l_t, l, one, NWORDS_ORDER); + select_ct(k_t, k_t, k, maskk, NWORDS_ORDER); + select_ct(l_t, l_t, l, maskl, NWORDS_ORDER); + + // Scalar recoding + for (i = 0; i < kbits; i++) { + // If sigma[0] = 1 swap k_t and l_t + maskk = 0 - (sigma[0] ^ pre_sigma); + swap_ct(k_t, l_t, maskk, NWORDS_ORDER); + + if (i == kbits - 1) { + bs1_ip1 = 0; + bs2_ip1 = 0; + } else { + bs1_ip1 = mp_shiftr(k_t, 1, NWORDS_ORDER); + bs2_ip1 = mp_shiftr(l_t, 1, NWORDS_ORDER); + } + bs1_i = k_t[0] & 1; + bs2_i = l_t[0] & 1; + + r[2 * i] = bs1_i ^ bs1_ip1; + r[2 * i + 1] = bs2_i ^ bs2_ip1; + + // Revert sigma if second bit, r_(2i+1), is 1 + pre_sigma = sigma[0]; + maskk = 0 - r[2 * i + 1]; + select_ct(&temp, &sigma[0], &sigma[1], maskk, 1); + select_ct(&sigma[1], &sigma[1], &sigma[0], maskk, 1); + sigma[0] = temp; + } + + // Point initialization + ec_point_init(&R[0]); + maskk = 0 - sigma[0]; + 
select_point(&R[1], P, Q, maskk); + select_point(&R[2], Q, P, maskk); + + fp2_copy(&DIFF1a.x, &R[1].x); + fp2_copy(&DIFF1a.z, &R[1].z); + fp2_copy(&DIFF1b.x, &R[2].x); + fp2_copy(&DIFF1b.z, &R[2].z); + + // Initialize DIFF2a <- P+Q, DIFF2b <- P-Q + xADD(&R[2], &R[1], &R[2], PQ); + if (ec_has_zero_coordinate(&R[2])) + return 0; // non valid formulas + + fp2_copy(&DIFF2a.x, &R[2].x); + fp2_copy(&DIFF2a.z, &R[2].z); + fp2_copy(&DIFF2b.x, &PQ->x); + fp2_copy(&DIFF2b.z, &PQ->z); + + A_is_zero = fp2_is_zero(&curve->A); + + // Main loop + for (i = kbits - 1; i >= 0; i--) { + h = r[2 * i] + r[2 * i + 1]; // in {0, 1, 2} + maskk = 0 - (h & 1); + select_point(&T[0], &R[0], &R[1], maskk); + maskk = 0 - (h >> 1); + select_point(&T[0], &T[0], &R[2], maskk); + if (A_is_zero) { + xDBL_E0(&T[0], &T[0]); + } else { + assert(fp2_is_one(&curve->A24.z)); + xDBL_A24(&T[0], &T[0], &curve->A24, true); + } + + maskk = 0 - r[2 * i + 1]; // in {0, 1} + select_point(&T[1], &R[0], &R[1], maskk); + select_point(&T[2], &R[1], &R[2], maskk); + + cswap_points(&DIFF1a, &DIFF1b, maskk); + xADD(&T[1], &T[1], &T[2], &DIFF1a); + xADD(&T[2], &R[0], &R[2], &DIFF2a); + + // If hw (mod 2) = 1 then swap DIFF2a and DIFF2b + maskk = 0 - (h & 1); + cswap_points(&DIFF2a, &DIFF2b, maskk); + + // R <- T + copy_point(&R[0], &T[0]); + copy_point(&R[1], &T[1]); + copy_point(&R[2], &T[2]); + } + + // Output R[evens] + select_point(S, &R[0], &R[1], mevens); + + maskk = 0 - (bitk0 & bitl0); + select_point(S, S, &R[2], maskk); + return 1; +} + +int +ec_ladder3pt(ec_point_t *R, + const digit_t *m, + const ec_point_t *P, + const ec_point_t *Q, + const ec_point_t *PQ, + const ec_curve_t *E) +{ // The 3-point Montgomery ladder + // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, a scalar k of + // bitlength kbits, the difference PQ=P-Q=(XPQ:ZPQ), and the Montgomery curve constants A24 = (A+2C/4C:1). 
+ // Output: projective Montgomery point R <- P + m*Q = (XR:ZR) such that x(P + m*Q)=XR/ZR. + assert(E->is_A24_computed_and_normalized); + if (!fp2_is_one(&E->A24.z)) { + return 0; + } + // Formulas are not valid in that case + if (ec_has_zero_coordinate(PQ)) { + return 0; + } + + ec_point_t X0, X1, X2; + copy_point(&X0, Q); + copy_point(&X1, P); + copy_point(&X2, PQ); + + int i, j; + digit_t t; + for (i = 0; i < NWORDS_ORDER; i++) { + t = 1; + for (j = 0; j < RADIX; j++) { + cswap_points(&X1, &X2, -((t & m[i]) == 0)); + xDBLADD(&X0, &X1, &X0, &X1, &X2, &E->A24, true); + cswap_points(&X1, &X2, -((t & m[i]) == 0)); + t <<= 1; + }; + }; + copy_point(R, &X1); + return 1; +} + +// WRAPPERS to export + +void +ec_dbl(ec_point_t *res, const ec_point_t *P, const ec_curve_t *curve) +{ + // If A24 = ((A+2)/4 : 1) we save multiplications + if (curve->is_A24_computed_and_normalized) { + assert(fp2_is_one(&curve->A24.z)); + xDBL_A24(res, P, &curve->A24, true); + } else { + // Otherwise we compute A24 on the fly for doubling + xDBL(res, P, (const ec_point_t *)curve); + } +} + +void +ec_dbl_iter(ec_point_t *res, int n, const ec_point_t *P, ec_curve_t *curve) +{ + if (n == 0) { + copy_point(res, P); + return; + } + + // When the chain is long enough, we should normalise A24 + if (n > 50) { + ec_curve_normalize_A24(curve); + } + + // When A24 is normalized we can save some multiplications + if (curve->is_A24_computed_and_normalized) { + assert(fp2_is_one(&curve->A24.z)); + xDBL_A24(res, P, &curve->A24, true); + for (int i = 0; i < n - 1; i++) { + assert(fp2_is_one(&curve->A24.z)); + xDBL_A24(res, res, &curve->A24, true); + } + } else { + // Otherwise we do normal doubling + xDBL(res, P, (const ec_point_t *)curve); + for (int i = 0; i < n - 1; i++) { + xDBL(res, res, (const ec_point_t *)curve); + } + } +} + +void +ec_dbl_iter_basis(ec_basis_t *res, int n, const ec_basis_t *B, ec_curve_t *curve) +{ + ec_dbl_iter(&res->P, n, &B->P, curve); + ec_dbl_iter(&res->Q, n, &B->Q, curve); + 
ec_dbl_iter(&res->PmQ, n, &B->PmQ, curve); +} + +void +ec_mul(ec_point_t *res, const digit_t *scalar, const int kbits, const ec_point_t *P, ec_curve_t *curve) +{ + // For large scalars it's worth normalising anyway + if (kbits > 50) { + ec_curve_normalize_A24(curve); + } + + // When A24 is computed and normalized we save some Fp2 multiplications + xMUL(res, P, scalar, kbits, curve); +} + +int +ec_biscalar_mul(ec_point_t *res, + const digit_t *scalarP, + const digit_t *scalarQ, + const int kbits, + const ec_basis_t *PQ, + const ec_curve_t *curve) +{ + if (fp2_is_zero(&PQ->PmQ.z)) + return 0; + + /* Differential additions behave badly when PmQ = (0:1), so we need to + * treat this case specifically. Since we assume P, Q are a basis, this + * can happen only if kbits==1 */ + if (kbits == 1) { + // Sanity check: our basis should be given by 2-torsion points + if (!ec_is_two_torsion(&PQ->P, curve) || !ec_is_two_torsion(&PQ->Q, curve) || + !ec_is_two_torsion(&PQ->PmQ, curve)) + return 0; + digit_t bP, bQ; + bP = (scalarP[0] & 1); + bQ = (scalarQ[0] & 1); + if (bP == 0 && bQ == 0) + ec_point_init(res); //(1: 0) + else if (bP == 1 && bQ == 0) + copy_point(res, &PQ->P); + else if (bP == 0 && bQ == 1) + copy_point(res, &PQ->Q); + else if (bP == 1 && bQ == 1) + copy_point(res, &PQ->PmQ); + else // should never happen + assert(0); + return 1; + } else { + ec_curve_t E; + copy_curve(&E, curve); + + if (!fp2_is_zero(&curve->A)) { // If A is not zero normalize + ec_curve_normalize_A24(&E); + } + return xDBLMUL(res, &PQ->P, scalarP, &PQ->Q, scalarQ, &PQ->PmQ, kbits, (const ec_curve_t *)&E); + } +} diff --git a/src/pqm4/sqisign_lvl5/ref/ec.h b/src/pqm4/sqisign_lvl5/ref/ec.h new file mode 100644 index 0000000..ee2be38 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/ec.h @@ -0,0 +1,668 @@ +/** @file + * + * @authors Luca De Feo, Francisco RH + * + * @brief Elliptic curve stuff + */ + +#ifndef EC_H +#define EC_H +#include +#include +#include +#include +#include + +/** @defgroup ec 
Elliptic curves + * @{ + */ + +/** @defgroup ec_t Data structures + * @{ + */ + +/** @brief Projective point on the Kummer line E/pm 1 in Montgomery coordinates + * + * @typedef ec_point_t + * + * @struct ec_point_t + * + * A projective point in x-only (X:Z) coordinates. + */ +typedef struct ec_point_t +{ + fp2_t x; + fp2_t z; +} ec_point_t; + +/** @brief Point on a Montgomery curve in Jacobian coordinates + * + * @typedef jac_point_t + * + * @struct jac_point_t + * + * A projective point in (X:Y:Z) coordinates, corresponding to the affine point (X/Z^2, Y/Z^3) + */ +typedef struct jac_point_t +{ + fp2_t x; + fp2_t y; + fp2_t z; +} jac_point_t; + +/** @brief Addition components + * + * @typedef add_components_t + * + * @struct add_components_t + * + * 3 components u,v,w that define the (X:Z) coordinates of both + * the addition and the subtraction of two distinct points, with + * P+Q = (u-v:w) and P-Q = (u+v:w) + */ +typedef struct add_components_t +{ + fp2_t u; + fp2_t v; + fp2_t w; +} add_components_t; + +/** @brief A basis of a torsion subgroup + * + * @typedef ec_basis_t + * + * @struct ec_basis_t + * + * A pair of points (or a triplet, tbd) forming a basis of a torsion subgroup. 
+ */ +typedef struct ec_basis_t +{ + ec_point_t P; + ec_point_t Q; + ec_point_t PmQ; +} ec_basis_t; + +/** @brief An elliptic curve + * + * @typedef ec_curve_t + * + * @struct ec_curve_t + * + * An elliptic curve in projective Montgomery form + */ +typedef struct ec_curve_t +{ + fp2_t A; + fp2_t C; ///< cannot be 0 + ec_point_t A24; // the point (A+2C : 4C) + bool is_A24_computed_and_normalized; // says if A24 has been computed and normalized +} ec_curve_t; + +/** @brief An isogeny of degree a power of 2 + * + * @typedef ec_isog_even_t + * + * @struct ec_isog_even_t + */ +typedef struct ec_isog_even_t +{ + ec_curve_t curve; ///< The domain curve + ec_point_t kernel; ///< A kernel generator + unsigned length; ///< The length as a 2-isogeny walk +} ec_isog_even_t; + +/** @brief Isomorphism of Montgomery curves + * + * @typedef ec_isom_t + * + * @struct ec_isom_t + * + * The isomorphism is given by the map (X:Z) ↦ ( (Nx X + Nz Z) : (D Z) ) + */ +typedef struct ec_isom_t +{ + fp2_t Nx; + fp2_t Nz; + fp2_t D; +} ec_isom_t; + +// end ec_t +/** @} + */ + +/** @defgroup ec_curve_t Curves and isomorphisms + * @{ + */ + +// Initialisation for curves and points +void ec_curve_init(ec_curve_t *E); +void ec_point_init(ec_point_t *P); + +/** + * @brief Verify that a Montgomery coefficient is valid + * + * @param A an fp2_t + * + * @return 0 if curve is invalid, 1 otherwise + */ +int ec_curve_verify_A(const fp2_t *A); + +/** + * @brief Initialize an elliptic curve from a coefficient + * + * @param A an fp2_t + * @param E the elliptic curve to initialize + * + * @return 0 if curve is invalid, 1 otherwise + */ +int ec_curve_init_from_A(ec_curve_t *E, const fp2_t *A); + +// Copying points, bases and curves +static inline void +copy_point(ec_point_t *P, const ec_point_t *Q) +{ + fp2_copy(&P->x, &Q->x); + fp2_copy(&P->z, &Q->z); +} + +static inline void +copy_basis(ec_basis_t *B1, const ec_basis_t *B0) +{ + copy_point(&B1->P, &B0->P); + copy_point(&B1->Q, &B0->Q); + 
copy_point(&B1->PmQ, &B0->PmQ); +} + +static inline void +copy_curve(ec_curve_t *E1, const ec_curve_t *E2) +{ + fp2_copy(&(E1->A), &(E2->A)); + fp2_copy(&(E1->C), &(E2->C)); + E1->is_A24_computed_and_normalized = E2->is_A24_computed_and_normalized; + copy_point(&E1->A24, &E2->A24); +} + +// Functions for working with the A24 point and normalisation + +/** + * @brief Reduce (A : C) to (A/C : 1) in place + * + * @param E a curve + */ +void ec_normalize_curve(ec_curve_t *E); + +/** + * @brief Reduce (A + 2 : 4C) to ((A+2)/4C : 1) in place + * + * @param E a curve + */ +void ec_curve_normalize_A24(ec_curve_t *E); + +/** + * @brief Normalise both (A : C) and (A + 2 : 4C) as above, in place + * + * @param E a curve + */ +void ec_normalize_curve_and_A24(ec_curve_t *E); + +/** + * @brief Given a curve E, compute (A+2 : 4C) + * + * @param A24 the value (A+2 : 4C) to return into + * @param E a curve + */ +static inline void +AC_to_A24(ec_point_t *A24, const ec_curve_t *E) +{ + // Maybe we already have this computed + if (E->is_A24_computed_and_normalized) { + copy_point(A24, &E->A24); + return; + } + + // A24 = (A+2C : 4C) + fp2_add(&A24->z, &E->C, &E->C); + fp2_add(&A24->x, &E->A, &A24->z); + fp2_add(&A24->z, &A24->z, &A24->z); +} + +/** + * @brief Given a curve the point (A+2 : 4C) compute the curve coefficients (A : C) + * + * @param E a curve to compute + * @param A24 the value (A+2 : 4C) + */ +static inline void +A24_to_AC(ec_curve_t *E, const ec_point_t *A24) +{ + // (A:C) = ((A+2C)*2-4C : 4C) + fp2_add(&E->A, &A24->x, &A24->x); + fp2_sub(&E->A, &E->A, &A24->z); + fp2_add(&E->A, &E->A, &E->A); + fp2_copy(&E->C, &A24->z); +} + +/** + * @brief j-invariant. 
+ * + * @param j_inv computed j_invariant + * @param curve input curve + */ +void ec_j_inv(fp2_t *j_inv, const ec_curve_t *curve); + +/** + * @brief Isomorphism of elliptic curves + * Takes as input two isomorphic Kummer lines in Montgomery form, and outputs an isomorphism between + * them + * + * @param isom computed isomorphism + * @param from domain curve + * @param to image curve + * @return 0xFFFFFFFF if there was an error during the computation, zero otherwise + */ +uint32_t ec_isomorphism(ec_isom_t *isom, const ec_curve_t *from, const ec_curve_t *to); + +/** + * @brief In-place evaluation of an isomorphism + * + * @param P a point + * @param isom an isomorphism + */ +void ec_iso_eval(ec_point_t *P, ec_isom_t *isom); + +/** @} + */ +/** @defgroup ec_point_t Point operations + * @{ + */ + +/** + * @brief Point equality + * + * @param P a point + * @param Q a point + * @return 0xFFFFFFFF if equal, zero otherwise + */ +uint32_t ec_is_equal(const ec_point_t *P, const ec_point_t *Q); + +/** + * @brief Point at infinity test + * + * @param P a point + * @return 0xFFFFFFFF if point at infinity, zero otherwise + */ +uint32_t ec_is_zero(const ec_point_t *P); + +/** + * @brief Two torsion test + * + * @param P a point + * @param E the elliptic curve + * @return 0xFFFFFFFF if P is 2-torsion but not zero, zero otherwise + */ +uint32_t ec_is_two_torsion(const ec_point_t *P, const ec_curve_t *E); + +/** + * @brief Four torsion test + * + * @param P a point + * @param E the elliptic curve + * @return 0xFFFFFFFF if P is 4-torsion (presumably of exact order 4 -- TODO confirm against the implementation), zero otherwise + */ +uint32_t ec_is_four_torsion(const ec_point_t *P, const ec_curve_t *E); + +/** + * @brief Reduce Z-coordinate of point in place + * + * @param P a point + */ +void ec_normalize_point(ec_point_t *P); + +void xDBL_E0(ec_point_t *Q, const ec_point_t *P); +void xADD(ec_point_t *R, const ec_point_t *P, const ec_point_t *Q, const ec_point_t *PQ); +void xDBL_A24(ec_point_t *Q, const ec_point_t *P, const ec_point_t *A24, const bool 
A24_normalized); + +/** + * @brief Point doubling + * + * @param res computed double of P + * @param P a point + * @param curve an elliptic curve + */ +void ec_dbl(ec_point_t *res, const ec_point_t *P, const ec_curve_t *curve); + +/** + * @brief Point iterated doubling + * + * @param res computed [2^n]P (a copy of P when n is 0) + * @param P a point + * @param n the number of doublings + * @param curve the curve on which P lies; its A24 is normalized in place when n > 50 + */ +void ec_dbl_iter(ec_point_t *res, int n, const ec_point_t *P, ec_curve_t *curve); + +/** + * @brief Iterated doubling for a basis P, Q, PmQ + * + * @param res the computed iterated double of basis B + * @param n the number of doubles + * @param B the basis to double + * @param curve the parent curve of the basis + */ +void ec_dbl_iter_basis(ec_basis_t *res, int n, const ec_basis_t *B, ec_curve_t *curve); + +/** + * @brief Point multiplication + * + * @param res computed scalar * P + * @param curve the curve; its A24 is normalized in place when kbits > 50 + * @param scalar an unsigned multi-precision integer + * @param P a point + * @param kbits number of bits of the scalar + */ +void ec_mul(ec_point_t *res, const digit_t *scalar, const int kbits, const ec_point_t *P, ec_curve_t *curve); + +/** + * @brief Combination P+m*Q + * + * @param R computed P + m * Q + * @param curve the curve, whose A24 must already be computed and normalized (0 is returned otherwise) + * @param m an unsigned multi-precision integer of NWORDS_ORDER digits + * @param P a point + * @param Q a point + * @param PQ the difference P-Q + * @return 0 if there was an error, 1 otherwise + */ +int ec_ladder3pt(ec_point_t *R, + const digit_t *m, + const ec_point_t *P, + const ec_point_t *Q, + const ec_point_t *PQ, + const ec_curve_t *curve); + +/** + * @brief Linear combination of points of a basis + * + * @param res computed scalarP * P + scalarQ * Q + * @param scalarP an unsigned multi-precision integer + * @param scalarQ an unsigned multi-precision integer + * @param kbits number of bits of the scalars, or n for points of order 2^n + * @param PQ a torsion basis consisting of points P and Q + * @param curve the curve + * + * @return 0 if 
there was an error, 1 otherwise + */ +int ec_biscalar_mul(ec_point_t *res, + const digit_t *scalarP, + const digit_t *scalarQ, + const int kbits, + const ec_basis_t *PQ, + const ec_curve_t *curve); + +// end point computations +/** + * @} + */ + +/** @defgroup ec_dlog_t Torsion basis computations + * @{ + */ + +/** + * @brief Generate a 2^f-torsion basis from a Montgomery curve along with a hint + * + * @param PQ2 an ec_basis_t + * @param curve an ec_curve_t + * @param f an integer + * + * @return A hint + * + * The algorithm is deterministic + */ +uint8_t ec_curve_to_basis_2f_to_hint(ec_basis_t *PQ2, ec_curve_t *curve, int f); + +/** + * @brief Generate a 2^f-torsion basis from a Montgomery curve and a given hint + * + * @param PQ2 an ec_basis_t + * @param curve an ec_curve_t + * @param f an integer + * @param hint the hint + * + * @return 1 if the basis is valid, 0 otherwise + * + * The algorithm is deterministic + */ +int ec_curve_to_basis_2f_from_hint(ec_basis_t *PQ2, ec_curve_t *curve, int f, const uint8_t hint); +/** // end basis computations + * @} + */ + +/** @defgroup ec_isog_t Isogenies + * @{ + */ + +/** + * @brief Evaluate isogeny of even degree on list of points. + * Returns 0 if successful and 0xFFFFFFFF if kernel has the wrong order or includes (0:1). + * + * @param image computed image curve + * @param phi isogeny + * @param points a list of points to evaluate the isogeny on, modified in place + * @param len_points length of the list points + * + * @return 0 if there was no error, 0xFFFFFFFF otherwise + */ +uint32_t ec_eval_even(ec_curve_t *image, ec_isog_even_t *phi, ec_point_t *points, unsigned len_points); + +/** + * @brief Multiplicative strategy for a short isogeny chain. Returns 1 if successfull and -1 + * if kernel has the wrong order or includes (0:1) when special=false. + * + * @param curve domain curve, to be overwritten by the codomain curve. 
+ * @param kernel a kernel generator of order 2^len + * @param len the length of t he 2-isogeny chain + * @param points a list of points to evaluate the isogeny on, modified in place + * @param len_points length of the list points + * @param special if true, allow isogenies with (0:1) in the kernel + * + * @return 0 if there was no error, 0xFFFFFFFF otherwise + */ +uint32_t ec_eval_small_chain(ec_curve_t *curve, + const ec_point_t *kernel, + int len, + ec_point_t *points, + unsigned len_points, + bool special); + +/** + * @brief Recover Y-coordinate from X-coordinate and curve coefficients. + * + * @param y: a y-coordinate + * @param Px: a x-coordinate + * @param curve: the elliptic curve + * + * @return 0xFFFFFFFF if the point was on the curve, 0 otherwise + */ +uint32_t ec_recover_y(fp2_t *y, const fp2_t *Px, const ec_curve_t *curve); + +// Jacobian point init and copying +void jac_init(jac_point_t *P); +void copy_jac_point(jac_point_t *P, const jac_point_t *Q); + +/** + * @brief Test if two Jacobian points are equal + * + * @param P: a point + * @param Q: a point + * + * @return 0xFFFFFFFF if they are equal, 0 otherwise + */ +uint32_t jac_is_equal(const jac_point_t *P, const jac_point_t *Q); + +// Convert from Jacobian to x-only: drop the Y-coordinate and square Z; the identity (0:1:0) maps to (1:0) +void jac_to_xz(ec_point_t *P, const jac_point_t *xyP); +// Convert from Jacobian coordinates in Montgomery model to Weierstrass (jac_to_ws) and back (jac_from_ws) +void jac_to_ws(jac_point_t *P, fp2_t *t, fp2_t *ao3, const jac_point_t *Q, const ec_curve_t *curve); +void jac_from_ws(jac_point_t *Q, const jac_point_t *P, const fp2_t *ao3, const ec_curve_t *curve); + +// Jacobian arithmetic +void jac_neg(jac_point_t *Q, const jac_point_t *P); +void ADD(jac_point_t *R, const jac_point_t *P, const jac_point_t *Q, const ec_curve_t *AC); +void DBL(jac_point_t *Q, const jac_point_t *P, const ec_curve_t *AC); +void DBLW(jac_point_t *Q, fp2_t *u, const jac_point_t *P, const fp2_t *t); +void jac_to_xz_add_components(add_components_t *uvw, const 
jac_point_t *P, const jac_point_t *Q, const ec_curve_t *AC); + +/** + * @brief Given a basis in x-only, lift to a pair of Jacobian points + * + * @param P: a point + * @param Q: a point + * @param B: a basis + * @param E: an elliptic curve + * + * @return 0xFFFFFFFF if there was no error, 0 otherwise + * + * + * Lifts a basis x(P), x(Q), x(P-Q) assuming the curve has (A/C : 1) and + * the point P = (X/Z : 1). For generic implementation see lift_basis() + */ +uint32_t lift_basis_normalized(jac_point_t *P, jac_point_t *Q, ec_basis_t *B, ec_curve_t *E); + +/** + * @brief Given a basis in x-only, lift to a pair of Jacobian points + * + * @param P: a point + * @param Q: a point + * @param B: a basis + * @param E: an elliptic curve + * + * @return 0xFFFFFFFF if there was no error, 0 otherwise + */ +uint32_t lift_basis(jac_point_t *P, jac_point_t *Q, ec_basis_t *B, ec_curve_t *E); + +/** + * @brief Check if basis points (P, Q) form a full 4-basis + * + * @param B: a basis + * @param E: an elliptic curve + * + * @return 0xFFFFFFFF if they form a basis, 0 otherwise + */ +uint32_t ec_is_basis_four_torsion(const ec_basis_t *B, const ec_curve_t *E); + +/* + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Test functions for printing and order checking, only used in debug mode + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + */ + +/** + * @brief Check if a point (X : Z) has order exactly 2^t + * + * @param P: a point + * @param E: an elliptic curve + * @param t: an integer + * + * @return 0xFFFFFFFF if the order is correct, 0 otherwise + */ +static int +test_point_order_twof(const ec_point_t *P, const ec_curve_t *E, int t) +{ + ec_point_t test; + ec_curve_t curve; + test = *P; + copy_curve(&curve, E); + + if (ec_is_zero(&test)) + return 0; + // Scale point by 2^(t-1) + ec_dbl_iter(&test, t - 1, &test, &curve); + // If it's zero now, it doesnt have order 2^t + if (ec_is_zero(&test)) + return 0; + // Ensure [2^t] P = 0 + 
ec_dbl(&test, &test, &curve); + return ec_is_zero(&test); +} + +/** + * @brief Check if basis points (P, Q, PmQ) all have order exactly 2^t + * + * @param B: a basis + * @param E: an elliptic curve + * @param t: an integer + * + * @return 0xFFFFFFFF if the order is correct, 0 otherwise + */ +static int +test_basis_order_twof(const ec_basis_t *B, const ec_curve_t *E, int t) +{ + int check_P = test_point_order_twof(&B->P, E, t); + int check_Q = test_point_order_twof(&B->Q, E, t); + int check_PmQ = test_point_order_twof(&B->PmQ, E, t); + + return check_P & check_Q & check_PmQ; +} + +/** + * @brief Check if a Jacobian point (X : Y : Z) has order exactly 2^f + * + * @param P: a point + * @param E: an elliptic curve + * @param t: an integer + * + * @return 0xFFFFFFFF if the order is correct, 0 otherwise + */ +static int +test_jac_order_twof(const jac_point_t *P, const ec_curve_t *E, int t) +{ + jac_point_t test; + test = *P; + if (fp2_is_zero(&test.z)) + return 0; + for (int i = 0; i < t - 1; i++) { + DBL(&test, &test, E); + } + if (fp2_is_zero(&test.z)) + return 0; + DBL(&test, &test, E); + return (fp2_is_zero(&test.z)); +} + +// Prints the x-coordinate of the point (X : 1) +static void +ec_point_print(const char *name, ec_point_t P) +{ + fp2_t a; + if (fp2_is_zero(&P.z)) { + printf("%s = INF\n", name); + } else { + fp2_copy(&a, &P.z); + fp2_inv(&a); + fp2_mul(&a, &a, &P.x); + fp2_print(name, &a); + } +} + +// Prints the Montgomery coefficient A +static void +ec_curve_print(const char *name, ec_curve_t E) +{ + fp2_t a; + fp2_copy(&a, &E.C); + fp2_inv(&a); + fp2_mul(&a, &a, &E.A); + fp2_print(name, &a); +} + +#endif +// end isogeny computations +/** + * @} + */ + +// end ec +/** + * @} + */ diff --git a/src/pqm4/sqisign_lvl5/ref/ec_jac.c b/src/pqm4/sqisign_lvl5/ref/ec_jac.c new file mode 100644 index 0000000..20ca68c --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/ec_jac.c @@ -0,0 +1,335 @@ +#include +#include + +void +jac_init(jac_point_t *P) +{ // Initialize Montgomery 
in Jacobian coordinates as identity element (0:1:0) + fp2_set_zero(&P->x); + fp2_set_one(&P->y); + fp2_set_zero(&P->z); +} + +uint32_t +jac_is_equal(const jac_point_t *P, const jac_point_t *Q) +{ // Evaluate if two points in Jacobian coordinates (X:Y:Z) are equal + // Returns 1 (true) if P=Q, 0 (false) otherwise + fp2_t t0, t1, t2, t3; + + fp2_sqr(&t0, &Q->z); + fp2_mul(&t2, &P->x, &t0); // x1*z2^2 + fp2_sqr(&t1, &P->z); + fp2_mul(&t3, &Q->x, &t1); // x2*z1^2 + fp2_sub(&t2, &t2, &t3); + + fp2_mul(&t0, &t0, &Q->z); + fp2_mul(&t0, &P->y, &t0); // y1*z2^3 + fp2_mul(&t1, &t1, &P->z); + fp2_mul(&t1, &Q->y, &t1); // y2*z1^3 + fp2_sub(&t0, &t0, &t1); + + return fp2_is_zero(&t0) & fp2_is_zero(&t2); +} + +void +jac_to_xz(ec_point_t *P, const jac_point_t *xyP) +{ + fp2_copy(&P->x, &xyP->x); + fp2_copy(&P->z, &xyP->z); + fp2_sqr(&P->z, &P->z); + + // If xyP = (0:1:0), we currently have P=(0 : 0) but we want to set P=(1:0) + uint32_t c1, c2; + fp2_t one; + fp2_set_one(&one); + + c1 = fp2_is_zero(&P->x); + c2 = fp2_is_zero(&P->z); + fp2_select(&P->x, &P->x, &one, c1 & c2); +} + +void +jac_to_ws(jac_point_t *Q, fp2_t *t, fp2_t *ao3, const jac_point_t *P, const ec_curve_t *curve) +{ + // Cost of 3M + 2S when A != 0. + fp_t one; + fp2_t a; + /* a = 1 - A^2/3, U = X + (A*Z^2)/3, V = Y, W = Z, T = a*Z^4*/ + fp_set_one(&one); + if (!fp2_is_zero(&(curve->A))) { + fp_div3(&(ao3->re), &(curve->A.re)); + fp_div3(&(ao3->im), &(curve->A.im)); + fp2_sqr(t, &P->z); + fp2_mul(&Q->x, ao3, t); + fp2_add(&Q->x, &Q->x, &P->x); + fp2_sqr(t, t); + fp2_mul(&a, ao3, &(curve->A)); + fp_sub(&(a.re), &one, &(a.re)); + fp_neg(&(a.im), &(a.im)); + fp2_mul(t, t, &a); + } else { + fp2_copy(&Q->x, &P->x); + fp2_sqr(t, &P->z); + fp2_sqr(t, t); + } + fp2_copy(&Q->y, &P->y); + fp2_copy(&Q->z, &P->z); +} + +void +jac_from_ws(jac_point_t *Q, const jac_point_t *P, const fp2_t *ao3, const ec_curve_t *curve) +{ + // Cost of 1M + 1S when A != 0. + fp2_t t; + /* X = U - (A*W^2)/3, Y = V, Z = W. 
*/ + if (!fp2_is_zero(&(curve->A))) { + fp2_sqr(&t, &P->z); + fp2_mul(&t, &t, ao3); + fp2_sub(&Q->x, &P->x, &t); + } + fp2_copy(&Q->y, &P->y); + fp2_copy(&Q->z, &P->z); +} + +void +copy_jac_point(jac_point_t *P, const jac_point_t *Q) +{ + fp2_copy(&(P->x), &(Q->x)); + fp2_copy(&(P->y), &(Q->y)); + fp2_copy(&(P->z), &(Q->z)); +} + +void +jac_neg(jac_point_t *Q, const jac_point_t *P) +{ + fp2_copy(&Q->x, &P->x); + fp2_neg(&Q->y, &P->y); + fp2_copy(&Q->z, &P->z); +} + +void +DBL(jac_point_t *Q, const jac_point_t *P, const ec_curve_t *AC) +{ // Cost of 6M + 6S. + // Doubling on a Montgomery curve, representation in Jacobian coordinates (X:Y:Z) corresponding to + // (X/Z^2,Y/Z^3) This version receives the coefficient value A + fp2_t t0, t1, t2, t3; + + uint32_t flag = fp2_is_zero(&P->x) & fp2_is_zero(&P->z); + + fp2_sqr(&t0, &P->x); // t0 = x1^2 + fp2_add(&t1, &t0, &t0); + fp2_add(&t0, &t0, &t1); // t0 = 3x1^2 + fp2_sqr(&t1, &P->z); // t1 = z1^2 + fp2_mul(&t2, &P->x, &AC->A); + fp2_add(&t2, &t2, &t2); // t2 = 2Ax1 + fp2_add(&t2, &t1, &t2); // t2 = 2Ax1+z1^2 + fp2_mul(&t2, &t1, &t2); // t2 = z1^2(2Ax1+z1^2) + fp2_add(&t2, &t0, &t2); // t2 = alpha = 3x1^2 + z1^2(2Ax1+z1^2) + fp2_mul(&Q->z, &P->y, &P->z); + fp2_add(&Q->z, &Q->z, &Q->z); // z2 = 2y1z1 + fp2_sqr(&t0, &Q->z); + fp2_mul(&t0, &t0, &AC->A); // t0 = 4Ay1^2z1^2 + fp2_sqr(&t1, &P->y); + fp2_add(&t1, &t1, &t1); // t1 = 2y1^2 + fp2_add(&t3, &P->x, &P->x); // t3 = 2x1 + fp2_mul(&t3, &t1, &t3); // t3 = 4x1y1^2 + fp2_sqr(&Q->x, &t2); // x2 = alpha^2 + fp2_sub(&Q->x, &Q->x, &t0); // x2 = alpha^2 - 4Ay1^2z1^2 + fp2_sub(&Q->x, &Q->x, &t3); + fp2_sub(&Q->x, &Q->x, &t3); // x2 = alpha^2 - 4Ay1^2z1^2 - 8x1y1^2 + fp2_sub(&Q->y, &t3, &Q->x); // y2 = 4x1y1^2 - x2 + fp2_mul(&Q->y, &Q->y, &t2); // y2 = alpha(4x1y1^2 - x2) + fp2_sqr(&t1, &t1); // t1 = 4y1^4 + fp2_sub(&Q->y, &Q->y, &t1); + fp2_sub(&Q->y, &Q->y, &t1); // y2 = alpha(4x1y1^2 - x2) - 8y1^4 + + fp2_select(&Q->x, &Q->x, &P->x, -flag); + fp2_select(&Q->z, &Q->z, &P->z, 
-flag); +} + +void +DBLW(jac_point_t *Q, fp2_t *u, const jac_point_t *P, const fp2_t *t) +{ // Cost of 3M + 5S. + // Doubling on a Weierstrass curve, representation in modified Jacobian coordinates + // (X:Y:Z:T=a*Z^4) corresponding to (X/Z^2,Y/Z^3), where a is the curve coefficient. + // Formula from https://hyperelliptic.org/EFD/g1p/auto-shortw-modified.html + + uint32_t flag = fp2_is_zero(&P->x) & fp2_is_zero(&P->z); + + fp2_t xx, c, cc, r, s, m; + // XX = X^2 + fp2_sqr(&xx, &P->x); + // A = 2*Y^2 + fp2_sqr(&c, &P->y); + fp2_add(&c, &c, &c); + // AA = A^2 + fp2_sqr(&cc, &c); + // R = 2*AA + fp2_add(&r, &cc, &cc); + // S = (X+A)^2-XX-AA + fp2_add(&s, &P->x, &c); + fp2_sqr(&s, &s); + fp2_sub(&s, &s, &xx); + fp2_sub(&s, &s, &cc); + // M = 3*XX+T1 + fp2_add(&m, &xx, &xx); + fp2_add(&m, &m, &xx); + fp2_add(&m, &m, t); + // X3 = M^2-2*S + fp2_sqr(&Q->x, &m); + fp2_sub(&Q->x, &Q->x, &s); + fp2_sub(&Q->x, &Q->x, &s); + // Z3 = 2*Y*Z + fp2_mul(&Q->z, &P->y, &P->z); + fp2_add(&Q->z, &Q->z, &Q->z); + // Y3 = M*(S-X3)-R + fp2_sub(&Q->y, &s, &Q->x); + fp2_mul(&Q->y, &Q->y, &m); + fp2_sub(&Q->y, &Q->y, &r); + // T3 = 2*R*T1 + fp2_mul(u, t, &r); + fp2_add(u, u, u); + + fp2_select(&Q->x, &Q->x, &P->x, -flag); + fp2_select(&Q->z, &Q->z, &P->z, -flag); +} + +void +select_jac_point(jac_point_t *Q, const jac_point_t *P1, const jac_point_t *P2, const digit_t option) +{ // Select points + // If option = 0 then Q <- P1, else if option = 0xFF...FF then Q <- P2 + fp2_select(&(Q->x), &(P1->x), &(P2->x), option); + fp2_select(&(Q->y), &(P1->y), &(P2->y), option); + fp2_select(&(Q->z), &(P1->z), &(P2->z), option); +} + +void +ADD(jac_point_t *R, const jac_point_t *P, const jac_point_t *Q, const ec_curve_t *AC) +{ + // Addition on a Montgomery curve, representation in Jacobian coordinates (X:Y:Z) corresponding + // to (x,y) = (X/Z^2,Y/Z^3) This version receives the coefficient value A + // + // Complete routine, to handle all edge cases: + // if ZP == 0: # P == inf + // return Q + // if ZQ 
== 0: # Q == inf + // return P + // dy <- YQ*ZP**3 - YP*ZQ**3 + // dx <- XQ*ZP**2 - XP*ZQ**2 + // if dx == 0: # x1 == x2 + // if dy == 0: # ... and y1 == y2: doubling case + // dy <- ZP*ZQ * (3*XP^2 + ZP^2 * (2*A*XP + ZP^2)) + // dx <- 2*YP*ZP + // else: # ... but y1 != y2, thus P = -Q + // return inf + // XR <- dy**2 - dx**2 * (A*ZP^2*ZQ^2 + XP*ZQ^2 + XQ*ZP^2) + // YR <- dy * (XP*ZQ^2 * dx^2 - XR) - YP*ZQ^3 * dx^3 + // ZR <- dx * ZP * ZQ + + // Constant time processing: + // - The case for P == 0 or Q == 0 is handled at the end with conditional select + // - dy and dx are computed for both the normal and doubling cases, we switch when + // dx == dy == 0 for the normal case. + // - If we have that P = -Q then dx = 0 and so ZR will be zero, giving us the point + // at infinity for "free". + // + // These current formula are expensive and I'm probably missing some tricks... + // Thought I'd get the ball rolling. + // Cost 17M + 6S + 13a + fp2_t t0, t1, t2, t3, u1, u2, v1, dx, dy; + + /* If P is zero or Q is zero we will conditionally swap before returning. 
*/ + uint32_t ctl1 = fp2_is_zero(&P->z); + uint32_t ctl2 = fp2_is_zero(&Q->z); + + /* Precompute some values */ + fp2_sqr(&t0, &P->z); // t0 = z1^2 + fp2_sqr(&t1, &Q->z); // t1 = z2^2 + + /* Compute dy and dx for ordinary case */ + fp2_mul(&v1, &t1, &Q->z); // v1 = z2^3 + fp2_mul(&t2, &t0, &P->z); // t2 = z1^3 + fp2_mul(&v1, &v1, &P->y); // v1 = y1z2^3 + fp2_mul(&t2, &t2, &Q->y); // t2 = y2z1^3 + fp2_sub(&dy, &t2, &v1); // dy = y2z1^3 - y1z2^3 + fp2_mul(&u2, &t0, &Q->x); // u2 = x2z1^2 + fp2_mul(&u1, &t1, &P->x); // u1 = x1z2^2 + fp2_sub(&dx, &u2, &u1); // dx = x2z1^2 - x1z2^2 + + /* Compute dy and dx for doubling case */ + fp2_add(&t1, &P->y, &P->y); // dx_dbl = t1 = 2y1 + fp2_add(&t2, &AC->A, &AC->A); // t2 = 2A + fp2_mul(&t2, &t2, &P->x); // t2 = 2Ax1 + fp2_add(&t2, &t2, &t0); // t2 = 2Ax1 + z1^2 + fp2_mul(&t2, &t2, &t0); // t2 = z1^2 * (2Ax1 + z1^2) + fp2_sqr(&t0, &P->x); // t0 = x1^2 + fp2_add(&t2, &t2, &t0); // t2 = x1^2 + z1^2 * (2Ax1 + z1^2) + fp2_add(&t2, &t2, &t0); // t2 = 2*x1^2 + z1^2 * (2Ax1 + z1^2) + fp2_add(&t2, &t2, &t0); // t2 = 3*x1^2 + z1^2 * (2Ax1 + z1^2) + fp2_mul(&t2, &t2, &Q->z); // dy_dbl = t2 = z2 * (3*x1^2 + z1^2 * (2Ax1 + z1^2)) + + /* If dx is zero and dy is zero swap with double variables */ + uint32_t ctl = fp2_is_zero(&dx) & fp2_is_zero(&dy); + fp2_select(&dx, &dx, &t1, ctl); + fp2_select(&dy, &dy, &t2, ctl); + + /* Some more precomputations */ + fp2_mul(&t0, &P->z, &Q->z); // t0 = z1z2 + fp2_sqr(&t1, &t0); // t1 = z1z2^2 + fp2_sqr(&t2, &dx); // t2 = dx^2 + fp2_sqr(&t3, &dy); // t3 = dy^2 + + /* Compute x3 = dy**2 - dx**2 * (A*ZP^2*ZQ^2 + XP*ZQ^2 + XQ*ZP^2) */ + fp2_mul(&R->x, &AC->A, &t1); // x3 = A*(z1z2)^2 + fp2_add(&R->x, &R->x, &u1); // x3 = A*(z1z2)^2 + u1 + fp2_add(&R->x, &R->x, &u2); // x3 = A*(z1z2)^2 + u1 + u2 + fp2_mul(&R->x, &R->x, &t2); // x3 = dx^2 * (A*(z1z2)^2 + u1 + u2) + fp2_sub(&R->x, &t3, &R->x); // x3 = dy^2 - dx^2 * (A*(z1z2)^2 + u1 + u2) + + /* Compute y3 = dy * (XP*ZQ^2 * dx^2 - XR) - YP*ZQ^3 * dx^3*/ + 
fp2_mul(&R->y, &u1, &t2); // y3 = u1 * dx^2 + fp2_sub(&R->y, &R->y, &R->x); // y3 = u1 * dx^2 - x3 + fp2_mul(&R->y, &R->y, &dy); // y3 = dy * (u1 * dx^2 - x3) + fp2_mul(&t3, &t2, &dx); // t3 = dx^3 + fp2_mul(&t3, &t3, &v1); // t3 = v1 * dx^3 + fp2_sub(&R->y, &R->y, &t3); // y3 = dy * (u1 * dx^2 - x3) - v1 * dx^3 + + /* Compute z3 = dx * z1 * z2 */ + fp2_mul(&R->z, &dx, &t0); + + /* Finally, we need to set R = P if Q.Z = 0 and R = Q if P.Z = 0 */ + select_jac_point(R, R, Q, ctl1); + select_jac_point(R, R, P, ctl2); +} + +void +jac_to_xz_add_components(add_components_t *add_comp, const jac_point_t *P, const jac_point_t *Q, const ec_curve_t *AC) +{ + // Given two distinct points P and Q on E in Jacobian coordinates, return three components u, v and w in Fp2 such + // that the xz coordinates of P+Q are (u-v:w) and of P-Q are (u+v:w). Only AC->A is read here, so the curve is presumably normalized (C = 1) - TODO confirm + + fp2_t t0, t1, t2, t3, t4, t5, t6; + + fp2_sqr(&t0, &P->z); // t0 = z1^2 + fp2_sqr(&t1, &Q->z); // t1 = z2^2 + fp2_mul(&t2, &P->x, &t1); // t2 = x1z2^2 + fp2_mul(&t3, &t0, &Q->x); // t3 = z1^2x2 + fp2_mul(&t4, &P->y, &Q->z); // t4 = y1z2 + fp2_mul(&t4, &t4, &t1); // t4 = y1z2^3 + fp2_mul(&t5, &P->z, &Q->y); // t5 = z1y2 + fp2_mul(&t5, &t5, &t0); // t5 = z1^3y2 + fp2_mul(&t0, &t0, &t1); // t0 = (z1z2)^2 + fp2_mul(&t6, &t4, &t5); // t6 = (z1z2)^3y1y2 + fp2_add(&add_comp->v, &t6, &t6); // v = 2(z1z2)^3y1y2 + fp2_sqr(&t4, &t4); // t4 = y1^2z2^6 + fp2_sqr(&t5, &t5); // t5 = z1^6y2^2 + fp2_add(&t4, &t4, &t5); // t4 = z1^6y2^2 + y1^2z2^6 + fp2_add(&t5, &t2, &t3); // t5 = x1z2^2 +z1^2x2 + fp2_add(&t6, &t3, &t3); // t6 = 2z1^2x2 + fp2_sub(&t6, &t5, &t6); // t6 = lambda = x1z2^2 - z1^2x2 + fp2_sqr(&t6, &t6); // t6 = lambda^2 = (x1z2^2 - z1^2x2)^2 + fp2_mul(&t1, &AC->A, &t0); // t1 = A*(z1z2)^2 + fp2_add(&t1, &t5, &t1); // t1 = gamma =A*(z1z2)^2 + x1z2^2 +z1^2x2 + fp2_mul(&t1, &t1, &t6); // t1 = gamma*lambda^2 + fp2_sub(&add_comp->u, &t4, &t1); // u = z1^6y2^2 + y1^2z2^6 - gamma*lambda^2 + fp2_mul(&add_comp->w, &t6, &t0); // w = (z1z2)^2(lambda)^2 +} diff
--git a/src/pqm4/sqisign_lvl5/ref/ec_params.c b/src/pqm4/sqisign_lvl5/ref/ec_params.c new file mode 100644 index 0000000..d2aa074 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/ec_params.c @@ -0,0 +1,4 @@ +#include +// p+1 divided by the power of 2 (the odd cofactor of p+1, stored as a single digit) +const digit_t p_cofactor_for_2f[1] = {27}; + diff --git a/src/pqm4/sqisign_lvl5/ref/ec_params.h b/src/pqm4/sqisign_lvl5/ref/ec_params.h new file mode 100644 index 0000000..9f2aca3 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/ec_params.h @@ -0,0 +1,12 @@ +#ifndef EC_PARAMS_H +#define EC_PARAMS_H + +#include + +#define TORSION_EVEN_POWER 500 + +// p+1 divided by the power of 2 (presumably p+1 = cofactor * 2^TORSION_EVEN_POWER - TODO confirm) +extern const digit_t p_cofactor_for_2f[1]; +#define P_COFACTOR_FOR_2F_BITLENGTH 5 + +#endif diff --git a/src/pqm4/sqisign_lvl5/ref/encode_verification.c b/src/pqm4/sqisign_lvl5/ref/encode_verification.c new file mode 100644 index 0000000..fecdb9c --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/encode_verification.c @@ -0,0 +1,220 @@ +#include +#include +#include +#include +#include +#include + +typedef unsigned char byte_t; + +// digits: nbytes bytes of a digit array are copied to/from the byte string; TARGET_BIG_ENDIAN builds byte-swap every digit (presumably so the encoding is little-endian everywhere). NOTE(review): the TARGET_BIG_ENDIAN branch of encode_digits stores through (digit_t *)enc, which assumes enc is suitably aligned for digit_t - confirm for callers that advance enc by single bytes first + +static void +encode_digits(byte_t *enc, const digit_t *x, size_t nbytes) +{ +#ifdef TARGET_BIG_ENDIAN + const size_t ndigits = nbytes / sizeof(digit_t); + const size_t rem = nbytes % sizeof(digit_t); + + for (size_t i = 0; i < ndigits; i++) + ((digit_t *)enc)[i] = BSWAP_DIGIT(x[i]); + if (rem) { + digit_t ld = BSWAP_DIGIT(x[ndigits]); + memcpy(enc + ndigits * sizeof(digit_t), (byte_t *)&ld, rem); + } +#else + memcpy(enc, (const byte_t *)x, nbytes); +#endif +} + +static void +decode_digits(digit_t *x, const byte_t *enc, size_t nbytes, size_t ndigits) +{ + assert(nbytes <= ndigits * sizeof(digit_t)); + memcpy((byte_t *)x, enc, nbytes); + memset((byte_t *)x + nbytes, 0, ndigits * sizeof(digit_t) - nbytes); + +#ifdef TARGET_BIG_ENDIAN + for (size_t i = 0; i < ndigits; i++) + x[i] = BSWAP_DIGIT(x[i]); +#endif +} + +// fp2_t + +static byte_t * +fp2_to_bytes(byte_t *enc, const fp2_t *x) +{ + fp2_encode(enc, x); + return
enc + FP2_ENCODED_BYTES; +} + +static const byte_t * +fp2_from_bytes(fp2_t *x, const byte_t *enc) +{ + fp2_decode(x, enc); + return enc + FP2_ENCODED_BYTES; +} + +// curves and points + +static byte_t * +proj_to_bytes(byte_t *enc, const fp2_t *x, const fp2_t *z) +{ + assert(!fp2_is_zero(z)); + fp2_t tmp = *z; + fp2_inv(&tmp); +#ifndef NDEBUG + { + fp2_t chk; + fp2_mul(&chk, z, &tmp); + fp2_t one; + fp2_set_one(&one); + assert(fp2_is_equal(&chk, &one)); + } +#endif + fp2_mul(&tmp, x, &tmp); + enc = fp2_to_bytes(enc, &tmp); + return enc; +} + +static const byte_t * +proj_from_bytes(fp2_t *x, fp2_t *z, const byte_t *enc) +{ + enc = fp2_from_bytes(x, enc); + fp2_set_one(z); + return enc; +} + +static byte_t * +ec_curve_to_bytes(byte_t *enc, const ec_curve_t *curve) +{ + return proj_to_bytes(enc, &curve->A, &curve->C); +} + +static const byte_t * +ec_curve_from_bytes(ec_curve_t *curve, const byte_t *enc) +{ + memset(curve, 0, sizeof(*curve)); + return proj_from_bytes(&curve->A, &curve->C, enc); +} + +static byte_t * +ec_point_to_bytes(byte_t *enc, const ec_point_t *point) +{ + return proj_to_bytes(enc, &point->x, &point->z); +} + +static const byte_t * +ec_point_from_bytes(ec_point_t *point, const byte_t *enc) +{ + return proj_from_bytes(&point->x, &point->z, enc); +} + +static byte_t * +ec_basis_to_bytes(byte_t *enc, const ec_basis_t *basis) +{ + enc = ec_point_to_bytes(enc, &basis->P); + enc = ec_point_to_bytes(enc, &basis->Q); + enc = ec_point_to_bytes(enc, &basis->PmQ); + return enc; +} + +static const byte_t * +ec_basis_from_bytes(ec_basis_t *basis, const byte_t *enc) +{ + enc = ec_point_from_bytes(&basis->P, enc); + enc = ec_point_from_bytes(&basis->Q, enc); + enc = ec_point_from_bytes(&basis->PmQ, enc); + return enc; +} + +// public API + +byte_t * +public_key_to_bytes(byte_t *enc, const public_key_t *pk) +{ +#ifndef NDEBUG + const byte_t *const start = enc; +#endif + enc = ec_curve_to_bytes(enc, &pk->curve); + *enc++ = pk->hint_pk; + assert(enc - start == 
PUBLICKEY_BYTES); + return enc; +} + +const byte_t * +public_key_from_bytes(public_key_t *pk, const byte_t *enc) +{ +#ifndef NDEBUG + const byte_t *const start = enc; +#endif + enc = ec_curve_from_bytes(&pk->curve, enc); + pk->hint_pk = *enc++; + assert(enc - start == PUBLICKEY_BYTES); + return enc; +} + +void +signature_to_bytes(byte_t *enc, const signature_t *sig) +{ +#ifndef NDEBUG + byte_t *const start = enc; +#endif + + enc = fp2_to_bytes(enc, &sig->E_aux_A); + + *enc++ = sig->backtracking; + *enc++ = sig->two_resp_length; + + size_t nbytes = (SQIsign_response_length + 9) / 8; + encode_digits(enc, sig->mat_Bchall_can_to_B_chall[0][0], nbytes); + enc += nbytes; + encode_digits(enc, sig->mat_Bchall_can_to_B_chall[0][1], nbytes); + enc += nbytes; + encode_digits(enc, sig->mat_Bchall_can_to_B_chall[1][0], nbytes); + enc += nbytes; + encode_digits(enc, sig->mat_Bchall_can_to_B_chall[1][1], nbytes); + enc += nbytes; + + nbytes = SECURITY_BITS / 8; + encode_digits(enc, sig->chall_coeff, nbytes); + enc += nbytes; + + *enc++ = sig->hint_aux; + *enc++ = sig->hint_chall; + + assert(enc - start == SIGNATURE_BYTES); +} + +void +signature_from_bytes(signature_t *sig, const byte_t *enc) +{ +#ifndef NDEBUG + const byte_t *const start = enc; +#endif + + enc = fp2_from_bytes(&sig->E_aux_A, enc); + + sig->backtracking = *enc++; + sig->two_resp_length = *enc++; + + size_t nbytes = (SQIsign_response_length + 9) / 8; + decode_digits(sig->mat_Bchall_can_to_B_chall[0][0], enc, nbytes, NWORDS_ORDER); + enc += nbytes; + decode_digits(sig->mat_Bchall_can_to_B_chall[0][1], enc, nbytes, NWORDS_ORDER); + enc += nbytes; + decode_digits(sig->mat_Bchall_can_to_B_chall[1][0], enc, nbytes, NWORDS_ORDER); + enc += nbytes; + decode_digits(sig->mat_Bchall_can_to_B_chall[1][1], enc, nbytes, NWORDS_ORDER); + enc += nbytes; + + nbytes = SECURITY_BITS / 8; + decode_digits(sig->chall_coeff, enc, nbytes, NWORDS_ORDER); + enc += nbytes; + + sig->hint_aux = *enc++; + sig->hint_chall = *enc++; + + 
assert(enc - start == SIGNATURE_BYTES); +} diff --git a/src/pqm4/sqisign_lvl5/ref/encoded_sizes.h b/src/pqm4/sqisign_lvl5/ref/encoded_sizes.h new file mode 100644 index 0000000..3aafb0d --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/encoded_sizes.h @@ -0,0 +1,11 @@ +#define SECURITY_BITS 256 +#define SQIsign_response_length 253 +#define HASH_ITERATIONS 512 +#define FP_ENCODED_BYTES 64 +#define FP2_ENCODED_BYTES 128 +#define EC_CURVE_ENCODED_BYTES 128 +#define EC_POINT_ENCODED_BYTES 128 +#define EC_BASIS_ENCODED_BYTES 384 +#define PUBLICKEY_BYTES 129 +#define SECRETKEY_BYTES 701 +#define SIGNATURE_BYTES 292 diff --git a/src/pqm4/sqisign_lvl5/ref/fp.c b/src/pqm4/sqisign_lvl5/ref/fp.c new file mode 100644 index 0000000..48e2937 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/fp.c @@ -0,0 +1,15 @@ +#include + +/* + * If ctl == 0x00000000, then *d is set to a0 + * If ctl == 0xFFFFFFFF, then *d is set to a1 + * ctl MUST be either 0x00000000 or 0xFFFFFFFF. + */ +void +fp_select(fp_t *d, const fp_t *a0, const fp_t *a1, uint32_t ctl) +{ + digit_t cw = (int32_t)ctl; + for (unsigned int i = 0; i < NWORDS_FIELD; i++) { + (*d)[i] = (*a0)[i] ^ (cw & ((*a0)[i] ^ (*a1)[i])); + } +} diff --git a/src/pqm4/sqisign_lvl5/ref/fp.h b/src/pqm4/sqisign_lvl5/ref/fp.h new file mode 100644 index 0000000..1241d58 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/fp.h @@ -0,0 +1,48 @@ +#ifndef FP_H +#define FP_H + +//////////////////////////////////////////////// NOTE: this is placed here for now +#include +#include +#include +#include +#include +#include +#include +#include + +typedef digit_t fp_t[NWORDS_FIELD]; // Datatype for representing field elements + +extern const digit_t ONE[NWORDS_FIELD]; +extern const digit_t ZERO[NWORDS_FIELD]; +// extern const digit_t PM1O3[NWORDS_FIELD]; + +void fp_set_small(fp_t *x, const digit_t val); +void fp_mul_small(fp_t *x, const fp_t *a, const uint32_t val); +void fp_set_zero(fp_t *x); +void fp_set_one(fp_t *x); +uint32_t fp_is_equal(const fp_t *a, const fp_t 
*b); +uint32_t fp_is_zero(const fp_t *a); +void fp_copy(fp_t *out, const fp_t *a); + +void fp_encode(void *dst, const fp_t *a); +void fp_decode_reduce(fp_t *d, const void *src, size_t len); +uint32_t fp_decode(fp_t *d, const void *src); + +void fp_select(fp_t *d, const fp_t *a0, const fp_t *a1, uint32_t ctl); +void fp_cswap(fp_t *a, fp_t *b, uint32_t ctl); + +void fp_add(fp_t *out, const fp_t *a, const fp_t *b); +void fp_sub(fp_t *out, const fp_t *a, const fp_t *b); +void fp_neg(fp_t *out, const fp_t *a); +void fp_sqr(fp_t *out, const fp_t *a); +void fp_mul(fp_t *out, const fp_t *a, const fp_t *b); + +void fp_inv(fp_t *x); +uint32_t fp_is_square(const fp_t *a); +void fp_sqrt(fp_t *a); +void fp_half(fp_t *out, const fp_t *a); +void fp_exp3div4(fp_t *out, const fp_t *a); +void fp_div3(fp_t *out, const fp_t *a); + +#endif diff --git a/src/pqm4/sqisign_lvl5/ref/fp2.c b/src/pqm4/sqisign_lvl5/ref/fp2.c new file mode 100644 index 0000000..a258952 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/fp2.c @@ -0,0 +1,328 @@ +#include +#include +#include + +/* Arithmetic modulo X^2 + 1 */ + +void +fp2_set_small(fp2_t *x, const digit_t val) +{ + fp_set_small(&(x->re), val); + fp_set_zero(&(x->im)); +} + +void +fp2_mul_small(fp2_t *x, const fp2_t *y, uint32_t n) +{ + fp_mul_small(&x->re, &y->re, n); + fp_mul_small(&x->im, &y->im, n); +} + +void +fp2_set_one(fp2_t *x) +{ + fp_set_one(&(x->re)); + fp_set_zero(&(x->im)); +} + +void +fp2_set_zero(fp2_t *x) +{ + fp_set_zero(&(x->re)); + fp_set_zero(&(x->im)); +} + +// Is a GF(p^2) element zero? +// Returns 0xFF...FF (true) if a=0, 0 (false) otherwise +uint32_t +fp2_is_zero(const fp2_t *a) +{ + return fp_is_zero(&(a->re)) & fp_is_zero(&(a->im)); +} + +// Compare two GF(p^2) elements in constant time +// Returns 0xFF...FF (true) if a=b, 0 (false) otherwise +uint32_t +fp2_is_equal(const fp2_t *a, const fp2_t *b) +{ + return fp_is_equal(&(a->re), &(b->re)) & fp_is_equal(&(a->im), &(b->im)); +} + +// Is a GF(p^2) element one? 
+// Returns 0xFF...FF (true) if a=1, 0 (false) otherwise +uint32_t +fp2_is_one(const fp2_t *a) +{ + return fp_is_equal(&(a->re), &ONE) & fp_is_zero(&(a->im)); +} + +void +fp2_copy(fp2_t *x, const fp2_t *y) +{ + fp_copy(&(x->re), &(y->re)); + fp_copy(&(x->im), &(y->im)); +} + +void +fp2_add(fp2_t *x, const fp2_t *y, const fp2_t *z) +{ + fp_add(&(x->re), &(y->re), &(z->re)); + fp_add(&(x->im), &(y->im), &(z->im)); +} + +void +fp2_add_one(fp2_t *x, const fp2_t *y) +{ + fp_add(&x->re, &y->re, &ONE); + fp_copy(&x->im, &y->im); +} + +void +fp2_sub(fp2_t *x, const fp2_t *y, const fp2_t *z) +{ + fp_sub(&(x->re), &(y->re), &(z->re)); + fp_sub(&(x->im), &(y->im), &(z->im)); +} + +void +fp2_neg(fp2_t *x, const fp2_t *y) +{ + fp_neg(&(x->re), &(y->re)); + fp_neg(&(x->im), &(y->im)); +} + +void +fp2_mul(fp2_t *x, const fp2_t *y, const fp2_t *z) +{ + fp_t t0, t1; + + fp_add(&t0, &(y->re), &(y->im)); + fp_add(&t1, &(z->re), &(z->im)); + fp_mul(&t0, &t0, &t1); + fp_mul(&t1, &(y->im), &(z->im)); + fp_mul(&(x->re), &(y->re), &(z->re)); + fp_sub(&(x->im), &t0, &t1); + fp_sub(&(x->im), &(x->im), &(x->re)); + fp_sub(&(x->re), &(x->re), &t1); +} + +void +fp2_sqr(fp2_t *x, const fp2_t *y) +{ + fp_t sum, diff; + + fp_add(&sum, &(y->re), &(y->im)); + fp_sub(&diff, &(y->re), &(y->im)); + fp_mul(&(x->im), &(y->re), &(y->im)); + fp_add(&(x->im), &(x->im), &(x->im)); + fp_mul(&(x->re), &sum, &diff); +} + +void +fp2_inv(fp2_t *x) +{ + fp_t t0, t1; + + fp_sqr(&t0, &(x->re)); + fp_sqr(&t1, &(x->im)); + fp_add(&t0, &t0, &t1); + fp_inv(&t0); + fp_mul(&(x->re), &(x->re), &t0); + fp_mul(&(x->im), &(x->im), &t0); + fp_neg(&(x->im), &(x->im)); +} + +uint32_t +fp2_is_square(const fp2_t *x) +{ + fp_t t0, t1; + + fp_sqr(&t0, &(x->re)); + fp_sqr(&t1, &(x->im)); + fp_add(&t0, &t0, &t1); + + return fp_is_square(&t0); +} + +void +fp2_sqrt(fp2_t *a) +{ + fp_t x0, x1, t0, t1; + + /* From "Optimized One-Dimensional SQIsign Verification on Intel and + * Cortex-M4" by Aardal et al: 
https://eprint.iacr.org/2024/1563 */ + + // x0 = \delta = sqrt(a0^2 + a1^2). + fp_sqr(&x0, &(a->re)); + fp_sqr(&x1, &(a->im)); + fp_add(&x0, &x0, &x1); + fp_sqrt(&x0); + // If a1 = 0, there is a risk of \delta = -a0, which makes x0 = 0 below. + // In that case, we restore the value \delta = a0. + fp_select(&x0, &x0, &(a->re), fp_is_zero(&(a->im))); + // x0 = \delta + a0, t0 = 2 * x0. + fp_add(&x0, &x0, &(a->re)); + fp_add(&t0, &x0, &x0); + + // x1 = t0^((p-3)/4) + fp_exp3div4(&x1, &t0); + + // x0 = x0 * x1, x1 = x1 * a1, t1 = (2x0)^2. + fp_mul(&x0, &x0, &x1); + fp_mul(&x1, &x1, &(a->im)); + fp_add(&t1, &x0, &x0); + fp_sqr(&t1, &t1); + // If t1 = t0, return x0 + x1*i, otherwise x1 - x0*i. + fp_sub(&t0, &t0, &t1); + uint32_t f = fp_is_zero(&t0); + fp_neg(&t1, &x0); + fp_copy(&t0, &x1); + fp_select(&t0, &t0, &x0, f); + fp_select(&t1, &t1, &x1, f); + + // Check if t0 is zero + uint32_t t0_is_zero = fp_is_zero(&t0); + + // Check whether t0, t1 are odd (each flag is all-ones when the low bit of the first encoded byte is set, else 0) + // Note: we encode to ensure canonical representation + uint8_t tmp_bytes[FP_ENCODED_BYTES]; + fp_encode(tmp_bytes, &t0); + uint32_t t0_is_odd = -((uint32_t)tmp_bytes[0] & 1); + fp_encode(tmp_bytes, &t1); + uint32_t t1_is_odd = -((uint32_t)tmp_bytes[0] & 1); + + // Sign normalisation, so that one fixed root of the two is returned. We negate the output if: + // t0 is odd, or + // t0 is zero and t1 is odd + uint32_t negate_output = t0_is_odd | (t0_is_zero & t1_is_odd); + fp_neg(&x0, &t0); + fp_select(&(a->re), &t0, &x0, negate_output); + fp_neg(&x0, &t1); + fp_select(&(a->im), &t1, &x0, negate_output); +} + +uint32_t +fp2_sqrt_verify(fp2_t *a) +{ + fp2_t t0, t1; + + fp2_copy(&t0, a); + fp2_sqrt(a); + fp2_sqr(&t1, a); + + return (fp2_is_equal(&t0, &t1)); +} + +void +fp2_half(fp2_t *x, const fp2_t *y) +{ + fp_half(&(x->re), &(y->re)); + fp_half(&(x->im), &(y->im)); +} + +void +fp2_batched_inv(fp2_t *x, int len) +{ + fp2_t t1[len], t2[len]; + fp2_t inverse; + + // x = x0,...,xn with n = len - 1; len must be at least 1 because t1[len - 1] is read below + // t1 = x0, x0*x1, ... ,x0 * x1 * ...
* xn + fp2_copy(&t1[0], &x[0]); + for (int i = 1; i < len; i++) { + fp2_mul(&t1[i], &t1[i - 1], &x[i]); + } + + // inverse = 1/ (x0 * x1 * ... * xn) + fp2_copy(&inverse, &t1[len - 1]); + fp2_inv(&inverse); + + fp2_copy(&t2[0], &inverse); + // t2 = 1/ (x0 * x1 * ... * xn), 1/ (x0 * x1 * ... * x(n-1)) , ... , 1/x0 + for (int i = 1; i < len; i++) { + fp2_mul(&t2[i], &t2[i - 1], &x[len - i]); + } + + fp2_copy(&x[0], &t2[len - 1]); + + for (int i = 1; i < len; i++) { + fp2_mul(&x[i], &t1[i - 1], &t2[len - i - 1]); + } +} + +// Computes out = x^exp by square and multiply; exp is read as `size` digits, least significant digit and bit first +// Warning!! Not constant time: a multiplication is performed only for the set bits of exp +void +fp2_pow_vartime(fp2_t *out, const fp2_t *x, const digit_t *exp, const int size) +{ + fp2_t acc; + digit_t bit; + + fp2_copy(&acc, x); + fp2_set_one(out); + + // Iterate over each word of exp + for (int j = 0; j < size; j++) { + // Iterate over each bit of the word + for (int i = 0; i < RADIX; i++) { + bit = (exp[j] >> i) & 1; + if (bit == 1) { + fp2_mul(out, out, &acc); + } + fp2_sqr(&acc, &acc); + } + } +} + +void +fp2_print(const char *name, const fp2_t *a) +{ + printf("%s0x", name); + + uint8_t buf[FP_ENCODED_BYTES]; + fp_encode(&buf, &a->re); // Encoding ensures canonical rep; the bytes are then printed last-to-first + for (int i = 0; i < FP_ENCODED_BYTES; i++) { + printf("%02x", buf[FP_ENCODED_BYTES - i - 1]); + } + + printf(" + i*0x"); + + fp_encode(&buf, &a->im); + for (int i = 0; i < FP_ENCODED_BYTES; i++) { + printf("%02x", buf[FP_ENCODED_BYTES - i - 1]); + } + printf("\n"); +} + +void +fp2_encode(void *dst, const fp2_t *a) +{ + uint8_t *buf = dst; + fp_encode(buf, &(a->re)); + fp_encode(buf + FP_ENCODED_BYTES, &(a->im)); +} + +uint32_t +fp2_decode(fp2_t *d, const void *src) +{ + const uint8_t *buf = src; + uint32_t re, im; + + re = fp_decode(&(d->re), buf); + im = fp_decode(&(d->im), buf + FP_ENCODED_BYTES); + return re & im; +} + +void +fp2_select(fp2_t *d, const fp2_t *a0, const fp2_t *a1, uint32_t ctl) +{ + fp_select(&(d->re), &(a0->re), &(a1->re), ctl); + fp_select(&(d->im), &(a0->im),
&(a1->im), ctl); +} + +void +fp2_cswap(fp2_t *a, fp2_t *b, uint32_t ctl) +{ + fp_cswap(&(a->re), &(b->re), ctl); + fp_cswap(&(a->im), &(b->im), ctl); +} diff --git a/src/pqm4/sqisign_lvl5/ref/fp2.h b/src/pqm4/sqisign_lvl5/ref/fp2.h new file mode 100644 index 0000000..00e673b --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/fp2.h @@ -0,0 +1,41 @@ +#ifndef FP2_H +#define FP2_H + +#include +#include "fp.h" +#include + +// Structure for representing elements in GF(p^2) +typedef struct fp2_t +{ + fp_t re, im; +} fp2_t; + +void fp2_set_small(fp2_t *x, const digit_t val); +void fp2_mul_small(fp2_t *x, const fp2_t *y, uint32_t n); +void fp2_set_one(fp2_t *x); +void fp2_set_zero(fp2_t *x); +uint32_t fp2_is_zero(const fp2_t *a); +uint32_t fp2_is_equal(const fp2_t *a, const fp2_t *b); +uint32_t fp2_is_one(const fp2_t *a); +void fp2_copy(fp2_t *x, const fp2_t *y); +void fp2_add(fp2_t *x, const fp2_t *y, const fp2_t *z); +void fp2_add_one(fp2_t *x, const fp2_t *y); +void fp2_sub(fp2_t *x, const fp2_t *y, const fp2_t *z); +void fp2_neg(fp2_t *x, const fp2_t *y); +void fp2_mul(fp2_t *x, const fp2_t *y, const fp2_t *z); +void fp2_sqr(fp2_t *x, const fp2_t *y); +void fp2_inv(fp2_t *x); +uint32_t fp2_is_square(const fp2_t *x); +void fp2_sqrt(fp2_t *x); +uint32_t fp2_sqrt_verify(fp2_t *a); +void fp2_half(fp2_t *x, const fp2_t *y); +void fp2_batched_inv(fp2_t *x, int len); +void fp2_pow_vartime(fp2_t *out, const fp2_t *x, const digit_t *exp, const int size); +void fp2_print(const char *name, const fp2_t *a); +void fp2_encode(void *dst, const fp2_t *a); +uint32_t fp2_decode(fp2_t *d, const void *src); +void fp2_select(fp2_t *d, const fp2_t *a0, const fp2_t *a1, uint32_t ctl); +void fp2_cswap(fp2_t *a, fp2_t *b, uint32_t ctl); + +#endif diff --git a/src/pqm4/sqisign_lvl5/ref/fp_constants.h b/src/pqm4/sqisign_lvl5/ref/fp_constants.h new file mode 100644 index 0000000..094cb4d --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/fp_constants.h @@ -0,0 +1,17 @@ +#if RADIX == 32 +#if 
defined(SQISIGN_GF_IMPL_BROADWELL) +#define NWORDS_FIELD 16 +#else +#define NWORDS_FIELD 18 +#endif +#define NWORDS_ORDER 16 +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +#define NWORDS_FIELD 8 +#else +#define NWORDS_FIELD 9 +#endif +#define NWORDS_ORDER 8 +#endif +#define BITS 512 +#define LOG2P 9 diff --git a/src/pqm4/sqisign_lvl5/ref/fp_p27500_32.c b/src/pqm4/sqisign_lvl5/ref/fp_p27500_32.c new file mode 100644 index 0000000..ecf5ea7 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/fp_p27500_32.c @@ -0,0 +1,1514 @@ +// clang-format off +// Command line : python monty.py 32 +// 0x1afffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + +#include +#include + +#define sspint int32_t +#define spint uint32_t +#define udpint uint64_t +#define dpint uint64_t + +#define Wordlength 32 +#define Nlimbs 18 +#define Radix 29 +#define Nbits 505 +#define Nbytes 64 + +#define MONTGOMERY +// propagate carries +inline static spint prop(spint *n) { + int i; + spint mask = ((spint)1 << 29u) - (spint)1; + sspint carry = (sspint)n[0]; + carry >>= 29u; + n[0] &= mask; + for (i = 1; i < 17; i++) { + carry += (sspint)n[i]; + n[i] = (spint)carry & mask; + carry >>= 29u; + } + n[17] += (spint)carry; + return -((n[17] >> 1) >> 30u); +} + +// propagate carries and add p if negative, propagate carries again +inline static int flatten(spint *n) { + spint carry = prop(n); + n[0] -= (spint)1u & carry; + n[17] += ((spint)0xd80u) & carry; + (void)prop(n); + return (int)(carry & 1); +} + +// Montgomery final subtract +static int modfsb(spint *n) { + n[0] += (spint)1u; + n[17] -= (spint)0xd80u; + return flatten(n); +} + +// Modular addition - reduce less than 2p +static void modadd(const spint *a, const spint *b, spint *n) { + spint carry; + n[0] = a[0] + b[0]; + n[1] = a[1] + b[1]; + n[2] = a[2] + b[2]; + n[3] = a[3] + b[3]; + n[4] = a[4] + b[4]; + n[5] = a[5] + b[5]; + n[6] = a[6] + b[6]; + n[7] = a[7] + b[7]; + 
n[8] = a[8] + b[8]; + n[9] = a[9] + b[9]; + n[10] = a[10] + b[10]; + n[11] = a[11] + b[11]; + n[12] = a[12] + b[12]; + n[13] = a[13] + b[13]; + n[14] = a[14] + b[14]; + n[15] = a[15] + b[15]; + n[16] = a[16] + b[16]; + n[17] = a[17] + b[17]; + n[0] += (spint)2u; + n[17] -= (spint)0x1b00u; + carry = prop(n); + n[0] -= (spint)2u & carry; + n[17] += ((spint)0x1b00u) & carry; + (void)prop(n); +} + +// Modular subtraction - reduce less than 2p +static void modsub(const spint *a, const spint *b, spint *n) { + spint carry; + n[0] = a[0] - b[0]; + n[1] = a[1] - b[1]; + n[2] = a[2] - b[2]; + n[3] = a[3] - b[3]; + n[4] = a[4] - b[4]; + n[5] = a[5] - b[5]; + n[6] = a[6] - b[6]; + n[7] = a[7] - b[7]; + n[8] = a[8] - b[8]; + n[9] = a[9] - b[9]; + n[10] = a[10] - b[10]; + n[11] = a[11] - b[11]; + n[12] = a[12] - b[12]; + n[13] = a[13] - b[13]; + n[14] = a[14] - b[14]; + n[15] = a[15] - b[15]; + n[16] = a[16] - b[16]; + n[17] = a[17] - b[17]; + carry = prop(n); + n[0] -= (spint)2u & carry; + n[17] += ((spint)0x1b00u) & carry; + (void)prop(n); +} + +// Modular negation +static void modneg(const spint *b, spint *n) { + spint carry; + n[0] = (spint)0 - b[0]; + n[1] = (spint)0 - b[1]; + n[2] = (spint)0 - b[2]; + n[3] = (spint)0 - b[3]; + n[4] = (spint)0 - b[4]; + n[5] = (spint)0 - b[5]; + n[6] = (spint)0 - b[6]; + n[7] = (spint)0 - b[7]; + n[8] = (spint)0 - b[8]; + n[9] = (spint)0 - b[9]; + n[10] = (spint)0 - b[10]; + n[11] = (spint)0 - b[11]; + n[12] = (spint)0 - b[12]; + n[13] = (spint)0 - b[13]; + n[14] = (spint)0 - b[14]; + n[15] = (spint)0 - b[15]; + n[16] = (spint)0 - b[16]; + n[17] = (spint)0 - b[17]; + carry = prop(n); + n[0] -= (spint)2u & carry; + n[17] += ((spint)0x1b00u) & carry; + (void)prop(n); +} + +// Overflow limit = 18446744073709551616 +// maximum possible = 5188148641189065362 +// Modular multiplication, c=a*b mod 2p +static void modmul(const spint *a, const spint *b, spint *c) { + dpint t = 0; + spint p17 = 0xd80u; + spint q = ((spint)1 << 29u); // q is 
unsaturated radix + spint mask = (spint)(q - (spint)1); + t += (dpint)a[0] * b[0]; + spint v0 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[1]; + t += (dpint)a[1] * b[0]; + spint v1 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[2]; + t += (dpint)a[1] * b[1]; + t += (dpint)a[2] * b[0]; + spint v2 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[3]; + t += (dpint)a[1] * b[2]; + t += (dpint)a[2] * b[1]; + t += (dpint)a[3] * b[0]; + spint v3 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[4]; + t += (dpint)a[1] * b[3]; + t += (dpint)a[2] * b[2]; + t += (dpint)a[3] * b[1]; + t += (dpint)a[4] * b[0]; + spint v4 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[5]; + t += (dpint)a[1] * b[4]; + t += (dpint)a[2] * b[3]; + t += (dpint)a[3] * b[2]; + t += (dpint)a[4] * b[1]; + t += (dpint)a[5] * b[0]; + spint v5 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[6]; + t += (dpint)a[1] * b[5]; + t += (dpint)a[2] * b[4]; + t += (dpint)a[3] * b[3]; + t += (dpint)a[4] * b[2]; + t += (dpint)a[5] * b[1]; + t += (dpint)a[6] * b[0]; + spint v6 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[7]; + t += (dpint)a[1] * b[6]; + t += (dpint)a[2] * b[5]; + t += (dpint)a[3] * b[4]; + t += (dpint)a[4] * b[3]; + t += (dpint)a[5] * b[2]; + t += (dpint)a[6] * b[1]; + t += (dpint)a[7] * b[0]; + spint v7 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[8]; + t += (dpint)a[1] * b[7]; + t += (dpint)a[2] * b[6]; + t += (dpint)a[3] * b[5]; + t += (dpint)a[4] * b[4]; + t += (dpint)a[5] * b[3]; + t += (dpint)a[6] * b[2]; + t += (dpint)a[7] * b[1]; + t += (dpint)a[8] * b[0]; + spint v8 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[9]; + t += (dpint)a[1] * b[8]; + t += (dpint)a[2] * b[7]; + t += (dpint)a[3] * b[6]; + t += (dpint)a[4] * b[5]; + t += (dpint)a[5] * b[4]; + t += (dpint)a[6] * b[3]; + t += (dpint)a[7] * b[2]; + t += (dpint)a[8] * b[1]; + t += (dpint)a[9] * b[0]; + spint v9 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] 
* b[10]; + t += (dpint)a[1] * b[9]; + t += (dpint)a[2] * b[8]; + t += (dpint)a[3] * b[7]; + t += (dpint)a[4] * b[6]; + t += (dpint)a[5] * b[5]; + t += (dpint)a[6] * b[4]; + t += (dpint)a[7] * b[3]; + t += (dpint)a[8] * b[2]; + t += (dpint)a[9] * b[1]; + t += (dpint)a[10] * b[0]; + spint v10 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[11]; + t += (dpint)a[1] * b[10]; + t += (dpint)a[2] * b[9]; + t += (dpint)a[3] * b[8]; + t += (dpint)a[4] * b[7]; + t += (dpint)a[5] * b[6]; + t += (dpint)a[6] * b[5]; + t += (dpint)a[7] * b[4]; + t += (dpint)a[8] * b[3]; + t += (dpint)a[9] * b[2]; + t += (dpint)a[10] * b[1]; + t += (dpint)a[11] * b[0]; + spint v11 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[12]; + t += (dpint)a[1] * b[11]; + t += (dpint)a[2] * b[10]; + t += (dpint)a[3] * b[9]; + t += (dpint)a[4] * b[8]; + t += (dpint)a[5] * b[7]; + t += (dpint)a[6] * b[6]; + t += (dpint)a[7] * b[5]; + t += (dpint)a[8] * b[4]; + t += (dpint)a[9] * b[3]; + t += (dpint)a[10] * b[2]; + t += (dpint)a[11] * b[1]; + t += (dpint)a[12] * b[0]; + spint v12 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[13]; + t += (dpint)a[1] * b[12]; + t += (dpint)a[2] * b[11]; + t += (dpint)a[3] * b[10]; + t += (dpint)a[4] * b[9]; + t += (dpint)a[5] * b[8]; + t += (dpint)a[6] * b[7]; + t += (dpint)a[7] * b[6]; + t += (dpint)a[8] * b[5]; + t += (dpint)a[9] * b[4]; + t += (dpint)a[10] * b[3]; + t += (dpint)a[11] * b[2]; + t += (dpint)a[12] * b[1]; + t += (dpint)a[13] * b[0]; + spint v13 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[14]; + t += (dpint)a[1] * b[13]; + t += (dpint)a[2] * b[12]; + t += (dpint)a[3] * b[11]; + t += (dpint)a[4] * b[10]; + t += (dpint)a[5] * b[9]; + t += (dpint)a[6] * b[8]; + t += (dpint)a[7] * b[7]; + t += (dpint)a[8] * b[6]; + t += (dpint)a[9] * b[5]; + t += (dpint)a[10] * b[4]; + t += (dpint)a[11] * b[3]; + t += (dpint)a[12] * b[2]; + t += (dpint)a[13] * b[1]; + t += (dpint)a[14] * b[0]; + spint v14 = ((spint)t & mask); + t >>= 29; + t += 
(dpint)a[0] * b[15]; + t += (dpint)a[1] * b[14]; + t += (dpint)a[2] * b[13]; + t += (dpint)a[3] * b[12]; + t += (dpint)a[4] * b[11]; + t += (dpint)a[5] * b[10]; + t += (dpint)a[6] * b[9]; + t += (dpint)a[7] * b[8]; + t += (dpint)a[8] * b[7]; + t += (dpint)a[9] * b[6]; + t += (dpint)a[10] * b[5]; + t += (dpint)a[11] * b[4]; + t += (dpint)a[12] * b[3]; + t += (dpint)a[13] * b[2]; + t += (dpint)a[14] * b[1]; + t += (dpint)a[15] * b[0]; + spint v15 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[16]; + t += (dpint)a[1] * b[15]; + t += (dpint)a[2] * b[14]; + t += (dpint)a[3] * b[13]; + t += (dpint)a[4] * b[12]; + t += (dpint)a[5] * b[11]; + t += (dpint)a[6] * b[10]; + t += (dpint)a[7] * b[9]; + t += (dpint)a[8] * b[8]; + t += (dpint)a[9] * b[7]; + t += (dpint)a[10] * b[6]; + t += (dpint)a[11] * b[5]; + t += (dpint)a[12] * b[4]; + t += (dpint)a[13] * b[3]; + t += (dpint)a[14] * b[2]; + t += (dpint)a[15] * b[1]; + t += (dpint)a[16] * b[0]; + spint v16 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[0] * b[17]; + t += (dpint)a[1] * b[16]; + t += (dpint)a[2] * b[15]; + t += (dpint)a[3] * b[14]; + t += (dpint)a[4] * b[13]; + t += (dpint)a[5] * b[12]; + t += (dpint)a[6] * b[11]; + t += (dpint)a[7] * b[10]; + t += (dpint)a[8] * b[9]; + t += (dpint)a[9] * b[8]; + t += (dpint)a[10] * b[7]; + t += (dpint)a[11] * b[6]; + t += (dpint)a[12] * b[5]; + t += (dpint)a[13] * b[4]; + t += (dpint)a[14] * b[3]; + t += (dpint)a[15] * b[2]; + t += (dpint)a[16] * b[1]; + t += (dpint)a[17] * b[0]; + t += (dpint)v0 * (dpint)p17; + spint v17 = ((spint)t & mask); + t >>= 29; + t += (dpint)a[1] * b[17]; + t += (dpint)a[2] * b[16]; + t += (dpint)a[3] * b[15]; + t += (dpint)a[4] * b[14]; + t += (dpint)a[5] * b[13]; + t += (dpint)a[6] * b[12]; + t += (dpint)a[7] * b[11]; + t += (dpint)a[8] * b[10]; + t += (dpint)a[9] * b[9]; + t += (dpint)a[10] * b[8]; + t += (dpint)a[11] * b[7]; + t += (dpint)a[12] * b[6]; + t += (dpint)a[13] * b[5]; + t += (dpint)a[14] * b[4]; + t += (dpint)a[15] * b[3]; + 
t += (dpint)a[16] * b[2]; + t += (dpint)a[17] * b[1]; + t += (dpint)v1 * (dpint)p17; + c[0] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[2] * b[17]; + t += (dpint)a[3] * b[16]; + t += (dpint)a[4] * b[15]; + t += (dpint)a[5] * b[14]; + t += (dpint)a[6] * b[13]; + t += (dpint)a[7] * b[12]; + t += (dpint)a[8] * b[11]; + t += (dpint)a[9] * b[10]; + t += (dpint)a[10] * b[9]; + t += (dpint)a[11] * b[8]; + t += (dpint)a[12] * b[7]; + t += (dpint)a[13] * b[6]; + t += (dpint)a[14] * b[5]; + t += (dpint)a[15] * b[4]; + t += (dpint)a[16] * b[3]; + t += (dpint)a[17] * b[2]; + t += (dpint)v2 * (dpint)p17; + c[1] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[3] * b[17]; + t += (dpint)a[4] * b[16]; + t += (dpint)a[5] * b[15]; + t += (dpint)a[6] * b[14]; + t += (dpint)a[7] * b[13]; + t += (dpint)a[8] * b[12]; + t += (dpint)a[9] * b[11]; + t += (dpint)a[10] * b[10]; + t += (dpint)a[11] * b[9]; + t += (dpint)a[12] * b[8]; + t += (dpint)a[13] * b[7]; + t += (dpint)a[14] * b[6]; + t += (dpint)a[15] * b[5]; + t += (dpint)a[16] * b[4]; + t += (dpint)a[17] * b[3]; + t += (dpint)v3 * (dpint)p17; + c[2] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[4] * b[17]; + t += (dpint)a[5] * b[16]; + t += (dpint)a[6] * b[15]; + t += (dpint)a[7] * b[14]; + t += (dpint)a[8] * b[13]; + t += (dpint)a[9] * b[12]; + t += (dpint)a[10] * b[11]; + t += (dpint)a[11] * b[10]; + t += (dpint)a[12] * b[9]; + t += (dpint)a[13] * b[8]; + t += (dpint)a[14] * b[7]; + t += (dpint)a[15] * b[6]; + t += (dpint)a[16] * b[5]; + t += (dpint)a[17] * b[4]; + t += (dpint)v4 * (dpint)p17; + c[3] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[5] * b[17]; + t += (dpint)a[6] * b[16]; + t += (dpint)a[7] * b[15]; + t += (dpint)a[8] * b[14]; + t += (dpint)a[9] * b[13]; + t += (dpint)a[10] * b[12]; + t += (dpint)a[11] * b[11]; + t += (dpint)a[12] * b[10]; + t += (dpint)a[13] * b[9]; + t += (dpint)a[14] * b[8]; + t += (dpint)a[15] * b[7]; + t += (dpint)a[16] * b[6]; + t += (dpint)a[17] * b[5]; + t += (dpint)v5 * (dpint)p17; 
+ c[4] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[6] * b[17]; + t += (dpint)a[7] * b[16]; + t += (dpint)a[8] * b[15]; + t += (dpint)a[9] * b[14]; + t += (dpint)a[10] * b[13]; + t += (dpint)a[11] * b[12]; + t += (dpint)a[12] * b[11]; + t += (dpint)a[13] * b[10]; + t += (dpint)a[14] * b[9]; + t += (dpint)a[15] * b[8]; + t += (dpint)a[16] * b[7]; + t += (dpint)a[17] * b[6]; + t += (dpint)v6 * (dpint)p17; + c[5] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[7] * b[17]; + t += (dpint)a[8] * b[16]; + t += (dpint)a[9] * b[15]; + t += (dpint)a[10] * b[14]; + t += (dpint)a[11] * b[13]; + t += (dpint)a[12] * b[12]; + t += (dpint)a[13] * b[11]; + t += (dpint)a[14] * b[10]; + t += (dpint)a[15] * b[9]; + t += (dpint)a[16] * b[8]; + t += (dpint)a[17] * b[7]; + t += (dpint)v7 * (dpint)p17; + c[6] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[8] * b[17]; + t += (dpint)a[9] * b[16]; + t += (dpint)a[10] * b[15]; + t += (dpint)a[11] * b[14]; + t += (dpint)a[12] * b[13]; + t += (dpint)a[13] * b[12]; + t += (dpint)a[14] * b[11]; + t += (dpint)a[15] * b[10]; + t += (dpint)a[16] * b[9]; + t += (dpint)a[17] * b[8]; + t += (dpint)v8 * (dpint)p17; + c[7] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[9] * b[17]; + t += (dpint)a[10] * b[16]; + t += (dpint)a[11] * b[15]; + t += (dpint)a[12] * b[14]; + t += (dpint)a[13] * b[13]; + t += (dpint)a[14] * b[12]; + t += (dpint)a[15] * b[11]; + t += (dpint)a[16] * b[10]; + t += (dpint)a[17] * b[9]; + t += (dpint)v9 * (dpint)p17; + c[8] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[10] * b[17]; + t += (dpint)a[11] * b[16]; + t += (dpint)a[12] * b[15]; + t += (dpint)a[13] * b[14]; + t += (dpint)a[14] * b[13]; + t += (dpint)a[15] * b[12]; + t += (dpint)a[16] * b[11]; + t += (dpint)a[17] * b[10]; + t += (dpint)v10 * (dpint)p17; + c[9] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[11] * b[17]; + t += (dpint)a[12] * b[16]; + t += (dpint)a[13] * b[15]; + t += (dpint)a[14] * b[14]; + t += (dpint)a[15] * b[13]; + t += (dpint)a[16] * b[12]; 
+ t += (dpint)a[17] * b[11]; + t += (dpint)v11 * (dpint)p17; + c[10] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[12] * b[17]; + t += (dpint)a[13] * b[16]; + t += (dpint)a[14] * b[15]; + t += (dpint)a[15] * b[14]; + t += (dpint)a[16] * b[13]; + t += (dpint)a[17] * b[12]; + t += (dpint)v12 * (dpint)p17; + c[11] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[13] * b[17]; + t += (dpint)a[14] * b[16]; + t += (dpint)a[15] * b[15]; + t += (dpint)a[16] * b[14]; + t += (dpint)a[17] * b[13]; + t += (dpint)v13 * (dpint)p17; + c[12] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[14] * b[17]; + t += (dpint)a[15] * b[16]; + t += (dpint)a[16] * b[15]; + t += (dpint)a[17] * b[14]; + t += (dpint)v14 * (dpint)p17; + c[13] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[15] * b[17]; + t += (dpint)a[16] * b[16]; + t += (dpint)a[17] * b[15]; + t += (dpint)v15 * (dpint)p17; + c[14] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[16] * b[17]; + t += (dpint)a[17] * b[16]; + t += (dpint)v16 * (dpint)p17; + c[15] = ((spint)t & mask); + t >>= 29; + t += (dpint)a[17] * b[17]; + t += (dpint)v17 * (dpint)p17; + c[16] = ((spint)t & mask); + t >>= 29; + c[17] = (spint)t; +} + +// Modular squaring, c=a*a mod 2p +static void modsqr(const spint *a, spint *c) { + udpint tot; + udpint t = 0; + spint p17 = 0xd80u; + spint q = ((spint)1 << 29u); // q is unsaturated radix + spint mask = (spint)(q - (spint)1); + tot = (udpint)a[0] * a[0]; + t = tot; + spint v0 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[1]; + tot *= 2; + t += tot; + spint v1 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[2]; + tot *= 2; + tot += (udpint)a[1] * a[1]; + t += tot; + spint v2 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[3]; + tot += (udpint)a[1] * a[2]; + tot *= 2; + t += tot; + spint v3 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[4]; + tot += (udpint)a[1] * a[3]; + tot *= 2; + tot += (udpint)a[2] * a[2]; + t += tot; + spint v4 = ((spint)t & mask); + t >>= 29; + 
tot = (udpint)a[0] * a[5]; + tot += (udpint)a[1] * a[4]; + tot += (udpint)a[2] * a[3]; + tot *= 2; + t += tot; + spint v5 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[6]; + tot += (udpint)a[1] * a[5]; + tot += (udpint)a[2] * a[4]; + tot *= 2; + tot += (udpint)a[3] * a[3]; + t += tot; + spint v6 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[7]; + tot += (udpint)a[1] * a[6]; + tot += (udpint)a[2] * a[5]; + tot += (udpint)a[3] * a[4]; + tot *= 2; + t += tot; + spint v7 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[8]; + tot += (udpint)a[1] * a[7]; + tot += (udpint)a[2] * a[6]; + tot += (udpint)a[3] * a[5]; + tot *= 2; + tot += (udpint)a[4] * a[4]; + t += tot; + spint v8 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[9]; + tot += (udpint)a[1] * a[8]; + tot += (udpint)a[2] * a[7]; + tot += (udpint)a[3] * a[6]; + tot += (udpint)a[4] * a[5]; + tot *= 2; + t += tot; + spint v9 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[10]; + tot += (udpint)a[1] * a[9]; + tot += (udpint)a[2] * a[8]; + tot += (udpint)a[3] * a[7]; + tot += (udpint)a[4] * a[6]; + tot *= 2; + tot += (udpint)a[5] * a[5]; + t += tot; + spint v10 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[11]; + tot += (udpint)a[1] * a[10]; + tot += (udpint)a[2] * a[9]; + tot += (udpint)a[3] * a[8]; + tot += (udpint)a[4] * a[7]; + tot += (udpint)a[5] * a[6]; + tot *= 2; + t += tot; + spint v11 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[12]; + tot += (udpint)a[1] * a[11]; + tot += (udpint)a[2] * a[10]; + tot += (udpint)a[3] * a[9]; + tot += (udpint)a[4] * a[8]; + tot += (udpint)a[5] * a[7]; + tot *= 2; + tot += (udpint)a[6] * a[6]; + t += tot; + spint v12 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[13]; + tot += (udpint)a[1] * a[12]; + tot += (udpint)a[2] * a[11]; + tot += (udpint)a[3] * a[10]; + tot += (udpint)a[4] * a[9]; + tot += (udpint)a[5] * a[8]; + tot += (udpint)a[6] * a[7]; + tot *= 2; + t += tot; + spint v13 = 
((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[14]; + tot += (udpint)a[1] * a[13]; + tot += (udpint)a[2] * a[12]; + tot += (udpint)a[3] * a[11]; + tot += (udpint)a[4] * a[10]; + tot += (udpint)a[5] * a[9]; + tot += (udpint)a[6] * a[8]; + tot *= 2; + tot += (udpint)a[7] * a[7]; + t += tot; + spint v14 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[15]; + tot += (udpint)a[1] * a[14]; + tot += (udpint)a[2] * a[13]; + tot += (udpint)a[3] * a[12]; + tot += (udpint)a[4] * a[11]; + tot += (udpint)a[5] * a[10]; + tot += (udpint)a[6] * a[9]; + tot += (udpint)a[7] * a[8]; + tot *= 2; + t += tot; + spint v15 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[16]; + tot += (udpint)a[1] * a[15]; + tot += (udpint)a[2] * a[14]; + tot += (udpint)a[3] * a[13]; + tot += (udpint)a[4] * a[12]; + tot += (udpint)a[5] * a[11]; + tot += (udpint)a[6] * a[10]; + tot += (udpint)a[7] * a[9]; + tot *= 2; + tot += (udpint)a[8] * a[8]; + t += tot; + spint v16 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[0] * a[17]; + tot += (udpint)a[1] * a[16]; + tot += (udpint)a[2] * a[15]; + tot += (udpint)a[3] * a[14]; + tot += (udpint)a[4] * a[13]; + tot += (udpint)a[5] * a[12]; + tot += (udpint)a[6] * a[11]; + tot += (udpint)a[7] * a[10]; + tot += (udpint)a[8] * a[9]; + tot *= 2; + t += tot; + t += (udpint)v0 * p17; + spint v17 = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[1] * a[17]; + tot += (udpint)a[2] * a[16]; + tot += (udpint)a[3] * a[15]; + tot += (udpint)a[4] * a[14]; + tot += (udpint)a[5] * a[13]; + tot += (udpint)a[6] * a[12]; + tot += (udpint)a[7] * a[11]; + tot += (udpint)a[8] * a[10]; + tot *= 2; + tot += (udpint)a[9] * a[9]; + t += tot; + t += (udpint)v1 * p17; + c[0] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[2] * a[17]; + tot += (udpint)a[3] * a[16]; + tot += (udpint)a[4] * a[15]; + tot += (udpint)a[5] * a[14]; + tot += (udpint)a[6] * a[13]; + tot += (udpint)a[7] * a[12]; + tot += (udpint)a[8] * a[11]; + tot += (udpint)a[9] * a[10]; + tot *= 2; 
+ t += tot; + t += (udpint)v2 * p17; + c[1] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[3] * a[17]; + tot += (udpint)a[4] * a[16]; + tot += (udpint)a[5] * a[15]; + tot += (udpint)a[6] * a[14]; + tot += (udpint)a[7] * a[13]; + tot += (udpint)a[8] * a[12]; + tot += (udpint)a[9] * a[11]; + tot *= 2; + tot += (udpint)a[10] * a[10]; + t += tot; + t += (udpint)v3 * p17; + c[2] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[4] * a[17]; + tot += (udpint)a[5] * a[16]; + tot += (udpint)a[6] * a[15]; + tot += (udpint)a[7] * a[14]; + tot += (udpint)a[8] * a[13]; + tot += (udpint)a[9] * a[12]; + tot += (udpint)a[10] * a[11]; + tot *= 2; + t += tot; + t += (udpint)v4 * p17; + c[3] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[5] * a[17]; + tot += (udpint)a[6] * a[16]; + tot += (udpint)a[7] * a[15]; + tot += (udpint)a[8] * a[14]; + tot += (udpint)a[9] * a[13]; + tot += (udpint)a[10] * a[12]; + tot *= 2; + tot += (udpint)a[11] * a[11]; + t += tot; + t += (udpint)v5 * p17; + c[4] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[6] * a[17]; + tot += (udpint)a[7] * a[16]; + tot += (udpint)a[8] * a[15]; + tot += (udpint)a[9] * a[14]; + tot += (udpint)a[10] * a[13]; + tot += (udpint)a[11] * a[12]; + tot *= 2; + t += tot; + t += (udpint)v6 * p17; + c[5] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[7] * a[17]; + tot += (udpint)a[8] * a[16]; + tot += (udpint)a[9] * a[15]; + tot += (udpint)a[10] * a[14]; + tot += (udpint)a[11] * a[13]; + tot *= 2; + tot += (udpint)a[12] * a[12]; + t += tot; + t += (udpint)v7 * p17; + c[6] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[8] * a[17]; + tot += (udpint)a[9] * a[16]; + tot += (udpint)a[10] * a[15]; + tot += (udpint)a[11] * a[14]; + tot += (udpint)a[12] * a[13]; + tot *= 2; + t += tot; + t += (udpint)v8 * p17; + c[7] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[9] * a[17]; + tot += (udpint)a[10] * a[16]; + tot += (udpint)a[11] * a[15]; + tot += (udpint)a[12] * a[14]; + tot *= 2; + tot += (udpint)a[13] * a[13]; + t 
+= tot; + t += (udpint)v9 * p17; + c[8] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[10] * a[17]; + tot += (udpint)a[11] * a[16]; + tot += (udpint)a[12] * a[15]; + tot += (udpint)a[13] * a[14]; + tot *= 2; + t += tot; + t += (udpint)v10 * p17; + c[9] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[11] * a[17]; + tot += (udpint)a[12] * a[16]; + tot += (udpint)a[13] * a[15]; + tot *= 2; + tot += (udpint)a[14] * a[14]; + t += tot; + t += (udpint)v11 * p17; + c[10] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[12] * a[17]; + tot += (udpint)a[13] * a[16]; + tot += (udpint)a[14] * a[15]; + tot *= 2; + t += tot; + t += (udpint)v12 * p17; + c[11] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[13] * a[17]; + tot += (udpint)a[14] * a[16]; + tot *= 2; + tot += (udpint)a[15] * a[15]; + t += tot; + t += (udpint)v13 * p17; + c[12] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[14] * a[17]; + tot += (udpint)a[15] * a[16]; + tot *= 2; + t += tot; + t += (udpint)v14 * p17; + c[13] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[15] * a[17]; + tot *= 2; + tot += (udpint)a[16] * a[16]; + t += tot; + t += (udpint)v15 * p17; + c[14] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[16] * a[17]; + tot *= 2; + t += tot; + t += (udpint)v16 * p17; + c[15] = ((spint)t & mask); + t >>= 29; + tot = (udpint)a[17] * a[17]; + t += tot; + t += (udpint)v17 * p17; + c[16] = ((spint)t & mask); + t >>= 29; + c[17] = (spint)t; +} + +// copy +static void modcpy(const spint *a, spint *c) { + int i; + for (i = 0; i < 18; i++) { + c[i] = a[i]; + } +} + +// square n times +static void modnsqr(spint *a, int n) { + int i; + for (i = 0; i < n; i++) { + modsqr(a, a); + } +} + +// Calculate progenitor +static void modpro(const spint *w, spint *z) { + spint x[18]; + spint t0[18]; + spint t1[18]; + spint t2[18]; + spint t3[18]; + spint t4[18]; + spint t5[18]; + spint t6[18]; + modcpy(w, x); + modcpy(x, z); + modnsqr(z, 2); + modmul(x, z, t0); + modmul(x, t0, z); + modsqr(z, t1); + 
modmul(x, t1, t1); + modsqr(t1, t3); + modsqr(t3, t2); + modmul(t3, t2, t4); + modsqr(t4, t5); + modcpy(t5, t2); + modnsqr(t2, 2); + modsqr(t2, t6); + modmul(t2, t6, t6); + modmul(t5, t6, t5); + modnsqr(t5, 5); + modmul(t2, t5, t2); + modcpy(t2, t5); + modnsqr(t5, 12); + modmul(t2, t5, t2); + modcpy(t2, t5); + modnsqr(t5, 2); + modmul(t2, t5, t5); + modmul(t4, t5, t4); + modsqr(t4, t5); + modmul(t2, t5, t2); + modmul(t4, t2, t4); + modnsqr(t4, 27); + modmul(t2, t4, t2); + modmul(t1, t2, t2); + modcpy(t2, t4); + modnsqr(t4, 2); + modmul(t3, t4, t3); + modnsqr(t3, 58); + modmul(t2, t3, t2); + modmul(z, t2, z); + modcpy(z, t2); + modnsqr(t2, 4); + modmul(t1, t2, t1); + modmul(t0, t1, t0); + modmul(t1, t0, t1); + modsqr(t1, t2); + modmul(t0, t2, t0); + modcpy(t0, t2); + modnsqr(t2, 2); + modmul(t0, t2, t2); + modmul(t1, t2, t1); + modmul(t0, t1, t0); + modnsqr(t1, 128); + modmul(t0, t1, t1); + modnsqr(t1, 128); + modmul(t0, t1, t0); + modnsqr(t0, 119); + modmul(z, t0, z); +} + +// calculate inverse, provide progenitor h if available +static void modinv(const spint *x, const spint *h, spint *z) { + spint s[18]; + spint t[18]; + if (h == NULL) { + modpro(x, t); + } else { + modcpy(h, t); + } + modcpy(x, s); + modnsqr(t, 2); + modmul(s, t, z); +} + +// Convert m to n-residue form, n=nres(m) +static void nres(const spint *m, spint *n) { + const spint c[18] = {0x19a29700u, 0x12f6878u, 0x17b425edu, 0x1a12f684u, + 0x97b425eu, 0x1da12f68u, 0x1097b425u, 0xbda12f6u, + 0xd097b42u, 0x4bda12fu, 0x1ed097b4u, 0x84bda12u, + 0x5ed097bu, 0x1684bda1u, 0x25ed097u, 0xf684bdau, + 0x1425ed09u, 0x4bdu}; + modmul(m, c, n); +} + +// Convert n back to normal form, m=redc(n) +static void redc(const spint *n, spint *m) { + int i; + spint c[18]; + c[0] = 1; + for (i = 1; i < 18; i++) { + c[i] = 0; + } + modmul(n, c, m); + (void)modfsb(m); +} + +// is unity? 
+static int modis1(const spint *a) { + int i; + spint c[18]; + spint c0; + spint d = 0; + redc(a, c); + for (i = 1; i < 18; i++) { + d |= c[i]; + } + c0 = (spint)c[0]; + return ((spint)1 & ((d - (spint)1) >> 29u) & + (((c0 ^ (spint)1) - (spint)1) >> 29u)); +} + +// is zero? returns 1 if every limb of redc(a) is zero, else 0 +static int modis0(const spint *a) { + int i; + spint c[18]; + spint d = 0; + redc(a, c); + for (i = 0; i < 18; i++) { + d |= c[i]; + } + return ((spint)1 & ((d - (spint)1) >> 29u)); +} + +// set to zero (all 18 limbs cleared) +static void modzer(spint *a) { + int i; + for (i = 0; i < 18; i++) { + a[i] = 0; + } +} + +// set to one (converted to n-residue form with nres) +static void modone(spint *a) { + int i; + a[0] = 1; + for (i = 1; i < 18; i++) { + a[i] = 0; + } + nres(a, a); +} + +// set to integer x (stored in limb 0, then converted with nres) +static void modint(int x, spint *a) { + int i; + a[0] = (spint)x; + for (i = 1; i < 18; i++) { + a[i] = 0; + } + nres(a, a); +} + +// Modular multiplication by an integer, c=a*b mod 2p +static void modmli(const spint *a, int b, spint *c) { + spint t[18]; + modint(b, t); + modmul(a, t, c); +} + +// Test for quadratic residue, h is an optional progenitor (NULL to compute it); x == 0 also returns 1 +static int modqr(const spint *h, const spint *x) { + spint r[18]; + if (h == NULL) { + modpro(x, r); + modsqr(r, r); + } else { + modsqr(h, r); + } + modmul(r, x, r); + return modis1(r) | modis0(x); +} + +// conditional move g to f if b=1 (f is left unchanged if b=0) +// strongly recommend inlining be disabled using compiler specific syntax +static void modcmv(int b, const spint *g, volatile spint *f) { + int i; + spint c0, c1, s, t; + spint r = 0x5aa5a55au; + c0 = (1 - b) + r; + c1 = b + r; + for (i = 0; i < 18; i++) { + s = g[i]; + t = f[i]; + f[i] = c0 * t + c1 * s; + f[i] -= r * (t + s); + } +} + +// conditional swap g and f if b=1 (both are left unchanged if b=0) +// strongly recommend inlining be disabled using compiler specific syntax +static void modcsw(int b, volatile spint *g, volatile spint *f) { + int i; + spint c0, c1, s, t, w; + spint r = 0x5aa5a55au; + c0 = (1 - b) + r; + c1 = b + r; + for (i = 0; i < 18; i++) { + s = g[i]; + t = f[i]; + w = r * (t + s); + f[i] = c0 * t + c1 * s; +
f[i] -= w; + g[i] = c0 * s + c1 * t; + g[i] -= w; + } +} + +// Modular square root, provide progenitor h if available, NULL if not +static void modsqrt(const spint *x, const spint *h, spint *r) { + spint s[18]; + spint y[18]; + if (h == NULL) { + modpro(x, y); + } else { + modcpy(h, y); + } + modmul(y, x, s); + modcpy(s, r); +} + +// shift left by n bits, n less than a limb (29 bits) +static void modshl(unsigned int n, spint *a) { + int i; + a[17] = ((a[17] << n)) | (a[16] >> (29u - n)); + for (i = 16; i > 0; i--) { + a[i] = ((a[i] << n) & (spint)0x1fffffff) | (a[i - 1] >> (29u - n)); + } + a[0] = (a[0] << n) & (spint)0x1fffffff; +} + +// shift right by n bits, n less than a limb (29 bits). Return shifted out part +static int modshr(unsigned int n, spint *a) { + int i; + spint r = a[0] & (((spint)1 << n) - (spint)1); + for (i = 0; i < 17; i++) { + a[i] = (a[i] >> n) | ((a[i + 1] << (29u - n)) & (spint)0x1fffffff); + } + a[17] = a[17] >> n; + return r; +} + +// set a= 2^r in n-residue form; a is left as zero when r >= 512 +static void mod2r(unsigned int r, spint *a) { + unsigned int n = r / 29u; + unsigned int m = r % 29u; + modzer(a); + if (r >= 64 * 8) + return; + a[n] = 1; + a[n] <<= m; + nres(a, a); +} + +// export to 64-byte array, most significant byte first +static void modexp(const spint *a, char *b) { + int i; + spint c[18]; + redc(a, c); + for (i = 63; i >= 0; i--) { + b[i] = c[0] & (spint)0xff; + (void)modshr(8, c); + } +} + +// import from 64-byte array, most significant byte first +// returns 1 if in range, else 0 +static int modimp(const char *b, spint *a) { + int i, res; + for (i = 0; i < 18; i++) { + a[i] = 0; + } + for (i = 0; i < 64; i++) { + modshl(8, a); + a[0] += (spint)(unsigned char)b[i]; + } + res = modfsb(a); + nres(a, a); + return res; +} + +// determine sign (lowest bit of redc(a)) +static int modsign(const spint *a) { + spint c[18]; + redc(a, c); + return c[0] % 2; +} + +// return 1 if redc(a) and redc(b) are limb-wise equal, else 0 +static int modcmp(const spint *a, const spint *b) { + spint c[18], d[18]; + int i, eq = 1; + redc(a, c); + redc(b, d); + for (i = 0; i < 18; i++) { + eq &= (((c[i] ^ d[i]) - 1) >> 29) & 1; + } + return eq; +} + +//
clang-format on +/****************************************************************************** + API functions calling generated code above + ******************************************************************************/ + +#include + +const digit_t ZERO[NWORDS_FIELD] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; +const digit_t ONE[NWORDS_FIELD] = { 0x00025ed0, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000800 }; +// Montgomery representation of 2^-1 +static const digit_t TWO_INV[NWORDS_FIELD] = { 0x00012f68, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000400 }; +// Montgomery representation of 3^-1 +static const digit_t THREE_INV[NWORDS_FIELD] = { + 0x15561f9a, 0x0aaaaaaa, 0x15555555, 0x0aaaaaaa, 0x15555555, 0x0aaaaaaa, 0x15555555, 0x0aaaaaaa, 0x15555555, + 0x0aaaaaaa, 0x15555555, 0x0aaaaaaa, 0x15555555, 0x0aaaaaaa, 0x15555555, 0x0aaaaaaa, 0x15555555, 0x00000baa +}; +// Montgomery representation of 2^512 +static const digit_t R2[NWORDS_FIELD] = { 0x03c668a5, 0x0f684bda, 0x1425ed09, 0x12f684bd, 0x1b425ed0, 0x012f684b, + 0x17b425ed, 0x1a12f684, 0x097b425e, 0x1da12f68, 0x1097b425, 0x0bda12f6, + 0x0d097b42, 0x04bda12f, 0x1ed097b4, 0x084bda12, 0x05ed097b, 0x00000a21 }; + +void +fp_set_small(fp_t *x, const digit_t val) +{ + modint((int)val, *x); +} + +void +fp_mul_small(fp_t *x, const fp_t *a, const uint32_t val) +{ + modmli(*a, (int)val, *x); +} + +void +fp_set_zero(fp_t *x) +{ + modzer(*x); +} + +void +fp_set_one(fp_t *x) +{ + modone(*x); +} + +uint32_t +fp_is_equal(const fp_t *a, const fp_t *b) +{ + return -(uint32_t)modcmp(*a, *b); +} + +uint32_t +fp_is_zero(const fp_t *a) +{ + return 
-(uint32_t)modis0(*a); +} + +void +fp_copy(fp_t *out, const fp_t *a) +{ + modcpy(*a, *out); +} + +void +fp_cswap(fp_t *a, fp_t *b, uint32_t ctl) +{ + modcsw((int)(ctl & 0x1), *a, *b); +} + +void +fp_add(fp_t *out, const fp_t *a, const fp_t *b) +{ + modadd(*a, *b, *out); +} + +void +fp_sub(fp_t *out, const fp_t *a, const fp_t *b) +{ + modsub(*a, *b, *out); +} + +void +fp_neg(fp_t *out, const fp_t *a) +{ + modneg(*a, *out); +} + +void +fp_sqr(fp_t *out, const fp_t *a) +{ + modsqr(*a, *out); +} + +void +fp_mul(fp_t *out, const fp_t *a, const fp_t *b) +{ + modmul(*a, *b, *out); +} + +void +fp_inv(fp_t *x) +{ + modinv(*x, NULL, *x); +} + +uint32_t +fp_is_square(const fp_t *a) +{ + return -(uint32_t)modqr(NULL, *a); +} + +void +fp_sqrt(fp_t *a) +{ + modsqrt(*a, NULL, *a); +} + +void +fp_half(fp_t *out, const fp_t *a) +{ + modmul(TWO_INV, *a, *out); +} + +void +fp_exp3div4(fp_t *out, const fp_t *a) +{ + modpro(*a, *out); +} + +void +fp_div3(fp_t *out, const fp_t *a) +{ + modmul(THREE_INV, *a, *out); +} + +void +fp_encode(void *dst, const fp_t *a) +{ + // Modified version of modexp(): writes 64 bytes, least significant byte first + int i; + spint c[18]; + redc(*a, c); + for (i = 0; i < 64; i++) { + ((char *)dst)[i] = c[0] & (spint)0xff; + (void)modshr(8, c); + } +} + +uint32_t +fp_decode(fp_t *d, const void *src) +{ + // Modified version of modimp(): reads 64 bytes, least significant byte first + int i; + spint res; + const unsigned char *b = src; + for (i = 0; i < 18; i++) { + (*d)[i] = 0; + } + for (i = 63; i >= 0; i--) { + modshl(8, *d); + (*d)[0] += (spint)b[i]; + } + res = (spint)-modfsb(*d); + nres(*d, *d); + // If the value was canonical then res = -1; otherwise, res = 0 and the mask below zeroes d + for (i = 0; i < 18; i++) { + (*d)[i] &= res; + } + return (uint32_t)res; +} + +static inline unsigned char +add_carry(unsigned char cc, spint a, spint b, spint *d) +{ + udpint t = (udpint)a + (udpint)b + cc; + *d = (spint)t; + return (unsigned char)(t >> Wordlength); +} + +static void +partial_reduce(spint *out, const spint *src) +{ + spint h, l, quo, rem; + unsigned char cc; + + // Split
value in high (12 bits) and low (500 bits) parts. + h = src[15] >> 20; + l = src[15] & 0x000FFFFF; + + // 27*2^500 = 1 mod q; hence, we add floor(h/27) + (h mod 27)*2^500 + // to the low part. (h * 0x12F7) >> 17 equals floor(h/27) for any 12-bit h. + quo = (h * 0x12F7) >> 17; + rem = h - (27 * quo); + cc = add_carry(0, src[0], quo, &out[0]); + cc = add_carry(cc, src[1], 0, &out[1]); + cc = add_carry(cc, src[2], 0, &out[2]); + cc = add_carry(cc, src[3], 0, &out[3]); + cc = add_carry(cc, src[4], 0, &out[4]); + cc = add_carry(cc, src[5], 0, &out[5]); + cc = add_carry(cc, src[6], 0, &out[6]); + cc = add_carry(cc, src[7], 0, &out[7]); + cc = add_carry(cc, src[8], 0, &out[8]); + cc = add_carry(cc, src[9], 0, &out[9]); + cc = add_carry(cc, src[10], 0, &out[10]); + cc = add_carry(cc, src[11], 0, &out[11]); + cc = add_carry(cc, src[12], 0, &out[12]); + cc = add_carry(cc, src[13], 0, &out[13]); + cc = add_carry(cc, src[14], 0, &out[14]); + (void)add_carry(cc, l, rem << 20, &out[15]); +} + +// Little-endian encoding of a 32-bit integer. +static inline void +enc32le(void *dst, uint32_t x) +{ + uint8_t *buf = dst; + buf[0] = (uint8_t)x; + buf[1] = (uint8_t)(x >> 8); + buf[2] = (uint8_t)(x >> 16); + buf[3] = (uint8_t)(x >> 24); +} + +// Little-endian decoding of a 32-bit integer. +static inline uint32_t +dec32le(const void *src) +{ + const uint8_t *buf = src; + return (spint)buf[0] | ((spint)buf[1] << 8) | ((spint)buf[2] << 16) | ((spint)buf[3] << 24); +} + +void +fp_decode_reduce(fp_t *d, const void *src, size_t len) +{ + uint32_t t[16]; // Stores Nbytes * 8 bits + uint8_t tmp[64]; // Nbytes + const uint8_t *b = src; + + fp_set_zero(d); + if (len == 0) { + return; + } + + size_t rem = len % 64; + if (rem != 0) { + // Input size is not a multiple of 64, we decode a partial (zero-padded) + // block of at most 63 bytes, which is already less than 2^504 < 27*2^500. + size_t k = len - rem; + memcpy(tmp, b + k, len - k); + memset(tmp + len - k, 0, (sizeof tmp) - (len - k)); + fp_decode(d, tmp); + len = k; + } + // Process all remaining blocks, in descending address order.
+ while (len > 0) { + fp_mul(d, d, &R2); + len -= 64; + t[0] = dec32le(b + len); + t[1] = dec32le(b + len + 4); + t[2] = dec32le(b + len + 8); + t[3] = dec32le(b + len + 12); + t[4] = dec32le(b + len + 16); + t[5] = dec32le(b + len + 20); + t[6] = dec32le(b + len + 24); + t[7] = dec32le(b + len + 28); + t[8] = dec32le(b + len + 32); + t[9] = dec32le(b + len + 36); + t[10] = dec32le(b + len + 40); + t[11] = dec32le(b + len + 44); + t[12] = dec32le(b + len + 48); + t[13] = dec32le(b + len + 52); + t[14] = dec32le(b + len + 56); + t[15] = dec32le(b + len + 60); + partial_reduce(t, t); + enc32le(tmp, t[0]); + enc32le(tmp + 4, t[1]); + enc32le(tmp + 8, t[2]); + enc32le(tmp + 12, t[3]); + enc32le(tmp + 16, t[4]); + enc32le(tmp + 20, t[5]); + enc32le(tmp + 24, t[6]); + enc32le(tmp + 28, t[7]); + enc32le(tmp + 32, t[8]); + enc32le(tmp + 36, t[9]); + enc32le(tmp + 40, t[10]); + enc32le(tmp + 44, t[11]); + enc32le(tmp + 48, t[12]); + enc32le(tmp + 52, t[13]); + enc32le(tmp + 56, t[14]); + enc32le(tmp + 60, t[15]); + fp_t a; + fp_decode(&a, tmp); + fp_add(d, d, &a); + } +} diff --git a/src/pqm4/sqisign_lvl5/ref/hd.c b/src/pqm4/sqisign_lvl5/ref/hd.c new file mode 100644 index 0000000..0424108 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/hd.c @@ -0,0 +1,93 @@ +#include +#include + +void +double_couple_point(theta_couple_point_t *out, const theta_couple_point_t *in, const theta_couple_curve_t *E1E2) +{ + ec_dbl(&out->P1, &in->P1, &E1E2->E1); + ec_dbl(&out->P2, &in->P2, &E1E2->E2); +} + +void +double_couple_point_iter(theta_couple_point_t *out, + unsigned n, + const theta_couple_point_t *in, + const theta_couple_curve_t *E1E2) +{ + if (n == 0) { + memmove(out, in, sizeof(theta_couple_point_t)); + } else { + double_couple_point(out, in, E1E2); + for (unsigned i = 0; i < n - 1; i++) { + double_couple_point(out, out, E1E2); + } + } +} + +void +add_couple_jac_points(theta_couple_jac_point_t *out, + const theta_couple_jac_point_t *T1, + const theta_couple_jac_point_t *T2, + const 
theta_couple_curve_t *E1E2) +{ + ADD(&out->P1, &T1->P1, &T2->P1, &E1E2->E1); + ADD(&out->P2, &T1->P2, &T2->P2, &E1E2->E2); +} + +void +double_couple_jac_point(theta_couple_jac_point_t *out, + const theta_couple_jac_point_t *in, + const theta_couple_curve_t *E1E2) +{ + DBL(&out->P1, &in->P1, &E1E2->E1); + DBL(&out->P2, &in->P2, &E1E2->E2); +} + +void +double_couple_jac_point_iter(theta_couple_jac_point_t *out, + unsigned n, + const theta_couple_jac_point_t *in, + const theta_couple_curve_t *E1E2) +{ + if (n == 0) { + *out = *in; + } else if (n == 1) { + double_couple_jac_point(out, in, E1E2); + } else { + fp2_t a1, a2, t1, t2; + + jac_to_ws(&out->P1, &t1, &a1, &in->P1, &E1E2->E1); + jac_to_ws(&out->P2, &t2, &a2, &in->P2, &E1E2->E2); + + DBLW(&out->P1, &t1, &out->P1, &t1); + DBLW(&out->P2, &t2, &out->P2, &t2); + for (unsigned i = 0; i < n - 1; i++) { + DBLW(&out->P1, &t1, &out->P1, &t1); + DBLW(&out->P2, &t2, &out->P2, &t2); + } + + jac_from_ws(&out->P1, &out->P1, &a1, &E1E2->E1); + jac_from_ws(&out->P2, &out->P2, &a2, &E1E2->E2); + } +} + +void +couple_jac_to_xz(theta_couple_point_t *P, const theta_couple_jac_point_t *xyP) +{ + jac_to_xz(&P->P1, &xyP->P1); + jac_to_xz(&P->P2, &xyP->P2); +} + +void +copy_bases_to_kernel(theta_kernel_couple_points_t *ker, const ec_basis_t *B1, const ec_basis_t *B2) +{ + // Copy the basis on E1 to (P, _) on T1, T2 and T1 - T2 + copy_point(&ker->T1.P1, &B1->P); + copy_point(&ker->T2.P1, &B1->Q); + copy_point(&ker->T1m2.P1, &B1->PmQ); + + // Copy the basis on E2 to (_, P) on T1, T2 and T1 - T2 + copy_point(&ker->T1.P2, &B2->P); + copy_point(&ker->T2.P2, &B2->Q); + copy_point(&ker->T1m2.P2, &B2->PmQ); +} diff --git a/src/pqm4/sqisign_lvl5/ref/hd.h b/src/pqm4/sqisign_lvl5/ref/hd.h new file mode 100644 index 0000000..2b16e23 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/hd.h @@ -0,0 +1,435 @@ +/** @file + * + * @authors Antonin Leroux + * + * @brief The HD-isogenies algorithm required by the signature + * + */ + +#ifndef HD_H +#define HD_H 
+ +#include +#include +#include + +/** @defgroup hd_module Abelian surfaces and their isogenies + * @{ + */ + +#define HD_extra_torsion 2 + +/** @defgroup hd_struct Data structures for dimension 2 + * @{ + */ + +/** @brief Type for couple point with XZ coordinates + * @typedef theta_couple_point_t + * + * @struct theta_couple_point + * + * Structure for the couple point on an elliptic product + * using XZ coordinates + */ +typedef struct theta_couple_point +{ + ec_point_t P1; + ec_point_t P2; +} theta_couple_point_t; + +/** @brief Type for three couple points T1, T2, T1-T2 with XZ coordinates + * @typedef theta_kernel_couple_points_t + * + * @struct theta_kernel_couple_points + * + * Structure for a triple of theta couple points T1, T2 and T1 - T2 + */ +typedef struct theta_kernel_couple_points +{ + theta_couple_point_t T1; + theta_couple_point_t T2; + theta_couple_point_t T1m2; +} theta_kernel_couple_points_t; + +/** @brief Type for couple point with XYZ coordinates + * @typedef theta_couple_jac_point_t + * + * @struct theta_couple_jac_point + * + * Structure for the couple point on an elliptic product + * using XYZ coordinates + */ +typedef struct theta_couple_jac_point +{ + jac_point_t P1; + jac_point_t P2; +} theta_couple_jac_point_t; + +/** @brief Type for couple curve * + * @typedef theta_couple_curve_t + * + * @struct theta_couple_curve + * + * the theta_couple_curve structure: the two curves E1, E2 of an elliptic product + */ +typedef struct theta_couple_curve +{ + ec_curve_t E1; + ec_curve_t E2; +} theta_couple_curve_t; + +/** @brief Type for a product E1 x E2 with corresponding bases + * @typedef theta_couple_curve_with_basis_t + * + * @struct theta_couple_curve_with_basis + * + * Type for a product E1 x E2 with corresponding bases Ei[2^n] + */ +typedef struct theta_couple_curve_with_basis +{ + ec_curve_t E1; + ec_curve_t E2; + ec_basis_t B1; + ec_basis_t B2; +} theta_couple_curve_with_basis_t; + +/** @brief Type for theta point * + * @typedef theta_point_t + * + * @struct theta_point + * + * the
theta_point structure used + */ +typedef struct theta_point +{ + fp2_t x; + fp2_t y; + fp2_t z; + fp2_t t; +} theta_point_t; + +/** @brief Type for theta point with repeating components + * @typedef theta_point_compact_t + * + * @struct theta_point_compact + * + * the theta_point structure used for points with repeated components + */ +typedef struct theta_point_compact +{ + fp2_t x; + fp2_t y; +} theta_point_compact_t; + +/** @brief Type for theta structure * + * @typedef theta_structure_t + * + * @struct theta_structure + * + * the theta_structure structure used + */ +typedef struct theta_structure +{ + theta_point_t null_point; + bool precomputation; + + // Eight precomputed values used for doubling and + // (2,2)-isogenies. + fp2_t XYZ0; + fp2_t YZT0; + fp2_t XZT0; + fp2_t XYT0; + + fp2_t xyz0; + fp2_t yzt0; + fp2_t xzt0; + fp2_t xyt0; +} theta_structure_t; + +/** @brief A 2x2 matrix used for action by translation + * @typedef translation_matrix_t + * + * @struct translation_matrix + * + * Structure to hold 4 fp2_t elements representing a 2x2 matrix used when computing + * a compatible theta structure during gluing. + */ +typedef struct translation_matrix +{ + fp2_t g00; + fp2_t g01; + fp2_t g10; + fp2_t g11; +} translation_matrix_t; + +/** @brief A 4x4 matrix used for basis changes + * @typedef basis_change_matrix_t + * + * @struct basis_change_matrix + * + * Structure to hold 16 elements representing a 4x4 matrix used for changing + * the basis of a theta point. 
+ */ +typedef struct basis_change_matrix +{ + fp2_t m[4][4]; +} basis_change_matrix_t; + +/** @brief Type for gluing (2,2) theta isogeny * + * @typedef theta_gluing_t + * + * @struct theta_gluing + * + * the theta_gluing structure + */ +typedef struct theta_gluing +{ + + theta_couple_curve_t domain; + theta_couple_jac_point_t xyK1_8; + theta_point_compact_t imageK1_8; + basis_change_matrix_t M; + theta_point_t precomputation; + theta_point_t codomain; + +} theta_gluing_t; + +/** @brief Type for standard (2,2) theta isogeny * + * @typedef theta_isogeny_t + * + * @struct theta_isogeny + * + * the theta_isogeny structure + */ +typedef struct theta_isogeny +{ + theta_point_t T1_8; + theta_point_t T2_8; + bool hadamard_bool_1; + bool hadamard_bool_2; + theta_structure_t domain; + theta_point_t precomputation; + theta_structure_t codomain; +} theta_isogeny_t; + +/** @brief Type for splitting isomorphism * + * @typedef theta_splitting_t + * + * @struct theta_splitting + * + * the theta_splitting structure + */ +typedef struct theta_splitting +{ + basis_change_matrix_t M; + theta_structure_t B; + +} theta_splitting_t; + +// end of hd_struct +/** + * @} + */ + +/** @defgroup hd_functions Functions for dimension 2 + * @{ + */ + +/** + * @brief Compute the double of the theta couple point in on the elliptic product E12 + * + * @param out Output: the theta_couple_point + * @param in the theta couple point in the elliptic product + * @param E1E2 an elliptic product + * in = (P1,P2) + * out = [2] (P1,P2) + * + */ +void double_couple_point(theta_couple_point_t *out, const theta_couple_point_t *in, const theta_couple_curve_t *E1E2); + +/** + * @brief Compute the iterated double of the theta couple point in on the elliptic product E12 + * + * @param out Output: the theta_couple_point + * @param n : the number of iteration + * @param E1E2 an elliptic product + * @param in the theta couple point in the elliptic product + * in = (P1,P2) + * out = [2^n] (P1,P2) + * + */ +void 
double_couple_point_iter(theta_couple_point_t *out, + unsigned n, + const theta_couple_point_t *in, + const theta_couple_curve_t *E1E2); + +/** + * @brief Compute the addition of two points in (X : Y : Z) coordinates on the elliptic product E12 + * + * @param out Output: the theta_couple_jac_point + * @param T1 the theta couple jac point in the elliptic product + * @param T2 the theta couple jac point in the elliptic product + * @param E1E2 an elliptic product + * in = (P1, P2), (Q1, Q2) + * out = (P1 + Q1, P2 + Q2) + * + **/ +void add_couple_jac_points(theta_couple_jac_point_t *out, + const theta_couple_jac_point_t *T1, + const theta_couple_jac_point_t *T2, + const theta_couple_curve_t *E1E2); + +/** + * @brief Compute the double of the theta couple jac point in on the elliptic product E12 + * + * @param out Output: the theta_couple_jac_point + * @param in the theta couple jac point in the elliptic product + * @param E1E2 an elliptic product + * in = (P1,P2) + * out = [2] (P1,P2) + * + */ +void double_couple_jac_point(theta_couple_jac_point_t *out, + const theta_couple_jac_point_t *in, + const theta_couple_curve_t *E1E2); + +/** + * @brief Compute the iterated double of the theta couple jac point in on the elliptic product E12 + * + * @param out Output: the theta_couple_jac_point + * @param n : the number of iterations + * @param in the theta couple jac point in the elliptic product + * @param E1E2 an elliptic product + * in = (P1,P2) + * out = [2^n] (P1,P2) + * + */ +void double_couple_jac_point_iter(theta_couple_jac_point_t *out, + unsigned n, + const theta_couple_jac_point_t *in, + const theta_couple_curve_t *E1E2); + +/** + * @brief A forgetful function which returns (X : Z) points given a pair of (X : Y : Z) points + * + * @param P Output: the theta_couple_point + * @param xyP : the theta_couple_jac_point + **/ +void couple_jac_to_xz(theta_couple_point_t *P, const theta_couple_jac_point_t *xyP); + +/** + * @brief Compute a (2,2) isogeny chain in dimension 2 between
elliptic + * products in the theta_model and evaluate at a list of points of the form + * (P1,0) or (0,P2). Returns 0 if the codomain fails to split (or there is + * an error during the computation) and 1 otherwise. + * + * @param n : the length of the isogeny chain + * @param E12 an elliptic curve product + * @param ker T1, T2 and T1-T2. couple points on E12[2^(n+2)] + * @param extra_torsion boolean indicating if we give the points in E12[2^n] or + * E12[2^(n+HD_extra_torsion)] + * @param E34 Output: the codomain curve + * @param P12 Input/Output: pointer to points to be pushed through the isogeny (in-place) + * @param numP: length of the list of points given in P12 (can be zero) + * @returns 1 on success 0 on failure + * + */ +int theta_chain_compute_and_eval(unsigned n, + /*const*/ theta_couple_curve_t *E12, + const theta_kernel_couple_points_t *ker, + bool extra_torsion, + theta_couple_curve_t *E34, + theta_couple_point_t *P12, + size_t numP); + +/** + * @brief Compute a (2,2) isogeny chain in dimension 2 between elliptic + * products in the theta_model and evaluate at a list of points of the form + * (P1,0) or (0,P2). Returns 0 if the codomain fails to split (or there is + * an error during the computation) and 1 otherwise. + * Compared to theta_chain_compute_and_eval, it does extra isotropy + * checks on the kernel. + * + * @param n : the length of the isogeny chain + * @param E12 an elliptic curve product + * @param ker T1, T2 and T1-T2. 
couple points on E12[2^(n+2)] + * @param extra_torsion boolean indicating if we give the points in E12[2^n] or + * E12[2^(n+HD_extra_torsion)] + * @param E34 Output: the codomain curve + * @param P12 Input/Output: pointer to points to be pushed through the isogeny (in-place) + * @param numP: length of the list of points given in P12 (can be zero) + * @returns 1 on success 0 on failure + * + */ +int theta_chain_compute_and_eval_verify(unsigned n, + /*const*/ theta_couple_curve_t *E12, + const theta_kernel_couple_points_t *ker, + bool extra_torsion, + theta_couple_curve_t *E34, + theta_couple_point_t *P12, + size_t numP); + +/** + * @brief Compute a (2,2) isogeny chain in dimension 2 between elliptic + * products in the theta_model and evaluate at a list of points of the form + * (P1,0) or (0,P2). Returns 0 if the codomain fails to split (or there is + * an error during the computation) and 1 otherwise. + * Compared to theta_chain_compute_and_eval, it selects a random Montgomery + * model of the codomain. + * + * @param n : the length of the isogeny chain + * @param E12 an elliptic curve product + * @param ker T1, T2 and T1-T2. 
couple points on E12[2^(n+2)] + * @param extra_torsion boolean indicating if we give the points in E12[2^n] or + * E12[2^(n+HD_extra_torsion)] + * @param E34 Output: the codomain curve + * @param P12 Input/Output: pointer to points to be pushed through the isogeny (in-place) + * @param numP: length of the list of points given in P12 (can be zero) + * @returns 1 on success, 0 on failure + * + */ +int theta_chain_compute_and_eval_randomized(unsigned n, + /*const*/ theta_couple_curve_t *E12, + const theta_kernel_couple_points_t *ker, + bool extra_torsion, + theta_couple_curve_t *E34, + theta_couple_point_t *P12, + size_t numP); + +/** + * @brief Given a bases B1 on E1 and B2 on E2 copies this to create a kernel + * on E1 x E2 as couple points T1, T2 and T1 - T2 + * + * @param ker Output: a kernel for dim_two_isogenies (T1, T2, T1-T2) + * @param B1 Input basis on E1 + * @param B2 Input basis on E2 + **/ +void copy_bases_to_kernel(theta_kernel_couple_points_t *ker, const ec_basis_t *B1, const ec_basis_t *B2); + +/** + * @brief Given a couple of points (P1, P2) on a couple of curves (E1, E2) + * this function tests if both points are of order exactly 2^t + * + * @param T: couple point (P1, P2) + * @param E: a couple of curves (E1, E2) + * @param t: an integer + * @returns 0xFFFFFFFF on success, 0 on failure + */ +static int +test_couple_point_order_twof(const theta_couple_point_t *T, const theta_couple_curve_t *E, int t) +{ + int check_P1 = test_point_order_twof(&T->P1, &E->E1, t); + int check_P2 = test_point_order_twof(&T->P2, &E->E2, t); + + return check_P1 & check_P2; +} + +// end of hd_functions +/** + * @} + */ +// end of hd_module +/** + * @} + */ +#endif diff --git a/src/pqm4/sqisign_lvl5/ref/hd_splitting_transforms.c b/src/pqm4/sqisign_lvl5/ref/hd_splitting_transforms.c new file mode 100644 index 0000000..a697ac7 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/hd_splitting_transforms.c @@ -0,0 +1,143 @@ +#include + +#define FP2_ZERO 0 +#define FP2_ONE 1 +#define 
FP2_I 2 +#define FP2_MINUS_ONE 3 +#define FP2_MINUS_I 4 + +const int EVEN_INDEX[10][2] = {{0, 0}, {0, 1}, {0, 2}, {0, 3}, {1, 0}, {1, 2}, {2, 0}, {2, 1}, {3, 0}, {3, 3}}; +const int CHI_EVAL[4][4] = {{1, 1, 1, 1}, {1, -1, 1, -1}, {1, 1, -1, -1}, {1, -1, -1, 1}}; +const fp2_t FP2_CONSTANTS[5] = {{ +#if 0 +#elif RADIX == 16 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 32 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#else +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 32 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#else +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#endif +#endif +}, { +#if 0 +#elif RADIX == 16 +{0x4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x500} +#elif RADIX == 32 +{0x25ed0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x800} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x97, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x130000000000000} +#else +{0x12f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xb00000000000} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 32 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#else +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#endif +#endif +}, { +#if 0 +#elif RADIX == 16 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 32 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#else +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0x4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x500} +#elif RADIX == 32 +{0x25ed0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x800} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x97, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x130000000000000} +#else +{0x12f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xb00000000000} +#endif +#endif +}, { +#if 0 +#elif RADIX == 16 +{0x1ffb, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1bf} +#elif RADIX == 32 +{0x1ffda12f, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x57f} 
+#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0xffffffffffffff68, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x7fffffffffffff} +#else +{0x1fffffffffffed0, 0x1ffffffffffffff, 0x1ffffffffffffff, 0x1ffffffffffffff, 0x1ffffffffffffff, 0x1ffffffffffffff, 0x1ffffffffffffff, 0x1ffffffffffffff, 0xffffffffffff} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 32 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#else +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#endif +#endif +}, { +#if 0 +#elif RADIX == 16 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 32 +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#else +{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +#endif +#endif +, +#if 0 +#elif RADIX == 16 +{0x1ffb, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1fff, 0x1bf} +#elif RADIX == 32 +{0x1ffda12f, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 
0x57f} +#elif RADIX == 64 +#if defined(SQISIGN_GF_IMPL_BROADWELL) +{0xffffffffffffff68, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x7fffffffffffff} +#else +{0x1fffffffffffed0, 0x1ffffffffffffff, 0x1ffffffffffffff, 0x1ffffffffffffff, 0x1ffffffffffffff, 0x1ffffffffffffff, 0x1ffffffffffffff, 0x1ffffffffffffff, 0xffffffffffff} +#endif +#endif +}}; +const precomp_basis_change_matrix_t SPLITTING_TRANSFORMS[10] = {{{{FP2_ONE, FP2_I, FP2_ONE, FP2_I}, {FP2_ONE, FP2_MINUS_I, FP2_MINUS_ONE, FP2_I}, {FP2_ONE, FP2_I, FP2_MINUS_ONE, FP2_MINUS_I}, {FP2_MINUS_ONE, FP2_I, FP2_MINUS_ONE, FP2_I}}}, {{{FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_ONE}, {FP2_ZERO, FP2_ZERO, FP2_ONE, FP2_ZERO}, {FP2_ZERO, FP2_MINUS_ONE, FP2_ZERO, FP2_ZERO}}}, {{{FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ONE, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_ONE}, {FP2_ZERO, FP2_ZERO, FP2_MINUS_ONE, FP2_ZERO}}}, {{{FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ONE, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ONE, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_MINUS_ONE}}}, {{{FP2_ONE, FP2_ONE, FP2_ONE, FP2_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE}, {FP2_ONE, FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE}, {FP2_MINUS_ONE, FP2_ONE, FP2_MINUS_ONE, FP2_ONE}}}, {{{FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ONE, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_ONE}, {FP2_ZERO, FP2_ZERO, FP2_ONE, FP2_ZERO}}}, {{{FP2_ONE, FP2_ONE, FP2_ONE, FP2_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_ONE, FP2_MINUS_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE}, {FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE, FP2_ONE}}}, {{{FP2_ONE, FP2_ONE, FP2_ONE, FP2_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_ONE, FP2_MINUS_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE}, {FP2_ONE, FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE}}}, {{{FP2_ONE, FP2_ONE, FP2_ONE, FP2_ONE}, {FP2_ONE, FP2_MINUS_ONE, 
FP2_ONE, FP2_MINUS_ONE}, {FP2_ONE, FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE}, {FP2_MINUS_ONE, FP2_ONE, FP2_ONE, FP2_MINUS_ONE}}}, {{{FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ONE, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ONE, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_ONE}}}}; +const precomp_basis_change_matrix_t NORMALIZATION_TRANSFORMS[6] = {{{{FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ONE, FP2_ZERO, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ONE, FP2_ZERO}, {FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_ONE}}}, {{{FP2_ZERO, FP2_ZERO, FP2_ZERO, FP2_ONE}, {FP2_ZERO, FP2_ZERO, FP2_ONE, FP2_ZERO}, {FP2_ZERO, FP2_ONE, FP2_ZERO, FP2_ZERO}, {FP2_ONE, FP2_ZERO, FP2_ZERO, FP2_ZERO}}}, {{{FP2_ONE, FP2_ONE, FP2_ONE, FP2_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_ONE, FP2_MINUS_ONE}, {FP2_ONE, FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE}, {FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE}}}, {{{FP2_ONE, FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE}, {FP2_MINUS_ONE, FP2_MINUS_ONE, FP2_ONE, FP2_ONE}, {FP2_MINUS_ONE, FP2_ONE, FP2_MINUS_ONE, FP2_ONE}, {FP2_ONE, FP2_ONE, FP2_ONE, FP2_ONE}}}, {{{FP2_MINUS_ONE, FP2_I, FP2_I, FP2_ONE}, {FP2_I, FP2_MINUS_ONE, FP2_ONE, FP2_I}, {FP2_I, FP2_ONE, FP2_MINUS_ONE, FP2_I}, {FP2_ONE, FP2_I, FP2_I, FP2_MINUS_ONE}}}, {{{FP2_ONE, FP2_I, FP2_I, FP2_MINUS_ONE}, {FP2_I, FP2_ONE, FP2_MINUS_ONE, FP2_I}, {FP2_I, FP2_MINUS_ONE, FP2_ONE, FP2_I}, {FP2_MINUS_ONE, FP2_I, FP2_I, FP2_ONE}}}}; diff --git a/src/pqm4/sqisign_lvl5/ref/hd_splitting_transforms.h b/src/pqm4/sqisign_lvl5/ref/hd_splitting_transforms.h new file mode 100644 index 0000000..b3147a4 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/hd_splitting_transforms.h @@ -0,0 +1,18 @@ +#ifndef HD_SPLITTING_H +#define HD_SPLITTING_H + +#include +#include + +typedef struct precomp_basis_change_matrix { + uint8_t m[4][4]; +} precomp_basis_change_matrix_t; + +extern const int EVEN_INDEX[10][2]; +extern const int CHI_EVAL[4][4]; +extern const fp2_t FP2_CONSTANTS[5]; +extern const precomp_basis_change_matrix_t 
SPLITTING_TRANSFORMS[10]; +extern const precomp_basis_change_matrix_t NORMALIZATION_TRANSFORMS[6]; + +#endif + diff --git a/src/pqm4/sqisign_lvl5/ref/isog.h b/src/pqm4/sqisign_lvl5/ref/isog.h new file mode 100644 index 0000000..b251ca3 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/isog.h @@ -0,0 +1,28 @@ +#ifndef _ISOG_H_ +#define _ISOG_H_ +#include +#include + +/* KPS structure for isogenies of degree 2 or 4 */ +typedef struct +{ + ec_point_t K; +} ec_kps2_t; +typedef struct +{ + ec_point_t K[3]; +} ec_kps4_t; + +void xisog_2(ec_kps2_t *kps, ec_point_t *B, const ec_point_t P); // degree-2 isogeny construction +void xisog_2_singular(ec_kps2_t *kps, ec_point_t *B24, ec_point_t A24); + +void xisog_4(ec_kps4_t *kps, ec_point_t *B, const ec_point_t P); // degree-4 isogeny construction +void xisog_4_singular(ec_kps4_t *kps, ec_point_t *B24, const ec_point_t P, ec_point_t A24); + +void xeval_2(ec_point_t *R, ec_point_t *const Q, const int lenQ, const ec_kps2_t *kps); +void xeval_2_singular(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_kps2_t *kps); + +void xeval_4(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_kps4_t *kps); +void xeval_4_singular(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_point_t P, const ec_kps4_t *kps); + +#endif diff --git a/src/pqm4/sqisign_lvl5/ref/isog_chains.c b/src/pqm4/sqisign_lvl5/ref/isog_chains.c new file mode 100644 index 0000000..abc9808 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/isog_chains.c @@ -0,0 +1,241 @@ +#include "isog.h" +#include + +// since we use degree 4 isogeny steps, we need to handle the odd case with care +static uint32_t +ec_eval_even_strategy(ec_curve_t *curve, + ec_point_t *points, + unsigned len_points, + const ec_point_t *kernel, + const int isog_len) +{ + ec_curve_normalize_A24(curve); + ec_point_t A24; + copy_point(&A24, &curve->A24); + + int space = 1; + for (int i = 1; i < isog_len; i *= 2) + ++space; + + // Stack of remaining kernel points and their associated 
orders + ec_point_t splits[space]; + uint16_t todo[space]; + splits[0] = *kernel; + todo[0] = isog_len; + + int current = 0; // Pointer to current top of stack + + // Chain of 4-isogenies + for (int j = 0; j < isog_len / 2; ++j) { + assert(current >= 0); + assert(todo[current] >= 1); + // Get the next point of order 4 + while (todo[current] != 2) { + assert(todo[current] >= 3); + // A new split will be added + ++current; + assert(current < space); + // We set the seed of the new split to be computed and saved + copy_point(&splits[current], &splits[current - 1]); + // if we copied from the very first element, then we perform one additional doubling + unsigned num_dbls = todo[current - 1] / 4 * 2 + todo[current - 1] % 2; + todo[current] = todo[current - 1] - num_dbls; + while (num_dbls--) + xDBL_A24(&splits[current], &splits[current], &A24, false); + } + + if (j == 0) { + assert(fp2_is_one(&A24.z)); + if (!ec_is_four_torsion(&splits[current], curve)) + return -1; + + ec_point_t T; + xDBL_A24(&T, &splits[current], &A24, false); + if (fp2_is_zero(&T.x)) + return -1; // special isogenies not allowed + } else { + assert(todo[current] == 2); +#ifndef NDEBUG + if (fp2_is_zero(&splits[current].z)) + debug_print("splitting point z coordinate is unexpectedly zero"); + + ec_point_t test; + xDBL_A24(&test, &splits[current], &A24, false); + if (fp2_is_zero(&test.z)) + debug_print("z coordinate is unexpectedly zero before doubling"); + xDBL_A24(&test, &test, &A24, false); + if (!fp2_is_zero(&test.z)) + debug_print("z coordinate is unexpectedly not zero after doubling"); +#endif + } + + // Evaluate 4-isogeny + ec_kps4_t kps4; + xisog_4(&kps4, &A24, splits[current]); + xeval_4(splits, splits, current, &kps4); + for (int i = 0; i < current; ++i) + todo[i] -= 2; + xeval_4(points, points, len_points, &kps4); + + --current; + } + assert(isog_len % 2 ? 
!current : current == -1); + + // Final 2-isogeny + if (isog_len % 2) { +#ifndef NDEBUG + if (fp2_is_zero(&splits[0].z)) + debug_print("splitting point z coordinate is unexpectedly zero"); + ec_point_t test; + copy_point(&test, &splits[0]); + xDBL_A24(&test, &test, &A24, false); + if (!fp2_is_zero(&test.z)) + debug_print("z coordinate is unexpectedly not zero after doubling"); +#endif + + // We need to check the order of this point in case there were no 4-isogenies + if (isog_len == 1 && !ec_is_two_torsion(&splits[0], curve)) + return -1; + if (fp2_is_zero(&splits[0].x)) { + // special isogenies not allowed + // this case can only happen if isog_len == 1; otherwise the + // previous 4-isogenies we computed ensure that $T=(0:1)$ is put + // as the kernel of the dual isogeny + return -1; + } + + ec_kps2_t kps2; + xisog_2(&kps2, &A24, splits[0]); + xeval_2(points, points, len_points, &kps2); + } + + // Output curve in the form (A:C) + A24_to_AC(curve, &A24); + + curve->is_A24_computed_and_normalized = false; + + return 0; +} + +uint32_t +ec_eval_even(ec_curve_t *image, ec_isog_even_t *phi, ec_point_t *points, unsigned len_points) +{ + copy_curve(image, &phi->curve); + return ec_eval_even_strategy(image, points, len_points, &phi->kernel, phi->length); +} + +// naive implementation +uint32_t +ec_eval_small_chain(ec_curve_t *curve, + const ec_point_t *kernel, + int len, + ec_point_t *points, + unsigned len_points, + bool special) // do we allow special isogenies? 
+{ + + ec_point_t A24; + AC_to_A24(&A24, curve); + + ec_kps2_t kps; + ec_point_t small_K, big_K; + copy_point(&big_K, kernel); + + for (int i = 0; i < len; i++) { + copy_point(&small_K, &big_K); + // small_K = big_K; + for (int j = 0; j < len - i - 1; j++) { + xDBL_A24(&small_K, &small_K, &A24, false); + } + // Check the order of the point before the first isogeny step + if (i == 0 && !ec_is_two_torsion(&small_K, curve)) + return (uint32_t)-1; + // Perform isogeny step + if (fp2_is_zero(&small_K.x)) { + if (special) { + ec_point_t B24; + xisog_2_singular(&kps, &B24, A24); + xeval_2_singular(&big_K, &big_K, 1, &kps); + xeval_2_singular(points, points, len_points, &kps); + copy_point(&A24, &B24); + } else { + return (uint32_t)-1; + } + } else { + xisog_2(&kps, &A24, small_K); + xeval_2(&big_K, &big_K, 1, &kps); + xeval_2(points, points, len_points, &kps); + } + } + A24_to_AC(curve, &A24); + + curve->is_A24_computed_and_normalized = false; + return 0; +} + +uint32_t +ec_isomorphism(ec_isom_t *isom, const ec_curve_t *from, const ec_curve_t *to) +{ + fp2_t t0, t1, t2, t3, t4; + + fp2_mul(&t0, &from->A, &from->C); + fp2_mul(&t1, &to->A, &to->C); + + fp2_mul(&t2, &t1, &to->C); // toA*toC^2 + fp2_add(&t3, &t2, &t2); + fp2_add(&t3, &t3, &t3); + fp2_add(&t3, &t3, &t3); + fp2_add(&t2, &t2, &t3); // 9*toA*toC^2 + fp2_sqr(&t3, &to->A); + fp2_mul(&t3, &t3, &to->A); // toA^3 + fp2_add(&t3, &t3, &t3); + fp2_sub(&isom->Nx, &t3, &t2); // 2*toA^3-9*toA*toC^2 + fp2_mul(&t2, &t0, &from->A); // fromA^2*fromC + fp2_sqr(&t3, &from->C); + fp2_mul(&t3, &t3, &from->C); // fromC^3 + fp2_add(&t4, &t3, &t3); + fp2_add(&t3, &t4, &t3); // 3*fromC^3 + fp2_sub(&t3, &t3, &t2); // 3*fromC^3-fromA^2*fromC + fp2_mul(&isom->Nx, &isom->Nx, &t3); // lambda_x = (2*toA^3-9*toA*toC^2)*(3*fromC^3-fromA^2*fromC) + + fp2_mul(&t2, &t0, &from->C); // fromA*fromC^2 + fp2_add(&t3, &t2, &t2); + fp2_add(&t3, &t3, &t3); + fp2_add(&t3, &t3, &t3); + fp2_add(&t2, &t2, &t3); // 9*fromA*fromC^2 + fp2_sqr(&t3, &from->A); + 
fp2_mul(&t3, &t3, &from->A); // fromA^3 + fp2_add(&t3, &t3, &t3); + fp2_sub(&isom->D, &t3, &t2); // 2*fromA^3-9*fromA*fromC^2 + fp2_mul(&t2, &t1, &to->A); // toA^2*toC + fp2_sqr(&t3, &to->C); + fp2_mul(&t3, &t3, &to->C); // toC^3 + fp2_add(&t4, &t3, &t3); + fp2_add(&t3, &t4, &t3); // 3*toC^3 + fp2_sub(&t3, &t3, &t2); // 3*toC^3-toA^2*toC + fp2_mul(&isom->D, &isom->D, &t3); // lambda_z = (2*fromA^3-9*fromA*fromC^2)*(3*toC^3-toA^2*toC) + + // Mont -> SW -> SW -> Mont + fp2_mul(&t0, &to->C, &from->A); + fp2_mul(&t0, &t0, &isom->Nx); // lambda_x*toC*fromA + fp2_mul(&t1, &from->C, &to->A); + fp2_mul(&t1, &t1, &isom->D); // lambda_z*fromC*toA + fp2_sub(&isom->Nz, &t0, &t1); // lambda_x*toC*fromA - lambda_z*fromC*toA + fp2_mul(&t0, &from->C, &to->C); + fp2_add(&t1, &t0, &t0); + fp2_add(&t0, &t0, &t1); // 3*fromC*toC + fp2_mul(&isom->D, &isom->D, &t0); // 3*lambda_z*fromC*toC + fp2_mul(&isom->Nx, &isom->Nx, &t0); // 3*lambda_x*fromC*toC + + return (fp2_is_zero(&isom->Nx) | fp2_is_zero(&isom->D)); +} + +void +ec_iso_eval(ec_point_t *P, ec_isom_t *isom) +{ + fp2_t tmp; + fp2_mul(&P->x, &P->x, &isom->Nx); + fp2_mul(&tmp, &P->z, &isom->Nz); + fp2_add(&P->x, &P->x, &tmp); + fp2_mul(&P->z, &P->z, &isom->D); +} diff --git a/src/pqm4/sqisign_lvl5/ref/mp.c b/src/pqm4/sqisign_lvl5/ref/mp.c new file mode 100644 index 0000000..27f4a96 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/mp.c @@ -0,0 +1,357 @@ +#include +#include +#include +#include + +// double-wide multiplication +void +MUL(digit_t *out, const digit_t a, const digit_t b) +{ +#ifdef RADIX_32 + uint64_t r = (uint64_t)a * b; + out[0] = r & 0xFFFFFFFFUL; + out[1] = r >> 32; + +#elif defined(RADIX_64) && defined(_MSC_VER) + uint64_t umul_hi; + out[0] = _umul128(a, b, &umul_hi); + out[1] = umul_hi; + +#elif defined(RADIX_64) && defined(HAVE_UINT128) + unsigned __int128 umul_tmp; + umul_tmp = (unsigned __int128)(a) * (unsigned __int128)(b); + out[0] = (uint64_t)umul_tmp; + out[1] = (uint64_t)(umul_tmp >> 64); + +#else + register 
digit_t al, ah, bl, bh, temp; + digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry; + digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t) * 4), mask_high = (digit_t)(-1) << (sizeof(digit_t) * 4); + al = a & mask_low; // Low part + ah = a >> (sizeof(digit_t) * 4); // High part + bl = b & mask_low; + bh = b >> (sizeof(digit_t) * 4); + + albl = al * bl; + albh = al * bh; + ahbl = ah * bl; + ahbh = ah * bh; + out[0] = albl & mask_low; // out00 + + res1 = albl >> (sizeof(digit_t) * 4); + res2 = ahbl & mask_low; + res3 = albh & mask_low; + temp = res1 + res2 + res3; + carry = temp >> (sizeof(digit_t) * 4); + out[0] ^= temp << (sizeof(digit_t) * 4); // out01 + + res1 = ahbl >> (sizeof(digit_t) * 4); + res2 = albh >> (sizeof(digit_t) * 4); + res3 = ahbh & mask_low; + temp = res1 + res2 + res3 + carry; + out[1] = temp & mask_low; // out10 + carry = temp & mask_high; + out[1] ^= (ahbh & mask_high) + carry; // out11 + +#endif +} + +void +mp_add(digit_t *c, const digit_t *a, const digit_t *b, const unsigned int nwords) +{ // Multiprecision addition + unsigned int i, carry = 0; + + for (i = 0; i < nwords; i++) { + ADDC(c[i], carry, a[i], b[i], carry); + } +} + +digit_t +mp_shiftr(digit_t *x, const unsigned int shift, const unsigned int nwords) +{ // Multiprecision right shift by 1...RADIX-1 + digit_t bit_out = x[0] & 1; + + for (unsigned int i = 0; i < nwords - 1; i++) { + SHIFTR(x[i + 1], x[i], shift, x[i], RADIX); + } + x[nwords - 1] >>= shift; + return bit_out; +} + +void +mp_shiftl(digit_t *x, const unsigned int shift, const unsigned int nwords) +{ // Multiprecision left shift by 1...RADIX-1 + + for (int i = nwords - 1; i > 0; i--) { + SHIFTL(x[i], x[i - 1], shift, x[i], RADIX); + } + x[0] <<= shift; +} + +void +multiple_mp_shiftl(digit_t *x, const unsigned int shift, const unsigned int nwords) +{ + int t = shift; + while (t > RADIX - 1) { + mp_shiftl(x, RADIX - 1, nwords); + t = t - (RADIX - 1); + } + mp_shiftl(x, t, nwords); +} + +// The below functions were taken 
from the EC module + +void +mp_sub(digit_t *c, const digit_t *a, const digit_t *b, const unsigned int nwords) +{ // Multiprecision subtraction, assuming a > b + unsigned int i, borrow = 0; + + for (i = 0; i < nwords; i++) { + SUBC(c[i], borrow, a[i], b[i], borrow); + } +} + +void +select_ct(digit_t *c, const digit_t *a, const digit_t *b, const digit_t mask, const int nwords) +{ // Select c <- a if mask = 0, select c <- b if mask = 1...1 + + for (int i = 0; i < nwords; i++) { + c[i] = ((a[i] ^ b[i]) & mask) ^ a[i]; + } +} + +void +swap_ct(digit_t *a, digit_t *b, const digit_t option, const int nwords) +{ // Swap entries + // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then a <- b and b <- a + digit_t temp; + + for (int i = 0; i < nwords; i++) { + temp = option & (a[i] ^ b[i]); + a[i] = temp ^ a[i]; + b[i] = temp ^ b[i]; + } +} + +int +mp_compare(const digit_t *a, const digit_t *b, unsigned int nwords) +{ // Multiprecision comparison, a=b? : (1) a>b, (0) a=b, (-1) a= 0; i--) { + if (a[i] > b[i]) + return 1; + else if (a[i] < b[i]) + return -1; + } + return 0; +} + +bool +mp_is_zero(const digit_t *a, unsigned int nwords) +{ // Is a multiprecision element zero? 
+ // Returns 1 (true) if a=0, 0 (false) otherwise + digit_t r = 0; + + for (unsigned int i = 0; i < nwords; i++) + r |= a[i] ^ 0; + + return (bool)is_digit_zero_ct(r); +} + +void +mp_mul2(digit_t *c, const digit_t *a, const digit_t *b) +{ // Multiprecision multiplication fixed to two-digit operands + unsigned int carry = 0; + digit_t t0[2], t1[2], t2[2]; + + MUL(t0, a[0], b[0]); + MUL(t1, a[0], b[1]); + ADDC(t0[1], carry, t0[1], t1[0], carry); + ADDC(t1[1], carry, 0, t1[1], carry); + MUL(t2, a[1], b[1]); + ADDC(t2[0], carry, t2[0], t1[1], carry); + ADDC(t2[1], carry, 0, t2[1], carry); + c[0] = t0[0]; + c[1] = t0[1]; + c[2] = t2[0]; + c[3] = t2[1]; +} + +void +mp_print(const digit_t *a, size_t nwords) +{ + printf("0x"); + for (size_t i = 0; i < nwords; i++) { +#ifdef RADIX_32 + printf("%08" PRIx32, a[nwords - i - 1]); // Print each word with 8 hex digits +#elif defined(RADIX_64) + printf("%016" PRIx64, a[nwords - i - 1]); // Print each word with 16 hex digits +#endif + } +} + +void +mp_copy(digit_t *b, const digit_t *a, size_t nwords) +{ + for (size_t i = 0; i < nwords; i++) { + b[i] = a[i]; + } +} + +void +mp_mul(digit_t *c, const digit_t *a, const digit_t *b, size_t nwords) +{ + // Multiprecision multiplication, c = a*b, for nwords-digit inputs, with nwords-digit output + // explicitly does not use the higher half of c, as we do not need in our applications + digit_t carry, UV[2], t[nwords], cc[nwords]; + + for (size_t i = 0; i < nwords; i++) { + cc[i] = 0; + } + + for (size_t i = 0; i < nwords; i++) { + + MUL(t, a[i], b[0]); + + for (size_t j = 1; j < nwords - 1; j++) { + MUL(UV, a[i], b[j]); + ADDC(t[j], carry, t[j], UV[0], 0); + t[j + 1] = UV[1] + carry; + } + + int j = nwords - 1; + MUL(UV, a[i], b[j]); + ADDC(t[j], carry, t[j], UV[0], 0); + + mp_add(&cc[i], &cc[i], t, nwords - i); + } + + mp_copy(c, cc, nwords); +} + +void +mp_mod_2exp(digit_t *a, unsigned int e, unsigned int nwords) +{ // Multiprecision modulo 2^e, with 0 <= a < 2^(e) + unsigned int i, q = e 
>> LOG2RADIX, r = e & (RADIX - 1);
+
+    if (q < nwords) {
+        a[q] &= ((digit_t)1 << r) - 1;
+
+        for (i = q + 1; i < nwords; i++) {
+            a[i] = 0;
+        }
+    }
+}
+
+void
+mp_neg(digit_t *a, unsigned int nwords)
+{ // negates a in place (two's complement taken over the whole nwords-digit value); no-op when nwords = 0
+    digit_t carry = 1; // the "+1" of ~a + 1; it has to ripple upwards for as long as digits wrap to zero
+    for (size_t i = 0; i < nwords; i++) {
+        a[i] = ~a[i] + carry;
+        carry &= (digit_t)is_digit_zero_ct(a[i]); // carry survives only if it was set and this digit wrapped to 0
+    }
+}
+
+bool
+mp_is_one(const digit_t *x, unsigned int nwords)
+{ // returns true if x represents 1, and false otherwise
+    if (x[0] != 1) {
+        return false;
+    }
+
+    for (size_t i = 1; i < nwords; i++) {
+        if (x[i] != 0) {
+            return false;
+        }
+    }
+    return true;
+}
+
+void
+mp_inv_2e(digit_t *b, const digit_t *a, int e, unsigned int nwords)
+{ // Inversion modulo 2^e, using Newton's method and Hensel lifting
+    // we take the first power of 2 larger than e to use
+    // requires a to be odd, of course
+    // returns b such that a*b = 1 mod 2^e
+    assert((a[0] & 1) == 1);
+
+    digit_t x[nwords], y[nwords], aa[nwords], mp_one[nwords], tmp[nwords];
+    mp_copy(aa, a, nwords);
+
+    mp_one[0] = 1;
+    for (unsigned int i = 1; i < nwords; i++) {
+        mp_one[i] = 0;
+    }
+
+    int p = 1;
+    while ((1 << p) < e) {
+        p++;
+    }
+    p -= 2; // using k = 4 for initial inverse
+    int w = (1 << (p + 2));
+
+    mp_mod_2exp(aa, w, nwords);
+    mp_add(x, aa, aa, nwords);
+    mp_add(x, x, aa, nwords); // should be 3a
+    x[0] ^= (1 << 1); // so that x equals (3a)^2 xor 2
+    mp_mod_2exp(x, w, nwords); // now x*a = 1 mod 2^4, which we lift
+
+    mp_mul(tmp, aa, x, nwords);
+    mp_neg(tmp, nwords);
+    mp_add(y, mp_one, tmp, nwords);
+
+    // Hensel lifting for p rounds
+    for (int i = 0; i < p; i++) {
+        mp_add(tmp, mp_one, y, nwords);
+        mp_mul(x, x, tmp, nwords);
+        mp_mul(y, y, y, nwords);
+    }
+
+    mp_mod_2exp(x, w, nwords);
+    mp_copy(b, x, nwords);
+
+    // verify results
+    mp_mul(x, x, aa, nwords);
+    mp_mod_2exp(x, w, nwords);
+    assert(mp_is_one(x, nwords));
+}
+
+void
+mp_invert_matrix(digit_t *r1, digit_t *r2, digit_t *s1, digit_t *s2, int e, unsigned int nwords)
+{
+    // given a matrix ( ( a, b ), (c, d) ) of values mod 2^e
+    // returns the
inverse matrix gamma ( (d, -b), (-c, a) ) + // where gamma is the inverse of the determinant a*d - b*c + // assumes the matrix is invertible, otherwises, inversion of determinant fails + + int p = 1; + while ((1 << p) < e) { + p++; + } + int w = (1 << (p)); + + digit_t det[nwords], tmp[nwords], resa[nwords], resb[nwords], resc[nwords], resd[nwords]; + mp_mul(tmp, r1, s2, nwords); + mp_mul(det, r2, s1, nwords); + mp_sub(det, tmp, det, nwords); + mp_inv_2e(det, det, e, nwords); + + mp_mul(resa, det, s2, nwords); + mp_mul(resb, det, r2, nwords); + mp_mul(resc, det, s1, nwords); + mp_mul(resd, det, r1, nwords); + + mp_neg(resb, nwords); + mp_neg(resc, nwords); + + mp_mod_2exp(resa, w, nwords); + mp_mod_2exp(resb, w, nwords); + mp_mod_2exp(resc, w, nwords); + mp_mod_2exp(resd, w, nwords); + + mp_copy(r1, resa, nwords); + mp_copy(r2, resb, nwords); + mp_copy(s1, resc, nwords); + mp_copy(s2, resd, nwords); +} diff --git a/src/pqm4/sqisign_lvl5/ref/mp.h b/src/pqm4/sqisign_lvl5/ref/mp.h new file mode 100644 index 0000000..b3733b5 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/mp.h @@ -0,0 +1,88 @@ +#ifndef MP_H +#define MP_H + +#include +#include +#include + +// Functions taken from the GF module + +void mp_add(digit_t *c, const digit_t *a, const digit_t *b, const unsigned int nwords); +digit_t mp_shiftr(digit_t *x, const unsigned int shift, const unsigned int nwords); +void multiple_mp_shiftl(digit_t *x, const unsigned int shift, const unsigned int nwords); +void mp_shiftl(digit_t *x, const unsigned int shift, const unsigned int nwords); +void MUL(digit_t *out, const digit_t a, const digit_t b); + +// Functions taken from the EC module + +void mp_sub(digit_t *c, const digit_t *a, const digit_t *b, const unsigned int nwords); +void select_ct(digit_t *c, const digit_t *a, const digit_t *b, const digit_t mask, const int nwords); +void swap_ct(digit_t *a, digit_t *b, const digit_t option, const int nwords); +int mp_compare(const digit_t *a, const digit_t *b, unsigned int 
nwords);
+bool mp_is_zero(const digit_t *a, unsigned int nwords);
+void mp_mul2(digit_t *c, const digit_t *a, const digit_t *b);
+
+// Further functions for multiprecision arithmetic
+void mp_print(const digit_t *a, size_t nwords);
+void mp_copy(digit_t *b, const digit_t *a, size_t nwords);
+void mp_neg(digit_t *a, unsigned int nwords); // negates a in place
+bool mp_is_one(const digit_t *x, unsigned int nwords); // true iff x represents 1
+void mp_mul(digit_t *c, const digit_t *a, const digit_t *b, size_t nwords);
+void mp_mod_2exp(digit_t *a, unsigned int e, unsigned int nwords);
+void mp_inv_2e(digit_t *b, const digit_t *a, int e, unsigned int nwords); // b = a^-1 mod 2^e, a must be odd
+void mp_invert_matrix(digit_t *r1, digit_t *r2, digit_t *s1, digit_t *s2, int e, unsigned int nwords); // in-place 2x2 inverse mod 2^e
+
+// Parity tests on the low word. Both operands of the bitwise & are evaluated, so
+// (x)[0] is read even when nwords is 0; the result is then 0 (reported as even).
+#define mp_is_odd(x, nwords) (((nwords) != 0) & (int)(x)[0])
+#define mp_is_even(x, nwords) (!mp_is_odd(x, nwords))
+
+/********************** Constant-time unsigned comparisons ***********************/
+
+// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise
+// They use only arithmetic and shifts, no branches.
+// NOTE(review): the shifts by RADIX - 1 presume RADIX is the bit width of digit_t - confirm.
+static inline unsigned int
+is_digit_nonzero_ct(digit_t x)
+{ // Is x != 0?
+    // x | (0 - x) has its top bit set exactly when x is nonzero
+    return (unsigned int)((x | (0 - x)) >> (RADIX - 1));
+}
+
+static inline unsigned int
+is_digit_zero_ct(digit_t x)
+{ // Is x = 0?
+    // logical complement of is_digit_nonzero_ct
+    return (unsigned int)(1 ^ is_digit_nonzero_ct(x));
+}
+
+static inline unsigned int
+is_digit_lessthan_ct(digit_t x, digit_t y)
+{ // Is x < y?
+    // the top bit of the expression below is the borrow of x - y; it is moved down to bit 0
+    return (unsigned int)((x ^ ((x ^ y) | ((x - y) ^ y))) >> (RADIX - 1));
+}
+
+/********************** Platform-independent macros for digit-size operations
+ * **********************/
+
+// Digit addition with carry
+// sumOut = addend1 + addend2 + carryIn, carryOut = carry out of the digit (0 or 1).
+// Expands to a brace block and evaluates its arguments more than once.
+#define ADDC(sumOut, carryOut, addend1, addend2, carryIn) \
+    { \
+        digit_t tempReg = (addend1) + (digit_t)(carryIn); \
+        (sumOut) = (addend2) + tempReg; \
+        (carryOut) = (is_digit_lessthan_ct(tempReg, (digit_t)(carryIn)) | is_digit_lessthan_ct((sumOut), tempReg)); \
+    }
+
+// Digit subtraction with borrow
+// differenceOut = minuend - subtrahend - borrowIn, borrowOut = borrow out of the digit (0 or 1).
+// Expands to a brace block and evaluates its arguments more than once.
+#define SUBC(differenceOut, borrowOut, minuend, subtrahend, borrowIn) \
+    { \
+        digit_t tempReg = (minuend) - (subtrahend); \
+        unsigned int borrowReg = \
+            (is_digit_lessthan_ct((minuend), (subtrahend)) | ((borrowIn) & is_digit_zero_ct(tempReg))); \
+        (differenceOut) = tempReg - (digit_t)(borrowIn); \
+        (borrowOut) = borrowReg; \
+    }
+
+// Shift right with flexible datatype
+// shiftOut = (lowIn >> shift) combined with the low bits of highIn shifted in from the top.
+// shift must satisfy 0 < shift < DigitSize, otherwise the complementary shift is undefined.
+#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \
+    (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << ((DigitSize) - (shift)));
+
+// Digit shift left
+// shiftOut = (highIn << shift) combined with the high bits of lowIn shifted in from the bottom.
+// Uses the DigitSize argument like SHIFTR does (it used to hard-code RADIX and ignore
+// the parameter); identical for every caller that passes RADIX.
+// shift must satisfy 0 < shift < DigitSize.
+#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \
+    (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> ((DigitSize) - (shift)));
+
+#endif
diff --git a/src/pqm4/sqisign_lvl5/ref/pqm4_api.c b/src/pqm4/sqisign_lvl5/ref/pqm4_api.c
new file mode 100644
index 0000000..49cba9d
--- /dev/null
+++ b/src/pqm4/sqisign_lvl5/ref/pqm4_api.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: Apache-2.0
+
+#include
+#include
+#include
+
+// One known-answer test: a message of mlen bytes and the matching signed message
+// of smlen bytes (both buffers are sized for the longest entry, 59 message bytes).
+typedef struct {
+  size_t mlen;
+  char msg[59];
+  size_t smlen;
+  char sm[59 + CRYPTO_BYTES];
+} SQISign_KAT_t;
+
+// Fixed public key handed out by crypto_sign_keypair
+const char kat_lvl5_pk[CRYPTO_PUBLICKEYBYTES] = {
+  0x4B, 0xA3, 0x81, 0xDA, 0xF9, 0x17, 0x40, 0x97, 0x4C, 0xB3, 0x61, 0xE6, 0x5A, 0x1B, 0x82, 0xFD, 0x17, 0x4A, 0x1F, 0x58, 0x18, 0x7A, 0xD8, 0x2C, 0xD8, 0xBC, 0x06, 0xCC, 0x3E, 0xC4, 0x29, 0x56, 0xC2, 0x4E, 0x7F, 0xA7, 0x54, 0x6D, 0xBE, 0x63, 0x50, 0x30, 0x73, 0xCB, 0x42, 0x57, 0x7F, 0x57, 0xD5, 0xCF, 0x36, 0xE2, 0xF0, 0x6E, 0xBD, 0xFB, 0x6E,
0x02, 0x7F, 0xCD, 0xD6, 0x52, 0x57, 0x01, 0x04, 0x70, 0x5B, 0xF7, 0x83, 0x55, 0xD9, 0x61, 0x24, 0xA2, 0xBF, 0x6B, 0x49, 0x2E, 0xFC, 0x43, 0x49, 0xF4, 0xD3, 0xAF, 0x47, 0x55, 0xCB, 0x3D, 0x4C, 0xAB, 0xD5, 0x05, 0x8A, 0x6B, 0x2B, 0x22, 0x8E, 0x47, 0x26, 0xE5, 0x15, 0xD2, 0x4B, 0x4C, 0x33, 0x12, 0x44, 0x49, 0x1B, 0x0A, 0x59, 0xEC, 0x94, 0x1D, 0xDD, 0xE7, 0xDA, 0x12, 0xEE, 0xE7, 0x3C, 0x84, 0x75, 0x40, 0xCE, 0xEA, 0x70, 0x00, 0x0B, +}; + +const SQISign_KAT_t kat_lvl5[2] = { + { + .mlen = 32, + .msg = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }, + .smlen = 32 + CRYPTO_BYTES, + .sm = { 0x8D, 0x7F, 0xCC, 0x36, 0xA4, 0x56, 0x85, 0x22, 0x11, 0x50, 0x98, 0x9C, 0x4F, 0x23, 0xCD, 0x24, 0x6D, 0xB1, 0x3A, 0x6A, 0x8C, 0xA5, 0x6D, 0x81, 0x13, 0x6D, 0x95, 0x6B, 0xD6, 0x15, 0x5A, 0x37, 0x8E, 0x96, 0x98, 0x14, 0x4A, 0x0E, 0x27, 0x2A, 0x0B, 0x8C, 0x80, 0x64, 0xC5, 0x8C, 0x62, 0x5C, 0xB4, 0x5C, 0xB0, 0x64, 0x99, 0x19, 0x9C, 0x6B, 0x0D, 0x34, 0x60, 0x9F, 0x75, 0x01, 0xA0, 0x01, 0x3A, 0x3D, 0xDF, 0x64, 0xB4, 0x74, 0x38, 0x39, 0x6D, 0x3A, 0x41, 0xE3, 0xA5, 0xAF, 0xDF, 0xEB, 0x1E, 0xAC, 0x87, 0x8A, 0x1E, 0xDF, 0x76, 0x1B, 0x81, 0x33, 0xAF, 0x43, 0x53, 0xB8, 0x2C, 0x2C, 0x7A, 0x29, 0x89, 0x9C, 0x2F, 0xD7, 0x22, 0x78, 0x88, 0x98, 0xA4, 0x24, 0xF6, 0x0B, 0xFE, 0x4A, 0xA1, 0x5F, 0xC0, 0x71, 0xD1, 0x31, 0xB4, 0xE5, 0x3A, 0xDB, 0xB7, 0x8B, 0xCD, 0xF0, 0x8C, 0x00, 0x00, 0x01, 0x2C, 0x89, 0x64, 0x6E, 0x35, 0xD3, 0xF8, 0x25, 0x76, 0x2E, 0x84, 0xF2, 0x68, 0x37, 0x8E, 0xAB, 0x35, 0x7D, 0x02, 0xE9, 0x01, 0x29, 0x22, 0xE5, 0x78, 0x3E, 0x49, 0xB0, 0x21, 0xCB, 0xD4, 0x07, 0xEB, 0x60, 0x32, 0x18, 0x0E, 0x17, 0xCB, 0x7D, 0x58, 0xCF, 0x54, 0xC1, 0xFF, 0x5E, 0x96, 0x25, 0x82, 0x68, 0x1F, 0x4B, 0x6C, 0xD5, 0x0E, 0xD3, 0x11, 0x8C, 0x36, 0xBA, 0x05, 0xC1, 0xDC, 0x34, 0xE2, 0xBD, 0x31, 0x86, 0x8C, 0x51, 0xC9, 0xE2, 0x7D, 0xEB, 0xA1, 0x7F, 0x15, 0xA0, 0xDF, 0xEC, 
0xF7, 0xA2, 0x2C, 0x30, 0x7D, 0x1E, 0x20, 0x91, 0xCF, 0x97, 0xF0, 0x6B, 0x9A, 0x21, 0x58, 0x26, 0x86, 0x9F, 0x7E, 0xC5, 0x40, 0xA9, 0xF6, 0xB0, 0x9C, 0x89, 0xDD, 0xC3, 0xE5, 0xF5, 0xEC, 0xAE, 0xBD, 0x0E, 0xB1, 0xDD, 0xDA, 0xCC, 0x0C, 0xAE, 0x0E, 0xC9, 0xCD, 0xDE, 0xEF, 0xEE, 0x46, 0x61, 0xC2, 0xEB, 0xB0, 0x8B, 0xD3, 0x8F, 0xB7, 0x86, 0x16, 0xAC, 0x84, 0x3A, 0x47, 0xA3, 0x48, 0x8B, 0xF6, 0xDB, 0x2B, 0x4E, 0xA2, 0x60, 0x1F, 0xA5, 0x47, 0xD1, 0xFA, 0x34, 0x65, 0xD2, 0x63, 0x00, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }, + }, + { + .mlen = 59, + .msg = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }, + .smlen = 59 + CRYPTO_BYTES, + .sm = { 0xD5, 0x7B, 0x91, 0xF4, 0x45, 0xB3, 0xE1, 0x8E, 0xAF, 0x36, 0xAF, 0xC9, 0xDE, 0xB4, 0x78, 0xCC, 0x8A, 0x8C, 0xEA, 0x72, 0x1E, 0xF3, 0x8F, 0xEC, 0xF3, 0xDC, 0xA6, 0x92, 0x22, 0xC4, 0xB8, 0x16, 0xD4, 0x20, 0x3C, 0x91, 0x0E, 0x0E, 0x92, 0xA0, 0xE6, 0xC0, 0x0E, 0xB1, 0x22, 0x50, 0xA3, 0x9B, 0x8D, 0x5F, 0x18, 0x3B, 0xAF, 0xDE, 0xBE, 0xD2, 0x1A, 0x74, 0x88, 0x6F, 0x8E, 0xD9, 0xA8, 0x01, 0xF0, 0x41, 0x22, 0x82, 0x9F, 0xC5, 0xDF, 0x2D, 0xD3, 0x4F, 0x84, 0xAC, 0x15, 0xA1, 0xEF, 0xC0, 0x14, 0x16, 0xB7, 0x12, 0xDC, 0xE9, 0xB8, 0x94, 0x48, 0x13, 0x63, 0xD2, 0x3C, 0x6F, 0x21, 0xD1, 0x70, 0xDA, 0xC6, 0x7A, 0xC8, 0xF5, 0x03, 0xC9, 0x8A, 0xAA, 0x52, 0x48, 0x50, 0x79, 0x8D, 0xA5, 0x1B, 0x19, 0xCE, 0x98, 0x3D, 0xBE, 0xB2, 0xB4, 0x7E, 0x92, 0x23, 0xDD, 0x98, 0x9B, 0x7D, 0x01, 0x01, 0x04, 0xAE, 0xFA, 0xF9, 0xEF, 0x9E, 0xE9, 0xE9, 0xE0, 0x5B, 0x2B, 0x03, 0x41, 0xB6, 0x5F, 0xAA, 0x38, 0x5E, 
0xA3, 0x3C, 0x8D, 0x25, 0x7C, 0x51, 0x5E, 0x74, 0x55, 0x09, 0xE1, 0xB4, 0x0C, 0xF9, 0x31, 0x4F, 0x20, 0xC1, 0x23, 0x0D, 0x16, 0x1D, 0x6A, 0x79, 0x98, 0x81, 0x52, 0x77, 0x4A, 0x7F, 0xF7, 0x01, 0x61, 0xE8, 0x6B, 0xF7, 0xE6, 0x75, 0x63, 0x8F, 0xB4, 0x73, 0xF3, 0x83, 0x44, 0x43, 0x01, 0x86, 0x12, 0x6D, 0xE6, 0x63, 0xF3, 0x10, 0x64, 0xDB, 0xCB, 0x6A, 0x84, 0xA2, 0x46, 0x7B, 0x80, 0x18, 0x7D, 0x66, 0x15, 0x82, 0xFF, 0xDD, 0x45, 0x41, 0xDA, 0x94, 0x5E, 0x46, 0xD0, 0x18, 0x1D, 0x63, 0x75, 0xB2, 0xE1, 0xAF, 0x9B, 0xF7, 0xDD, 0x72, 0x99, 0x15, 0xE4, 0x48, 0xD9, 0x72, 0x45, 0x51, 0x17, 0x2A, 0x71, 0xB4, 0x88, 0x19, 0xCD, 0xC6, 0x4C, 0x2B, 0xAF, 0x34, 0xB0, 0x89, 0x3B, 0x36, 0x62, 0x96, 0x48, 0x19, 0x87, 0x65, 0x44, 0x9A, 0x88, 0x66, 0x0E, 0x5A, 0xA9, 0x2C, 0xA7, 0x67, 0x54, 0xC3, 0x53, 0xCF, 0x62, 0x59, 0x6A, 0x60, 0xFF, 0x17, 0x8A, 0xA1, 0x69, 0x65, 0x00, 0x04, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }, + }, +}; + +int crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { + memcpy(pk, kat_lvl5_pk, CRYPTO_PUBLICKEYBYTES); + // We don't need the secret key + memset(sk, 0, CRYPTO_SECRETKEYBYTES); +} + +int crypto_sign(unsigned char *sm, size_t *smlen, const unsigned char *m, + size_t mlen, const unsigned char *sk) { + for (size_t i = 0; i < sizeof(kat_lvl5) / sizeof(kat_lvl5[0]); i++) { + if (mlen == kat_lvl5[i].mlen) { + memcpy(sm, kat_lvl5[i].sm, kat_lvl5[i].smlen); + *smlen = kat_lvl5[i].smlen; + return 0; + } + } + + return 1; +} + +int crypto_sign_open(unsigned char *m, size_t *mlen, const unsigned char *sm, + size_t smlen, const unsigned char *pk) { + unsigned long long mlen_ull = *mlen; + int ret = sqisign_open(m, &mlen_ull, sm, smlen, pk); + if 
(mlen) {
+    *mlen = mlen_ull;
+  }
+  return ret;
+}
diff --git a/src/pqm4/sqisign_lvl5/ref/rng.h b/src/pqm4/sqisign_lvl5/ref/rng.h
new file mode 100644
index 0000000..3c24d07
--- /dev/null
+++ b/src/pqm4/sqisign_lvl5/ref/rng.h
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef rng_h
+#define rng_h
+
+#include "randombytes.h"
+
+#endif /* rng_h */
diff --git a/src/pqm4/sqisign_lvl5/ref/sig.h b/src/pqm4/sqisign_lvl5/ref/sig.h
new file mode 100644
index 0000000..4c33510
--- /dev/null
+++ b/src/pqm4/sqisign_lvl5/ref/sig.h
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef SQISIGN_H
+#define SQISIGN_H
+
+#include
+#include
+
+#if defined(ENABLE_SIGN)
+/**
+ * SQIsign keypair generation.
+ *
+ * The implementation corresponds to SQIsign.CompactKeyGen() in the SQIsign spec.
+ * The caller is responsible for allocating sufficient memory to hold pk and sk.
+ *
+ * @param[out] pk SQIsign public key
+ * @param[out] sk SQIsign secret key
+ * @return int status code (0 on success, non-zero on failure)
+ */
+SQISIGN_API
+int sqisign_keypair(unsigned char *pk, unsigned char *sk);
+
+/**
+ * SQIsign signature generation.
+ *
+ * The implementation performs SQIsign.expandSK() + SQIsign.sign() in the SQIsign spec.
+ * The key provided is a compacted secret key.
+ * The caller is responsible for allocating sufficient memory to hold sm.
+ *
+ * @param[out] sm Signature concatenated with message
+ * @param[out] smlen Pointer to the length of sm
+ * @param[in] m Message to be signed
+ * @param[in] mlen Message length
+ * @param[in] sk Compacted secret key
+ * @return int status code (0 on success, non-zero on failure)
+ */
+SQISIGN_API
+int sqisign_sign(unsigned char *sm,
+                 unsigned long long *smlen,
+                 const unsigned char *m,
+                 unsigned long long mlen,
+                 const unsigned char *sk);
+#endif
+
+/**
+ * SQIsign open signature.
+ *
+ * The implementation performs SQIsign.verify(). If the signature verification succeeded, the
+ * original message is stored in m. The key provided is a compact public key.
The caller is responsible
+ * for allocating sufficient memory to hold m.
+ *
+ * @param[out] m Message stored if verification succeeds
+ * @param[out] mlen Pointer to the length of m
+ * @param[in] sm Signature concatenated with message
+ * @param[in] smlen Length of sm
+ * @param[in] pk Compacted public key
+ * @return int status code (0 if verification succeeded, non-zero otherwise)
+ */
+SQISIGN_API
+int sqisign_open(unsigned char *m,
+                 unsigned long long *mlen,
+                 const unsigned char *sm,
+                 unsigned long long smlen,
+                 const unsigned char *pk);
+
+/**
+ * SQIsign verify signature.
+ *
+ * If the signature verification succeeded, returns 0, otherwise 1.
+ *
+ * @param[in] m Message the signature is checked against
+ * @param[in] mlen Length of m
+ * @param[in] sig Signature
+ * @param[in] siglen Length of sig
+ * @param[in] pk Compacted public key
+ * @return int 0 if verification succeeded, 1 otherwise.
+ */
+SQISIGN_API
+int sqisign_verify(const unsigned char *m,
+                   unsigned long long mlen,
+                   const unsigned char *sig,
+                   unsigned long long siglen,
+                   const unsigned char *pk);
+
+#endif
diff --git a/src/pqm4/sqisign_lvl5/ref/sqisign.c b/src/pqm4/sqisign_lvl5/ref/sqisign.c
new file mode 100644
index 0000000..57fd75d
--- /dev/null
+++ b/src/pqm4/sqisign_lvl5/ref/sqisign.c
@@ -0,0 +1,106 @@
+#include
+#include
+#include
+#include
+#if defined(ENABLE_SIGN)
+#include
+#endif
+
+#if defined(ENABLE_SIGN)
+// Generates a key pair and serializes it into pk and sk.
+// Returns the logical negation of the protocols_keygen() result. Both keys are
+// serialized regardless of that result, so callers must check the return value.
+SQISIGN_API
+int
+sqisign_keypair(unsigned char *pk, unsigned char *sk)
+{
+    int ret = 0;
+    secret_key_t skt;
+    public_key_t pkt = { 0 };
+    secret_key_init(&skt);
+
+    ret = !protocols_keygen(&pkt, &skt);
+
+    secret_key_to_bytes(sk, &skt, &pkt);
+    public_key_to_bytes(pk, &pkt);
+    secret_key_finalize(&skt);
+    return ret;
+}
+
+// Signs m and writes the signature followed by the message into sm.
+SQISIGN_API
+int
+sqisign_sign(unsigned char *sm,
+             unsigned long long *smlen,
+             const unsigned char *m,
+             unsigned long long mlen,
+             const unsigned char *sk)
+{
+    int ret = 0;
+    secret_key_t skt;
+    public_key_t pkt = { 0 };
+    signature_t sigt;
+    secret_key_init(&skt);
+
secret_key_from_bytes(&skt, &pkt, sk);
+
+    // place the message behind the signature slot first; memmove because m may overlap sm
+    memmove(sm + SIGNATURE_BYTES, m, mlen);
+
+    ret = !protocols_sign(&sigt, &pkt, &skt, sm + SIGNATURE_BYTES, mlen);
+    if (ret != 0) {
+        *smlen = 0;
+        goto err;
+    }
+
+    signature_to_bytes(sm, &sigt);
+    *smlen = SIGNATURE_BYTES + mlen;
+
+err:
+    // reached on success and on failure: the secret key is always released
+    secret_key_finalize(&skt);
+    return ret;
+}
+#endif
+
+// Verifies the signed message sm (signature followed by message) under pk.
+// On success returns 0, copies the message to m and stores its length in *mlen.
+// On failure returns 1 and sets *mlen to 0; when smlen covers at least one
+// signature, the would-be message area of m is zeroed as well.
+SQISIGN_API
+int
+sqisign_open(unsigned char *m,
+             unsigned long long *mlen,
+             const unsigned char *sm,
+             unsigned long long smlen,
+             const unsigned char *pk)
+{
+    int ret = 0;
+    public_key_t pkt = { 0 };
+    signature_t sigt;
+
+    // An input shorter than one signature cannot be valid. Reject it before any
+    // parsing: otherwise smlen - SIGNATURE_BYTES wraps around (unsigned) and both
+    // the reads from sm and the memset on m below run far out of bounds.
+    if (smlen < SIGNATURE_BYTES) {
+        *mlen = 0;
+        return 1;
+    }
+
+    public_key_from_bytes(&pkt, pk);
+    signature_from_bytes(&sigt, sm);
+
+    ret = !protocols_verify(&sigt, &pkt, sm + SIGNATURE_BYTES, smlen - SIGNATURE_BYTES);
+
+    if (!ret) {
+        *mlen = smlen - SIGNATURE_BYTES;
+        memmove(m, sm + SIGNATURE_BYTES, *mlen);
+    } else {
+        *mlen = 0;
+        memset(m, 0, smlen - SIGNATURE_BYTES);
+    }
+
+    return ret;
+}
+
+// Verifies the detached signature sig over the message m under pk.
+// Returns 0 if the signature is accepted, 1 otherwise.
+SQISIGN_API
+int
+sqisign_verify(const unsigned char *m,
+               unsigned long long mlen,
+               const unsigned char *sig,
+               unsigned long long siglen,
+               const unsigned char *pk)
+{
+
+    int ret = 0;
+    public_key_t pkt = { 0 };
+    signature_t sigt;
+
+    // siglen used to be ignored, so a truncated buffer was still parsed as a
+    // full signature; a buffer that cannot hold one is rejected instead.
+    if (siglen < SIGNATURE_BYTES) {
+        return 1;
+    }
+
+    public_key_from_bytes(&pkt, pk);
+    signature_from_bytes(&sigt, sig);
+
+    ret = !protocols_verify(&sigt, &pkt, m, mlen);
+
+    return ret;
+}
diff --git a/src/pqm4/sqisign_lvl5/ref/sqisign_namespace.h b/src/pqm4/sqisign_lvl5/ref/sqisign_namespace.h
new file mode 100644
index 0000000..14fd51d
--- /dev/null
+++ b/src/pqm4/sqisign_lvl5/ref/sqisign_namespace.h
@@ -0,0 +1,1022 @@
+
+#ifndef SQISIGN_NAMESPACE_H
+#define SQISIGN_NAMESPACE_H
+
+//#define DISABLE_NAMESPACING
+
+#if defined(_WIN32)
+#define SQISIGN_API __declspec(dllexport)
+#else
+#define SQISIGN_API __attribute__((visibility("default")))
+#endif
+
+#define PARAM_JOIN3_(a, b, c) sqisign_##a##_##b##_##c
+#define PARAM_JOIN3(a, b, c) PARAM_JOIN3_(a, b, c)
+#define PARAM_NAME3(end, s) PARAM_JOIN3(SQISIGN_VARIANT, end, s)
+
+#define PARAM_JOIN2_(a, b) sqisign_##a##_##b
+#define
PARAM_JOIN2(a, b) PARAM_JOIN2_(a, b) +#define PARAM_NAME2(end, s) PARAM_JOIN2(end, s) + +#ifndef DISABLE_NAMESPACING +#define SQISIGN_NAMESPACE_GENERIC(s) PARAM_NAME2(gen, s) +#else +#define SQISIGN_NAMESPACE_GENERIC(s) s +#endif + +#if defined(SQISIGN_VARIANT) && !defined(DISABLE_NAMESPACING) +#if defined(SQISIGN_BUILD_TYPE_REF) +#define SQISIGN_NAMESPACE(s) PARAM_NAME3(ref, s) +#elif defined(SQISIGN_BUILD_TYPE_OPT) +#define SQISIGN_NAMESPACE(s) PARAM_NAME3(opt, s) +#elif defined(SQISIGN_BUILD_TYPE_BROADWELL) +#define SQISIGN_NAMESPACE(s) PARAM_NAME3(broadwell, s) +#elif defined(SQISIGN_BUILD_TYPE_ARM64CRYPTO) +#define SQISIGN_NAMESPACE(s) PARAM_NAME3(arm64crypto, s) +#else +#error "Build type not known" +#endif + +#else +#define SQISIGN_NAMESPACE(s) s +#endif + +// Namespacing symbols exported from algebra.c: +#undef quat_alg_add +#undef quat_alg_conj +#undef quat_alg_coord_mul +#undef quat_alg_elem_copy +#undef quat_alg_elem_copy_ibz +#undef quat_alg_elem_equal +#undef quat_alg_elem_is_zero +#undef quat_alg_elem_mul_by_scalar +#undef quat_alg_elem_set +#undef quat_alg_equal_denom +#undef quat_alg_init_set_ui +#undef quat_alg_make_primitive +#undef quat_alg_mul +#undef quat_alg_norm +#undef quat_alg_normalize +#undef quat_alg_scalar +#undef quat_alg_sub + +#define quat_alg_add SQISIGN_NAMESPACE_GENERIC(quat_alg_add) +#define quat_alg_conj SQISIGN_NAMESPACE_GENERIC(quat_alg_conj) +#define quat_alg_coord_mul SQISIGN_NAMESPACE_GENERIC(quat_alg_coord_mul) +#define quat_alg_elem_copy SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_copy) +#define quat_alg_elem_copy_ibz SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_copy_ibz) +#define quat_alg_elem_equal SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_equal) +#define quat_alg_elem_is_zero SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_is_zero) +#define quat_alg_elem_mul_by_scalar SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_mul_by_scalar) +#define quat_alg_elem_set SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_set) +#define quat_alg_equal_denom 
SQISIGN_NAMESPACE_GENERIC(quat_alg_equal_denom) +#define quat_alg_init_set_ui SQISIGN_NAMESPACE_GENERIC(quat_alg_init_set_ui) +#define quat_alg_make_primitive SQISIGN_NAMESPACE_GENERIC(quat_alg_make_primitive) +#define quat_alg_mul SQISIGN_NAMESPACE_GENERIC(quat_alg_mul) +#define quat_alg_norm SQISIGN_NAMESPACE_GENERIC(quat_alg_norm) +#define quat_alg_normalize SQISIGN_NAMESPACE_GENERIC(quat_alg_normalize) +#define quat_alg_scalar SQISIGN_NAMESPACE_GENERIC(quat_alg_scalar) +#define quat_alg_sub SQISIGN_NAMESPACE_GENERIC(quat_alg_sub) + +// Namespacing symbols exported from api.c: +#undef crypto_sign +#undef crypto_sign_keypair +#undef crypto_sign_open + +#define crypto_sign SQISIGN_NAMESPACE(crypto_sign) +#define crypto_sign_keypair SQISIGN_NAMESPACE(crypto_sign_keypair) +#define crypto_sign_open SQISIGN_NAMESPACE(crypto_sign_open) + +// Namespacing symbols exported from basis.c: +#undef ec_curve_to_basis_2f_from_hint +#undef ec_curve_to_basis_2f_to_hint +#undef ec_recover_y +#undef lift_basis +#undef lift_basis_normalized + +#define ec_curve_to_basis_2f_from_hint SQISIGN_NAMESPACE(ec_curve_to_basis_2f_from_hint) +#define ec_curve_to_basis_2f_to_hint SQISIGN_NAMESPACE(ec_curve_to_basis_2f_to_hint) +#define ec_recover_y SQISIGN_NAMESPACE(ec_recover_y) +#define lift_basis SQISIGN_NAMESPACE(lift_basis) +#define lift_basis_normalized SQISIGN_NAMESPACE(lift_basis_normalized) + +// Namespacing symbols exported from biextension.c: +#undef clear_cofac +#undef ec_dlog_2_tate +#undef ec_dlog_2_weil +#undef fp2_frob +#undef reduced_tate +#undef weil + +#define clear_cofac SQISIGN_NAMESPACE(clear_cofac) +#define ec_dlog_2_tate SQISIGN_NAMESPACE(ec_dlog_2_tate) +#define ec_dlog_2_weil SQISIGN_NAMESPACE(ec_dlog_2_weil) +#define fp2_frob SQISIGN_NAMESPACE(fp2_frob) +#define reduced_tate SQISIGN_NAMESPACE(reduced_tate) +#define weil SQISIGN_NAMESPACE(weil) + +// Namespacing symbols exported from common.c: +#undef hash_to_challenge +#undef public_key_finalize +#undef 
public_key_init + +#define hash_to_challenge SQISIGN_NAMESPACE(hash_to_challenge) +#define public_key_finalize SQISIGN_NAMESPACE(public_key_finalize) +#define public_key_init SQISIGN_NAMESPACE(public_key_init) + +// Namespacing symbols exported from dim2.c: +#undef ibz_2x2_mul_mod +#undef ibz_mat_2x2_add +#undef ibz_mat_2x2_copy +#undef ibz_mat_2x2_det_from_ibz +#undef ibz_mat_2x2_eval +#undef ibz_mat_2x2_inv_mod +#undef ibz_mat_2x2_set +#undef ibz_vec_2_set + +#define ibz_2x2_mul_mod SQISIGN_NAMESPACE_GENERIC(ibz_2x2_mul_mod) +#define ibz_mat_2x2_add SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_add) +#define ibz_mat_2x2_copy SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_copy) +#define ibz_mat_2x2_det_from_ibz SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_det_from_ibz) +#define ibz_mat_2x2_eval SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_eval) +#define ibz_mat_2x2_inv_mod SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_inv_mod) +#define ibz_mat_2x2_set SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_set) +#define ibz_vec_2_set SQISIGN_NAMESPACE_GENERIC(ibz_vec_2_set) + +// Namespacing symbols exported from dim2id2iso.c: +#undef dim2id2iso_arbitrary_isogeny_evaluation +#undef dim2id2iso_ideal_to_isogeny_clapotis +#undef find_uv +#undef fixed_degree_isogeny_and_eval + +#define dim2id2iso_arbitrary_isogeny_evaluation SQISIGN_NAMESPACE(dim2id2iso_arbitrary_isogeny_evaluation) +#define dim2id2iso_ideal_to_isogeny_clapotis SQISIGN_NAMESPACE(dim2id2iso_ideal_to_isogeny_clapotis) +#define find_uv SQISIGN_NAMESPACE(find_uv) +#define fixed_degree_isogeny_and_eval SQISIGN_NAMESPACE(fixed_degree_isogeny_and_eval) + +// Namespacing symbols exported from dim4.c: +#undef ibz_inv_dim4_make_coeff_mpm +#undef ibz_inv_dim4_make_coeff_pmp +#undef ibz_mat_4x4_copy +#undef ibz_mat_4x4_equal +#undef ibz_mat_4x4_eval +#undef ibz_mat_4x4_eval_t +#undef ibz_mat_4x4_gcd +#undef ibz_mat_4x4_identity +#undef ibz_mat_4x4_inv_with_det_as_denom +#undef ibz_mat_4x4_is_identity +#undef ibz_mat_4x4_mul +#undef ibz_mat_4x4_negate +#undef 
ibz_mat_4x4_scalar_div +#undef ibz_mat_4x4_scalar_mul +#undef ibz_mat_4x4_transpose +#undef ibz_mat_4x4_zero +#undef ibz_vec_4_add +#undef ibz_vec_4_content +#undef ibz_vec_4_copy +#undef ibz_vec_4_copy_ibz +#undef ibz_vec_4_is_zero +#undef ibz_vec_4_linear_combination +#undef ibz_vec_4_negate +#undef ibz_vec_4_scalar_div +#undef ibz_vec_4_scalar_mul +#undef ibz_vec_4_set +#undef ibz_vec_4_sub +#undef quat_qf_eval + +#define ibz_inv_dim4_make_coeff_mpm SQISIGN_NAMESPACE_GENERIC(ibz_inv_dim4_make_coeff_mpm) +#define ibz_inv_dim4_make_coeff_pmp SQISIGN_NAMESPACE_GENERIC(ibz_inv_dim4_make_coeff_pmp) +#define ibz_mat_4x4_copy SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_copy) +#define ibz_mat_4x4_equal SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_equal) +#define ibz_mat_4x4_eval SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_eval) +#define ibz_mat_4x4_eval_t SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_eval_t) +#define ibz_mat_4x4_gcd SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_gcd) +#define ibz_mat_4x4_identity SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_identity) +#define ibz_mat_4x4_inv_with_det_as_denom SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_inv_with_det_as_denom) +#define ibz_mat_4x4_is_identity SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_is_identity) +#define ibz_mat_4x4_mul SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_mul) +#define ibz_mat_4x4_negate SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_negate) +#define ibz_mat_4x4_scalar_div SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_scalar_div) +#define ibz_mat_4x4_scalar_mul SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_scalar_mul) +#define ibz_mat_4x4_transpose SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_transpose) +#define ibz_mat_4x4_zero SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_zero) +#define ibz_vec_4_add SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_add) +#define ibz_vec_4_content SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_content) +#define ibz_vec_4_copy SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_copy) +#define ibz_vec_4_copy_ibz SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_copy_ibz) +#define ibz_vec_4_is_zero 
SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_is_zero) +#define ibz_vec_4_linear_combination SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_linear_combination) +#define ibz_vec_4_negate SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_negate) +#define ibz_vec_4_scalar_div SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_scalar_div) +#define ibz_vec_4_scalar_mul SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_scalar_mul) +#define ibz_vec_4_set SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_set) +#define ibz_vec_4_sub SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_sub) +#define quat_qf_eval SQISIGN_NAMESPACE_GENERIC(quat_qf_eval) + +// Namespacing symbols exported from ec.c: +#undef cswap_points +#undef ec_biscalar_mul +#undef ec_curve_init +#undef ec_curve_init_from_A +#undef ec_curve_normalize_A24 +#undef ec_curve_verify_A +#undef ec_dbl +#undef ec_dbl_iter +#undef ec_dbl_iter_basis +#undef ec_has_zero_coordinate +#undef ec_is_basis_four_torsion +#undef ec_is_equal +#undef ec_is_four_torsion +#undef ec_is_two_torsion +#undef ec_is_zero +#undef ec_j_inv +#undef ec_ladder3pt +#undef ec_mul +#undef ec_normalize_curve +#undef ec_normalize_curve_and_A24 +#undef ec_normalize_point +#undef ec_point_init +#undef select_point +#undef xADD +#undef xDBL +#undef xDBLADD +#undef xDBLMUL +#undef xDBL_A24 +#undef xDBL_E0 +#undef xMUL + +#define cswap_points SQISIGN_NAMESPACE(cswap_points) +#define ec_biscalar_mul SQISIGN_NAMESPACE(ec_biscalar_mul) +#define ec_curve_init SQISIGN_NAMESPACE(ec_curve_init) +#define ec_curve_init_from_A SQISIGN_NAMESPACE(ec_curve_init_from_A) +#define ec_curve_normalize_A24 SQISIGN_NAMESPACE(ec_curve_normalize_A24) +#define ec_curve_verify_A SQISIGN_NAMESPACE(ec_curve_verify_A) +#define ec_dbl SQISIGN_NAMESPACE(ec_dbl) +#define ec_dbl_iter SQISIGN_NAMESPACE(ec_dbl_iter) +#define ec_dbl_iter_basis SQISIGN_NAMESPACE(ec_dbl_iter_basis) +#define ec_has_zero_coordinate SQISIGN_NAMESPACE(ec_has_zero_coordinate) +#define ec_is_basis_four_torsion SQISIGN_NAMESPACE(ec_is_basis_four_torsion) +#define ec_is_equal 
SQISIGN_NAMESPACE(ec_is_equal) +#define ec_is_four_torsion SQISIGN_NAMESPACE(ec_is_four_torsion) +#define ec_is_two_torsion SQISIGN_NAMESPACE(ec_is_two_torsion) +#define ec_is_zero SQISIGN_NAMESPACE(ec_is_zero) +#define ec_j_inv SQISIGN_NAMESPACE(ec_j_inv) +#define ec_ladder3pt SQISIGN_NAMESPACE(ec_ladder3pt) +#define ec_mul SQISIGN_NAMESPACE(ec_mul) +#define ec_normalize_curve SQISIGN_NAMESPACE(ec_normalize_curve) +#define ec_normalize_curve_and_A24 SQISIGN_NAMESPACE(ec_normalize_curve_and_A24) +#define ec_normalize_point SQISIGN_NAMESPACE(ec_normalize_point) +#define ec_point_init SQISIGN_NAMESPACE(ec_point_init) +#define select_point SQISIGN_NAMESPACE(select_point) +#define xADD SQISIGN_NAMESPACE(xADD) +#define xDBL SQISIGN_NAMESPACE(xDBL) +#define xDBLADD SQISIGN_NAMESPACE(xDBLADD) +#define xDBLMUL SQISIGN_NAMESPACE(xDBLMUL) +#define xDBL_A24 SQISIGN_NAMESPACE(xDBL_A24) +#define xDBL_E0 SQISIGN_NAMESPACE(xDBL_E0) +#define xMUL SQISIGN_NAMESPACE(xMUL) + +// Namespacing symbols exported from ec_jac.c: +#undef ADD +#undef DBL +#undef DBLW +#undef copy_jac_point +#undef jac_from_ws +#undef jac_init +#undef jac_is_equal +#undef jac_neg +#undef jac_to_ws +#undef jac_to_xz +#undef jac_to_xz_add_components +#undef select_jac_point + +#define ADD SQISIGN_NAMESPACE(ADD) +#define DBL SQISIGN_NAMESPACE(DBL) +#define DBLW SQISIGN_NAMESPACE(DBLW) +#define copy_jac_point SQISIGN_NAMESPACE(copy_jac_point) +#define jac_from_ws SQISIGN_NAMESPACE(jac_from_ws) +#define jac_init SQISIGN_NAMESPACE(jac_init) +#define jac_is_equal SQISIGN_NAMESPACE(jac_is_equal) +#define jac_neg SQISIGN_NAMESPACE(jac_neg) +#define jac_to_ws SQISIGN_NAMESPACE(jac_to_ws) +#define jac_to_xz SQISIGN_NAMESPACE(jac_to_xz) +#define jac_to_xz_add_components SQISIGN_NAMESPACE(jac_to_xz_add_components) +#define select_jac_point SQISIGN_NAMESPACE(select_jac_point) + +// Namespacing symbols exported from encode_signature.c: +#undef secret_key_from_bytes +#undef secret_key_to_bytes + +#define secret_key_from_bytes 
SQISIGN_NAMESPACE(secret_key_from_bytes) +#define secret_key_to_bytes SQISIGN_NAMESPACE(secret_key_to_bytes) + +// Namespacing symbols exported from encode_verification.c: +#undef public_key_from_bytes +#undef public_key_to_bytes +#undef signature_from_bytes +#undef signature_to_bytes + +#define public_key_from_bytes SQISIGN_NAMESPACE(public_key_from_bytes) +#define public_key_to_bytes SQISIGN_NAMESPACE(public_key_to_bytes) +#define signature_from_bytes SQISIGN_NAMESPACE(signature_from_bytes) +#define signature_to_bytes SQISIGN_NAMESPACE(signature_to_bytes) + +// Namespacing symbols exported from finit.c: +#undef ibz_mat_2x2_finalize +#undef ibz_mat_2x2_init +#undef ibz_mat_4x4_finalize +#undef ibz_mat_4x4_init +#undef ibz_vec_2_finalize +#undef ibz_vec_2_init +#undef ibz_vec_4_finalize +#undef ibz_vec_4_init +#undef quat_alg_elem_finalize +#undef quat_alg_elem_init +#undef quat_alg_finalize +#undef quat_alg_init_set +#undef quat_lattice_finalize +#undef quat_lattice_init +#undef quat_left_ideal_finalize +#undef quat_left_ideal_init + +#define ibz_mat_2x2_finalize SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_finalize) +#define ibz_mat_2x2_init SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_init) +#define ibz_mat_4x4_finalize SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_finalize) +#define ibz_mat_4x4_init SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_init) +#define ibz_vec_2_finalize SQISIGN_NAMESPACE_GENERIC(ibz_vec_2_finalize) +#define ibz_vec_2_init SQISIGN_NAMESPACE_GENERIC(ibz_vec_2_init) +#define ibz_vec_4_finalize SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_finalize) +#define ibz_vec_4_init SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_init) +#define quat_alg_elem_finalize SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_finalize) +#define quat_alg_elem_init SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_init) +#define quat_alg_finalize SQISIGN_NAMESPACE_GENERIC(quat_alg_finalize) +#define quat_alg_init_set SQISIGN_NAMESPACE_GENERIC(quat_alg_init_set) +#define quat_lattice_finalize 
SQISIGN_NAMESPACE_GENERIC(quat_lattice_finalize) +#define quat_lattice_init SQISIGN_NAMESPACE_GENERIC(quat_lattice_init) +#define quat_left_ideal_finalize SQISIGN_NAMESPACE_GENERIC(quat_left_ideal_finalize) +#define quat_left_ideal_init SQISIGN_NAMESPACE_GENERIC(quat_left_ideal_init) + +// Namespacing symbols exported from fp.c: +#undef fp_select + +#define fp_select SQISIGN_NAMESPACE(fp_select) + +// Namespacing symbols exported from fp.c, fp_p27500_64.c, fp_p5248_64.c, fp_p65376_64.c: +#undef fp_exp3div4 +#undef fp_inv +#undef fp_is_square +#undef fp_sqrt + +#define fp_exp3div4 SQISIGN_NAMESPACE(fp_exp3div4) +#define fp_inv SQISIGN_NAMESPACE(fp_inv) +#define fp_is_square SQISIGN_NAMESPACE(fp_is_square) +#define fp_sqrt SQISIGN_NAMESPACE(fp_sqrt) + +// Namespacing symbols exported from fp2.c: +#undef fp2_add +#undef fp2_add_one +#undef fp2_batched_inv +#undef fp2_copy +#undef fp2_cswap +#undef fp2_decode +#undef fp2_encode +#undef fp2_half +#undef fp2_inv +#undef fp2_is_equal +#undef fp2_is_one +#undef fp2_is_square +#undef fp2_is_zero +#undef fp2_mul +#undef fp2_mul_small +#undef fp2_neg +#undef fp2_pow_vartime +#undef fp2_print +#undef fp2_select +#undef fp2_set_one +#undef fp2_set_small +#undef fp2_set_zero +#undef fp2_sqr +#undef fp2_sqrt +#undef fp2_sqrt_verify +#undef fp2_sub + +#define fp2_add SQISIGN_NAMESPACE(fp2_add) +#define fp2_add_one SQISIGN_NAMESPACE(fp2_add_one) +#define fp2_batched_inv SQISIGN_NAMESPACE(fp2_batched_inv) +#define fp2_copy SQISIGN_NAMESPACE(fp2_copy) +#define fp2_cswap SQISIGN_NAMESPACE(fp2_cswap) +#define fp2_decode SQISIGN_NAMESPACE(fp2_decode) +#define fp2_encode SQISIGN_NAMESPACE(fp2_encode) +#define fp2_half SQISIGN_NAMESPACE(fp2_half) +#define fp2_inv SQISIGN_NAMESPACE(fp2_inv) +#define fp2_is_equal SQISIGN_NAMESPACE(fp2_is_equal) +#define fp2_is_one SQISIGN_NAMESPACE(fp2_is_one) +#define fp2_is_square SQISIGN_NAMESPACE(fp2_is_square) +#define fp2_is_zero SQISIGN_NAMESPACE(fp2_is_zero) +#define fp2_mul 
SQISIGN_NAMESPACE(fp2_mul) +#define fp2_mul_small SQISIGN_NAMESPACE(fp2_mul_small) +#define fp2_neg SQISIGN_NAMESPACE(fp2_neg) +#define fp2_pow_vartime SQISIGN_NAMESPACE(fp2_pow_vartime) +#define fp2_print SQISIGN_NAMESPACE(fp2_print) +#define fp2_select SQISIGN_NAMESPACE(fp2_select) +#define fp2_set_one SQISIGN_NAMESPACE(fp2_set_one) +#define fp2_set_small SQISIGN_NAMESPACE(fp2_set_small) +#define fp2_set_zero SQISIGN_NAMESPACE(fp2_set_zero) +#define fp2_sqr SQISIGN_NAMESPACE(fp2_sqr) +#define fp2_sqrt SQISIGN_NAMESPACE(fp2_sqrt) +#define fp2_sqrt_verify SQISIGN_NAMESPACE(fp2_sqrt_verify) +#define fp2_sub SQISIGN_NAMESPACE(fp2_sub) + +// Namespacing symbols exported from fp_p27500_64.c, fp_p5248_64.c, fp_p65376_64.c: +#undef fp_copy +#undef fp_cswap +#undef fp_decode +#undef fp_decode_reduce +#undef fp_div3 +#undef fp_encode +#undef fp_half +#undef fp_is_equal +#undef fp_is_zero +#undef fp_mul_small +#undef fp_neg +#undef fp_set_one +#undef fp_set_small +#undef fp_set_zero + +#define fp_copy SQISIGN_NAMESPACE(fp_copy) +#define fp_cswap SQISIGN_NAMESPACE(fp_cswap) +#define fp_decode SQISIGN_NAMESPACE(fp_decode) +#define fp_decode_reduce SQISIGN_NAMESPACE(fp_decode_reduce) +#define fp_div3 SQISIGN_NAMESPACE(fp_div3) +#define fp_encode SQISIGN_NAMESPACE(fp_encode) +#define fp_half SQISIGN_NAMESPACE(fp_half) +#define fp_is_equal SQISIGN_NAMESPACE(fp_is_equal) +#define fp_is_zero SQISIGN_NAMESPACE(fp_is_zero) +#define fp_mul_small SQISIGN_NAMESPACE(fp_mul_small) +#define fp_neg SQISIGN_NAMESPACE(fp_neg) +#define fp_set_one SQISIGN_NAMESPACE(fp_set_one) +#define fp_set_small SQISIGN_NAMESPACE(fp_set_small) +#define fp_set_zero SQISIGN_NAMESPACE(fp_set_zero) + +// Namespacing symbols exported from fp_p27500_64.c, fp_p5248_64.c, fp_p65376_64.c, gf27500.c, gf5248.c, gf65376.c: +#undef fp_add +#undef fp_mul +#undef fp_sqr +#undef fp_sub + +#define fp_add SQISIGN_NAMESPACE(fp_add) +#define fp_mul SQISIGN_NAMESPACE(fp_mul) +#define fp_sqr SQISIGN_NAMESPACE(fp_sqr) +#define 
fp_sub SQISIGN_NAMESPACE(fp_sub) + +// Namespacing symbols exported from gf27500.c: +#undef gf27500_decode +#undef gf27500_decode_reduce +#undef gf27500_div +#undef gf27500_div3 +#undef gf27500_encode +#undef gf27500_invert +#undef gf27500_legendre +#undef gf27500_sqrt + +#define gf27500_decode SQISIGN_NAMESPACE(gf27500_decode) +#define gf27500_decode_reduce SQISIGN_NAMESPACE(gf27500_decode_reduce) +#define gf27500_div SQISIGN_NAMESPACE(gf27500_div) +#define gf27500_div3 SQISIGN_NAMESPACE(gf27500_div3) +#define gf27500_encode SQISIGN_NAMESPACE(gf27500_encode) +#define gf27500_invert SQISIGN_NAMESPACE(gf27500_invert) +#define gf27500_legendre SQISIGN_NAMESPACE(gf27500_legendre) +#define gf27500_sqrt SQISIGN_NAMESPACE(gf27500_sqrt) + +// Namespacing symbols exported from gf27500.c, gf5248.c, gf65376.c: +#undef fp2_mul_c0 +#undef fp2_mul_c1 +#undef fp2_sq_c0 +#undef fp2_sq_c1 + +#define fp2_mul_c0 SQISIGN_NAMESPACE(fp2_mul_c0) +#define fp2_mul_c1 SQISIGN_NAMESPACE(fp2_mul_c1) +#define fp2_sq_c0 SQISIGN_NAMESPACE(fp2_sq_c0) +#define fp2_sq_c1 SQISIGN_NAMESPACE(fp2_sq_c1) + +// Namespacing symbols exported from gf5248.c: +#undef gf5248_decode +#undef gf5248_decode_reduce +#undef gf5248_div +#undef gf5248_div3 +#undef gf5248_encode +#undef gf5248_invert +#undef gf5248_legendre +#undef gf5248_sqrt + +#define gf5248_decode SQISIGN_NAMESPACE(gf5248_decode) +#define gf5248_decode_reduce SQISIGN_NAMESPACE(gf5248_decode_reduce) +#define gf5248_div SQISIGN_NAMESPACE(gf5248_div) +#define gf5248_div3 SQISIGN_NAMESPACE(gf5248_div3) +#define gf5248_encode SQISIGN_NAMESPACE(gf5248_encode) +#define gf5248_invert SQISIGN_NAMESPACE(gf5248_invert) +#define gf5248_legendre SQISIGN_NAMESPACE(gf5248_legendre) +#define gf5248_sqrt SQISIGN_NAMESPACE(gf5248_sqrt) + +// Namespacing symbols exported from gf65376.c: +#undef gf65376_decode +#undef gf65376_decode_reduce +#undef gf65376_div +#undef gf65376_div3 +#undef gf65376_encode +#undef gf65376_invert +#undef gf65376_legendre +#undef 
gf65376_sqrt + +#define gf65376_decode SQISIGN_NAMESPACE(gf65376_decode) +#define gf65376_decode_reduce SQISIGN_NAMESPACE(gf65376_decode_reduce) +#define gf65376_div SQISIGN_NAMESPACE(gf65376_div) +#define gf65376_div3 SQISIGN_NAMESPACE(gf65376_div3) +#define gf65376_encode SQISIGN_NAMESPACE(gf65376_encode) +#define gf65376_invert SQISIGN_NAMESPACE(gf65376_invert) +#define gf65376_legendre SQISIGN_NAMESPACE(gf65376_legendre) +#define gf65376_sqrt SQISIGN_NAMESPACE(gf65376_sqrt) + +// Namespacing symbols exported from hd.c: +#undef add_couple_jac_points +#undef copy_bases_to_kernel +#undef couple_jac_to_xz +#undef double_couple_jac_point +#undef double_couple_jac_point_iter +#undef double_couple_point +#undef double_couple_point_iter + +#define add_couple_jac_points SQISIGN_NAMESPACE(add_couple_jac_points) +#define copy_bases_to_kernel SQISIGN_NAMESPACE(copy_bases_to_kernel) +#define couple_jac_to_xz SQISIGN_NAMESPACE(couple_jac_to_xz) +#define double_couple_jac_point SQISIGN_NAMESPACE(double_couple_jac_point) +#define double_couple_jac_point_iter SQISIGN_NAMESPACE(double_couple_jac_point_iter) +#define double_couple_point SQISIGN_NAMESPACE(double_couple_point) +#define double_couple_point_iter SQISIGN_NAMESPACE(double_couple_point_iter) + +// Namespacing symbols exported from hnf.c: +#undef ibz_mat_4x4_is_hnf +#undef ibz_mat_4xn_hnf_mod_core +#undef ibz_vec_4_copy_mod +#undef ibz_vec_4_linear_combination_mod +#undef ibz_vec_4_scalar_mul_mod + +#define ibz_mat_4x4_is_hnf SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_is_hnf) +#define ibz_mat_4xn_hnf_mod_core SQISIGN_NAMESPACE_GENERIC(ibz_mat_4xn_hnf_mod_core) +#define ibz_vec_4_copy_mod SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_copy_mod) +#define ibz_vec_4_linear_combination_mod SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_linear_combination_mod) +#define ibz_vec_4_scalar_mul_mod SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_scalar_mul_mod) + +// Namespacing symbols exported from hnf_internal.c: +#undef ibz_centered_mod +#undef 
ibz_conditional_assign +#undef ibz_mod_not_zero +#undef ibz_xgcd_with_u_not_0 + +#define ibz_centered_mod SQISIGN_NAMESPACE_GENERIC(ibz_centered_mod) +#define ibz_conditional_assign SQISIGN_NAMESPACE_GENERIC(ibz_conditional_assign) +#define ibz_mod_not_zero SQISIGN_NAMESPACE_GENERIC(ibz_mod_not_zero) +#define ibz_xgcd_with_u_not_0 SQISIGN_NAMESPACE_GENERIC(ibz_xgcd_with_u_not_0) + +// Namespacing symbols exported from ibz_division.c: +#undef ibz_xgcd + +#define ibz_xgcd SQISIGN_NAMESPACE_GENERIC(ibz_xgcd) + +// Namespacing symbols exported from id2iso.c: +#undef change_of_basis_matrix_tate +#undef change_of_basis_matrix_tate_invert +#undef ec_biscalar_mul_ibz_vec +#undef endomorphism_application_even_basis +#undef id2iso_ideal_to_kernel_dlogs_even +#undef id2iso_kernel_dlogs_to_ideal_even +#undef matrix_application_even_basis + +#define change_of_basis_matrix_tate SQISIGN_NAMESPACE(change_of_basis_matrix_tate) +#define change_of_basis_matrix_tate_invert SQISIGN_NAMESPACE(change_of_basis_matrix_tate_invert) +#define ec_biscalar_mul_ibz_vec SQISIGN_NAMESPACE(ec_biscalar_mul_ibz_vec) +#define endomorphism_application_even_basis SQISIGN_NAMESPACE(endomorphism_application_even_basis) +#define id2iso_ideal_to_kernel_dlogs_even SQISIGN_NAMESPACE(id2iso_ideal_to_kernel_dlogs_even) +#define id2iso_kernel_dlogs_to_ideal_even SQISIGN_NAMESPACE(id2iso_kernel_dlogs_to_ideal_even) +#define matrix_application_even_basis SQISIGN_NAMESPACE(matrix_application_even_basis) + +// Namespacing symbols exported from ideal.c: +#undef quat_lideal_add +#undef quat_lideal_class_gram +#undef quat_lideal_conjugate_without_hnf +#undef quat_lideal_copy +#undef quat_lideal_create +#undef quat_lideal_create_principal +#undef quat_lideal_equals +#undef quat_lideal_generator +#undef quat_lideal_inter +#undef quat_lideal_inverse_lattice_without_hnf +#undef quat_lideal_mul +#undef quat_lideal_norm +#undef quat_lideal_right_order +#undef quat_lideal_right_transporter +#undef quat_order_discriminant 
+#undef quat_order_is_maximal + +#define quat_lideal_add SQISIGN_NAMESPACE_GENERIC(quat_lideal_add) +#define quat_lideal_class_gram SQISIGN_NAMESPACE_GENERIC(quat_lideal_class_gram) +#define quat_lideal_conjugate_without_hnf SQISIGN_NAMESPACE_GENERIC(quat_lideal_conjugate_without_hnf) +#define quat_lideal_copy SQISIGN_NAMESPACE_GENERIC(quat_lideal_copy) +#define quat_lideal_create SQISIGN_NAMESPACE_GENERIC(quat_lideal_create) +#define quat_lideal_create_principal SQISIGN_NAMESPACE_GENERIC(quat_lideal_create_principal) +#define quat_lideal_equals SQISIGN_NAMESPACE_GENERIC(quat_lideal_equals) +#define quat_lideal_generator SQISIGN_NAMESPACE_GENERIC(quat_lideal_generator) +#define quat_lideal_inter SQISIGN_NAMESPACE_GENERIC(quat_lideal_inter) +#define quat_lideal_inverse_lattice_without_hnf SQISIGN_NAMESPACE_GENERIC(quat_lideal_inverse_lattice_without_hnf) +#define quat_lideal_mul SQISIGN_NAMESPACE_GENERIC(quat_lideal_mul) +#define quat_lideal_norm SQISIGN_NAMESPACE_GENERIC(quat_lideal_norm) +#define quat_lideal_right_order SQISIGN_NAMESPACE_GENERIC(quat_lideal_right_order) +#define quat_lideal_right_transporter SQISIGN_NAMESPACE_GENERIC(quat_lideal_right_transporter) +#define quat_order_discriminant SQISIGN_NAMESPACE_GENERIC(quat_order_discriminant) +#define quat_order_is_maximal SQISIGN_NAMESPACE_GENERIC(quat_order_is_maximal) + +// Namespacing symbols exported from intbig.c: +#undef ibz_abs +#undef ibz_add +#undef ibz_bitsize +#undef ibz_cmp +#undef ibz_cmp_int32 +#undef ibz_convert_to_str +#undef ibz_copy +#undef ibz_copy_digits +#undef ibz_div +#undef ibz_div_2exp +#undef ibz_div_floor +#undef ibz_divides +#undef ibz_finalize +#undef ibz_gcd +#undef ibz_get +#undef ibz_init +#undef ibz_invmod +#undef ibz_is_even +#undef ibz_is_odd +#undef ibz_is_one +#undef ibz_is_zero +#undef ibz_legendre +#undef ibz_mod +#undef ibz_mod_ui +#undef ibz_mul +#undef ibz_neg +#undef ibz_pow +#undef ibz_pow_mod +#undef ibz_print +#undef ibz_probab_prime +#undef ibz_rand_interval 
+#undef ibz_rand_interval_bits +#undef ibz_rand_interval_i +#undef ibz_rand_interval_minm_m +#undef ibz_set +#undef ibz_set_from_str +#undef ibz_size_in_base +#undef ibz_sqrt +#undef ibz_sqrt_floor +#undef ibz_sqrt_mod_p +#undef ibz_sub +#undef ibz_swap +#undef ibz_to_digits +#undef ibz_two_adic + +#define ibz_abs SQISIGN_NAMESPACE_GENERIC(ibz_abs) +#define ibz_add SQISIGN_NAMESPACE_GENERIC(ibz_add) +#define ibz_bitsize SQISIGN_NAMESPACE_GENERIC(ibz_bitsize) +#define ibz_cmp SQISIGN_NAMESPACE_GENERIC(ibz_cmp) +#define ibz_cmp_int32 SQISIGN_NAMESPACE_GENERIC(ibz_cmp_int32) +#define ibz_convert_to_str SQISIGN_NAMESPACE_GENERIC(ibz_convert_to_str) +#define ibz_copy SQISIGN_NAMESPACE_GENERIC(ibz_copy) +#define ibz_copy_digits SQISIGN_NAMESPACE_GENERIC(ibz_copy_digits) +#define ibz_div SQISIGN_NAMESPACE_GENERIC(ibz_div) +#define ibz_div_2exp SQISIGN_NAMESPACE_GENERIC(ibz_div_2exp) +#define ibz_div_floor SQISIGN_NAMESPACE_GENERIC(ibz_div_floor) +#define ibz_divides SQISIGN_NAMESPACE_GENERIC(ibz_divides) +#define ibz_finalize SQISIGN_NAMESPACE_GENERIC(ibz_finalize) +#define ibz_gcd SQISIGN_NAMESPACE_GENERIC(ibz_gcd) +#define ibz_get SQISIGN_NAMESPACE_GENERIC(ibz_get) +#define ibz_init SQISIGN_NAMESPACE_GENERIC(ibz_init) +#define ibz_invmod SQISIGN_NAMESPACE_GENERIC(ibz_invmod) +#define ibz_is_even SQISIGN_NAMESPACE_GENERIC(ibz_is_even) +#define ibz_is_odd SQISIGN_NAMESPACE_GENERIC(ibz_is_odd) +#define ibz_is_one SQISIGN_NAMESPACE_GENERIC(ibz_is_one) +#define ibz_is_zero SQISIGN_NAMESPACE_GENERIC(ibz_is_zero) +#define ibz_legendre SQISIGN_NAMESPACE_GENERIC(ibz_legendre) +#define ibz_mod SQISIGN_NAMESPACE_GENERIC(ibz_mod) +#define ibz_mod_ui SQISIGN_NAMESPACE_GENERIC(ibz_mod_ui) +#define ibz_mul SQISIGN_NAMESPACE_GENERIC(ibz_mul) +#define ibz_neg SQISIGN_NAMESPACE_GENERIC(ibz_neg) +#define ibz_pow SQISIGN_NAMESPACE_GENERIC(ibz_pow) +#define ibz_pow_mod SQISIGN_NAMESPACE_GENERIC(ibz_pow_mod) +#define ibz_print SQISIGN_NAMESPACE_GENERIC(ibz_print) +#define ibz_probab_prime 
SQISIGN_NAMESPACE_GENERIC(ibz_probab_prime) +#define ibz_rand_interval SQISIGN_NAMESPACE_GENERIC(ibz_rand_interval) +#define ibz_rand_interval_bits SQISIGN_NAMESPACE_GENERIC(ibz_rand_interval_bits) +#define ibz_rand_interval_i SQISIGN_NAMESPACE_GENERIC(ibz_rand_interval_i) +#define ibz_rand_interval_minm_m SQISIGN_NAMESPACE_GENERIC(ibz_rand_interval_minm_m) +#define ibz_set SQISIGN_NAMESPACE_GENERIC(ibz_set) +#define ibz_set_from_str SQISIGN_NAMESPACE_GENERIC(ibz_set_from_str) +#define ibz_size_in_base SQISIGN_NAMESPACE_GENERIC(ibz_size_in_base) +#define ibz_sqrt SQISIGN_NAMESPACE_GENERIC(ibz_sqrt) +#define ibz_sqrt_floor SQISIGN_NAMESPACE_GENERIC(ibz_sqrt_floor) +#define ibz_sqrt_mod_p SQISIGN_NAMESPACE_GENERIC(ibz_sqrt_mod_p) +#define ibz_sub SQISIGN_NAMESPACE_GENERIC(ibz_sub) +#define ibz_swap SQISIGN_NAMESPACE_GENERIC(ibz_swap) +#define ibz_to_digits SQISIGN_NAMESPACE_GENERIC(ibz_to_digits) +#define ibz_two_adic SQISIGN_NAMESPACE_GENERIC(ibz_two_adic) + +// Namespacing symbols exported from integers.c: +#undef ibz_cornacchia_prime +#undef ibz_generate_random_prime + +#define ibz_cornacchia_prime SQISIGN_NAMESPACE_GENERIC(ibz_cornacchia_prime) +#define ibz_generate_random_prime SQISIGN_NAMESPACE_GENERIC(ibz_generate_random_prime) + +// Namespacing symbols exported from isog_chains.c: +#undef ec_eval_even +#undef ec_eval_small_chain +#undef ec_iso_eval +#undef ec_isomorphism + +#define ec_eval_even SQISIGN_NAMESPACE(ec_eval_even) +#define ec_eval_small_chain SQISIGN_NAMESPACE(ec_eval_small_chain) +#define ec_iso_eval SQISIGN_NAMESPACE(ec_iso_eval) +#define ec_isomorphism SQISIGN_NAMESPACE(ec_isomorphism) + +// Namespacing symbols exported from keygen.c: +#undef protocols_keygen +#undef secret_key_finalize +#undef secret_key_init + +#define protocols_keygen SQISIGN_NAMESPACE(protocols_keygen) +#define secret_key_finalize SQISIGN_NAMESPACE(secret_key_finalize) +#define secret_key_init SQISIGN_NAMESPACE(secret_key_init) + +// Namespacing symbols exported from l2.c: 
+#undef quat_lattice_lll +#undef quat_lll_core + +#define quat_lattice_lll SQISIGN_NAMESPACE_GENERIC(quat_lattice_lll) +#define quat_lll_core SQISIGN_NAMESPACE_GENERIC(quat_lll_core) + +// Namespacing symbols exported from lat_ball.c: +#undef quat_lattice_bound_parallelogram +#undef quat_lattice_sample_from_ball + +#define quat_lattice_bound_parallelogram SQISIGN_NAMESPACE_GENERIC(quat_lattice_bound_parallelogram) +#define quat_lattice_sample_from_ball SQISIGN_NAMESPACE_GENERIC(quat_lattice_sample_from_ball) + +// Namespacing symbols exported from lattice.c: +#undef quat_lattice_add +#undef quat_lattice_alg_elem_mul +#undef quat_lattice_conjugate_without_hnf +#undef quat_lattice_contains +#undef quat_lattice_dual_without_hnf +#undef quat_lattice_equal +#undef quat_lattice_gram +#undef quat_lattice_hnf +#undef quat_lattice_inclusion +#undef quat_lattice_index +#undef quat_lattice_intersect +#undef quat_lattice_mat_alg_coord_mul_without_hnf +#undef quat_lattice_mul +#undef quat_lattice_reduce_denom + +#define quat_lattice_add SQISIGN_NAMESPACE_GENERIC(quat_lattice_add) +#define quat_lattice_alg_elem_mul SQISIGN_NAMESPACE_GENERIC(quat_lattice_alg_elem_mul) +#define quat_lattice_conjugate_without_hnf SQISIGN_NAMESPACE_GENERIC(quat_lattice_conjugate_without_hnf) +#define quat_lattice_contains SQISIGN_NAMESPACE_GENERIC(quat_lattice_contains) +#define quat_lattice_dual_without_hnf SQISIGN_NAMESPACE_GENERIC(quat_lattice_dual_without_hnf) +#define quat_lattice_equal SQISIGN_NAMESPACE_GENERIC(quat_lattice_equal) +#define quat_lattice_gram SQISIGN_NAMESPACE_GENERIC(quat_lattice_gram) +#define quat_lattice_hnf SQISIGN_NAMESPACE_GENERIC(quat_lattice_hnf) +#define quat_lattice_inclusion SQISIGN_NAMESPACE_GENERIC(quat_lattice_inclusion) +#define quat_lattice_index SQISIGN_NAMESPACE_GENERIC(quat_lattice_index) +#define quat_lattice_intersect SQISIGN_NAMESPACE_GENERIC(quat_lattice_intersect) +#define quat_lattice_mat_alg_coord_mul_without_hnf 
SQISIGN_NAMESPACE_GENERIC(quat_lattice_mat_alg_coord_mul_without_hnf) +#define quat_lattice_mul SQISIGN_NAMESPACE_GENERIC(quat_lattice_mul) +#define quat_lattice_reduce_denom SQISIGN_NAMESPACE_GENERIC(quat_lattice_reduce_denom) + +// Namespacing symbols exported from lll_applications.c: +#undef quat_lideal_lideal_mul_reduced +#undef quat_lideal_prime_norm_reduced_equivalent +#undef quat_lideal_reduce_basis + +#define quat_lideal_lideal_mul_reduced SQISIGN_NAMESPACE_GENERIC(quat_lideal_lideal_mul_reduced) +#define quat_lideal_prime_norm_reduced_equivalent SQISIGN_NAMESPACE_GENERIC(quat_lideal_prime_norm_reduced_equivalent) +#define quat_lideal_reduce_basis SQISIGN_NAMESPACE_GENERIC(quat_lideal_reduce_basis) + +// Namespacing symbols exported from lll_verification.c: +#undef ibq_vec_4_copy_ibz +#undef quat_lll_bilinear +#undef quat_lll_gram_schmidt_transposed_with_ibq +#undef quat_lll_set_ibq_parameters +#undef quat_lll_verify + +#define ibq_vec_4_copy_ibz SQISIGN_NAMESPACE_GENERIC(ibq_vec_4_copy_ibz) +#define quat_lll_bilinear SQISIGN_NAMESPACE_GENERIC(quat_lll_bilinear) +#define quat_lll_gram_schmidt_transposed_with_ibq SQISIGN_NAMESPACE_GENERIC(quat_lll_gram_schmidt_transposed_with_ibq) +#define quat_lll_set_ibq_parameters SQISIGN_NAMESPACE_GENERIC(quat_lll_set_ibq_parameters) +#define quat_lll_verify SQISIGN_NAMESPACE_GENERIC(quat_lll_verify) + +// Namespacing symbols exported from mem.c: +#undef sqisign_secure_clear +#undef sqisign_secure_free + +#define sqisign_secure_clear SQISIGN_NAMESPACE_GENERIC(sqisign_secure_clear) +#define sqisign_secure_free SQISIGN_NAMESPACE_GENERIC(sqisign_secure_free) + +// Namespacing symbols exported from mp.c: +#undef MUL +#undef mp_add +#undef mp_compare +#undef mp_copy +#undef mp_inv_2e +#undef mp_invert_matrix +#undef mp_is_one +#undef mp_is_zero +#undef mp_mod_2exp +#undef mp_mul +#undef mp_mul2 +#undef mp_neg +#undef mp_print +#undef mp_shiftl +#undef mp_shiftr +#undef mp_sub +#undef multiple_mp_shiftl +#undef select_ct 
+#undef swap_ct + +#define MUL SQISIGN_NAMESPACE_GENERIC(MUL) +#define mp_add SQISIGN_NAMESPACE_GENERIC(mp_add) +#define mp_compare SQISIGN_NAMESPACE_GENERIC(mp_compare) +#define mp_copy SQISIGN_NAMESPACE_GENERIC(mp_copy) +#define mp_inv_2e SQISIGN_NAMESPACE_GENERIC(mp_inv_2e) +#define mp_invert_matrix SQISIGN_NAMESPACE_GENERIC(mp_invert_matrix) +#define mp_is_one SQISIGN_NAMESPACE_GENERIC(mp_is_one) +#define mp_is_zero SQISIGN_NAMESPACE_GENERIC(mp_is_zero) +#define mp_mod_2exp SQISIGN_NAMESPACE_GENERIC(mp_mod_2exp) +#define mp_mul SQISIGN_NAMESPACE_GENERIC(mp_mul) +#define mp_mul2 SQISIGN_NAMESPACE_GENERIC(mp_mul2) +#define mp_neg SQISIGN_NAMESPACE_GENERIC(mp_neg) +#define mp_print SQISIGN_NAMESPACE_GENERIC(mp_print) +#define mp_shiftl SQISIGN_NAMESPACE_GENERIC(mp_shiftl) +#define mp_shiftr SQISIGN_NAMESPACE_GENERIC(mp_shiftr) +#define mp_sub SQISIGN_NAMESPACE_GENERIC(mp_sub) +#define multiple_mp_shiftl SQISIGN_NAMESPACE_GENERIC(multiple_mp_shiftl) +#define select_ct SQISIGN_NAMESPACE_GENERIC(select_ct) +#define swap_ct SQISIGN_NAMESPACE_GENERIC(swap_ct) + +// Namespacing symbols exported from normeq.c: +#undef quat_change_to_O0_basis +#undef quat_lattice_O0_set +#undef quat_lattice_O0_set_extremal +#undef quat_order_elem_create +#undef quat_represent_integer +#undef quat_sampling_random_ideal_O0_given_norm + +#define quat_change_to_O0_basis SQISIGN_NAMESPACE_GENERIC(quat_change_to_O0_basis) +#define quat_lattice_O0_set SQISIGN_NAMESPACE_GENERIC(quat_lattice_O0_set) +#define quat_lattice_O0_set_extremal SQISIGN_NAMESPACE_GENERIC(quat_lattice_O0_set_extremal) +#define quat_order_elem_create SQISIGN_NAMESPACE_GENERIC(quat_order_elem_create) +#define quat_represent_integer SQISIGN_NAMESPACE_GENERIC(quat_represent_integer) +#define quat_sampling_random_ideal_O0_given_norm SQISIGN_NAMESPACE_GENERIC(quat_sampling_random_ideal_O0_given_norm) + +// Namespacing symbols exported from printer.c: +#undef ibz_mat_2x2_print +#undef ibz_mat_4x4_print +#undef ibz_vec_2_print 
+#undef ibz_vec_4_print +#undef quat_alg_elem_print +#undef quat_alg_print +#undef quat_lattice_print +#undef quat_left_ideal_print + +#define ibz_mat_2x2_print SQISIGN_NAMESPACE_GENERIC(ibz_mat_2x2_print) +#define ibz_mat_4x4_print SQISIGN_NAMESPACE_GENERIC(ibz_mat_4x4_print) +#define ibz_vec_2_print SQISIGN_NAMESPACE_GENERIC(ibz_vec_2_print) +#define ibz_vec_4_print SQISIGN_NAMESPACE_GENERIC(ibz_vec_4_print) +#define quat_alg_elem_print SQISIGN_NAMESPACE_GENERIC(quat_alg_elem_print) +#define quat_alg_print SQISIGN_NAMESPACE_GENERIC(quat_alg_print) +#define quat_lattice_print SQISIGN_NAMESPACE_GENERIC(quat_lattice_print) +#define quat_left_ideal_print SQISIGN_NAMESPACE_GENERIC(quat_left_ideal_print) + +// Namespacing symbols exported from random_input_generation.c: +#undef quat_test_input_random_ideal_generation +#undef quat_test_input_random_ideal_lattice_generation +#undef quat_test_input_random_lattice_generation + +#define quat_test_input_random_ideal_generation SQISIGN_NAMESPACE_GENERIC(quat_test_input_random_ideal_generation) +#define quat_test_input_random_ideal_lattice_generation SQISIGN_NAMESPACE_GENERIC(quat_test_input_random_ideal_lattice_generation) +#define quat_test_input_random_lattice_generation SQISIGN_NAMESPACE_GENERIC(quat_test_input_random_lattice_generation) + +// Namespacing symbols exported from rationals.c: +#undef ibq_abs +#undef ibq_add +#undef ibq_cmp +#undef ibq_copy +#undef ibq_finalize +#undef ibq_init +#undef ibq_inv +#undef ibq_is_ibz +#undef ibq_is_one +#undef ibq_is_zero +#undef ibq_mat_4x4_finalize +#undef ibq_mat_4x4_init +#undef ibq_mat_4x4_print +#undef ibq_mul +#undef ibq_neg +#undef ibq_reduce +#undef ibq_set +#undef ibq_sub +#undef ibq_to_ibz +#undef ibq_vec_4_finalize +#undef ibq_vec_4_init +#undef ibq_vec_4_print + +#define ibq_abs SQISIGN_NAMESPACE_GENERIC(ibq_abs) +#define ibq_add SQISIGN_NAMESPACE_GENERIC(ibq_add) +#define ibq_cmp SQISIGN_NAMESPACE_GENERIC(ibq_cmp) +#define ibq_copy SQISIGN_NAMESPACE_GENERIC(ibq_copy) 
+#define ibq_finalize SQISIGN_NAMESPACE_GENERIC(ibq_finalize) +#define ibq_init SQISIGN_NAMESPACE_GENERIC(ibq_init) +#define ibq_inv SQISIGN_NAMESPACE_GENERIC(ibq_inv) +#define ibq_is_ibz SQISIGN_NAMESPACE_GENERIC(ibq_is_ibz) +#define ibq_is_one SQISIGN_NAMESPACE_GENERIC(ibq_is_one) +#define ibq_is_zero SQISIGN_NAMESPACE_GENERIC(ibq_is_zero) +#define ibq_mat_4x4_finalize SQISIGN_NAMESPACE_GENERIC(ibq_mat_4x4_finalize) +#define ibq_mat_4x4_init SQISIGN_NAMESPACE_GENERIC(ibq_mat_4x4_init) +#define ibq_mat_4x4_print SQISIGN_NAMESPACE_GENERIC(ibq_mat_4x4_print) +#define ibq_mul SQISIGN_NAMESPACE_GENERIC(ibq_mul) +#define ibq_neg SQISIGN_NAMESPACE_GENERIC(ibq_neg) +#define ibq_reduce SQISIGN_NAMESPACE_GENERIC(ibq_reduce) +#define ibq_set SQISIGN_NAMESPACE_GENERIC(ibq_set) +#define ibq_sub SQISIGN_NAMESPACE_GENERIC(ibq_sub) +#define ibq_to_ibz SQISIGN_NAMESPACE_GENERIC(ibq_to_ibz) +#define ibq_vec_4_finalize SQISIGN_NAMESPACE_GENERIC(ibq_vec_4_finalize) +#define ibq_vec_4_init SQISIGN_NAMESPACE_GENERIC(ibq_vec_4_init) +#define ibq_vec_4_print SQISIGN_NAMESPACE_GENERIC(ibq_vec_4_print) + +// Namespacing symbols exported from sign.c: +#undef protocols_sign + +#define protocols_sign SQISIGN_NAMESPACE(protocols_sign) + +// Namespacing symbols exported from sqisign.c: +#undef sqisign_keypair +#undef sqisign_open +#undef sqisign_sign +#undef sqisign_verify + +#define sqisign_keypair SQISIGN_NAMESPACE(sqisign_keypair) +#define sqisign_open SQISIGN_NAMESPACE(sqisign_open) +#define sqisign_sign SQISIGN_NAMESPACE(sqisign_sign) +#define sqisign_verify SQISIGN_NAMESPACE(sqisign_verify) + +// Namespacing symbols exported from theta_isogenies.c: +#undef theta_chain_compute_and_eval +#undef theta_chain_compute_and_eval_randomized +#undef theta_chain_compute_and_eval_verify + +#define theta_chain_compute_and_eval SQISIGN_NAMESPACE(theta_chain_compute_and_eval) +#define theta_chain_compute_and_eval_randomized SQISIGN_NAMESPACE(theta_chain_compute_and_eval_randomized) +#define 
theta_chain_compute_and_eval_verify SQISIGN_NAMESPACE(theta_chain_compute_and_eval_verify) + +// Namespacing symbols exported from theta_structure.c: +#undef double_iter +#undef double_point +#undef is_product_theta_point +#undef theta_precomputation + +#define double_iter SQISIGN_NAMESPACE(double_iter) +#define double_point SQISIGN_NAMESPACE(double_point) +#define is_product_theta_point SQISIGN_NAMESPACE(is_product_theta_point) +#define theta_precomputation SQISIGN_NAMESPACE(theta_precomputation) + +// Namespacing symbols exported from verify.c: +#undef protocols_verify + +#define protocols_verify SQISIGN_NAMESPACE(protocols_verify) + +// Namespacing symbols exported from xeval.c: +#undef xeval_2 +#undef xeval_2_singular +#undef xeval_4 + +#define xeval_2 SQISIGN_NAMESPACE(xeval_2) +#define xeval_2_singular SQISIGN_NAMESPACE(xeval_2_singular) +#define xeval_4 SQISIGN_NAMESPACE(xeval_4) + +// Namespacing symbols exported from xisog.c: +#undef xisog_2 +#undef xisog_2_singular +#undef xisog_4 + +#define xisog_2 SQISIGN_NAMESPACE(xisog_2) +#define xisog_2_singular SQISIGN_NAMESPACE(xisog_2_singular) +#define xisog_4 SQISIGN_NAMESPACE(xisog_4) + + +#endif + diff --git a/src/pqm4/sqisign_lvl5/ref/theta_isogenies.c b/src/pqm4/sqisign_lvl5/ref/theta_isogenies.c new file mode 100644 index 0000000..478a9ab --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/theta_isogenies.c @@ -0,0 +1,1283 @@ +#include "theta_isogenies.h" +#include <stdio.h> +#include <inttypes.h> +#include <assert.h> +#include <tools.h> +#include <rng.h> + +// Select a base change matrix in constant time, with M1 a regular +// base change matrix and M2 a precomputed base change matrix +// If option = 0 then M <- M1, else if option = 0xFF...FF then M <- M2 +static inline void +select_base_change_matrix(basis_change_matrix_t *M, + const basis_change_matrix_t *M1, + const precomp_basis_change_matrix_t *M2, + const uint32_t option) +{ + for (int i = 0; i < 4; i++) + for (int j = 0; j < 4; j++) + fp2_select(&M->m[i][j], &M1->m[i][j], &FP2_CONSTANTS[M2->m[i][j]],
option); +} + +// Set a regular base change matrix from a precomputed one: each entry of M is +// an index into FP2_CONSTANTS and res receives the constant stored at that index +static inline void +set_base_change_matrix_from_precomp(basis_change_matrix_t *res, const precomp_basis_change_matrix_t *M) +{ + for (int i = 0; i < 4; i++) + for (int j = 0; j < 4; j++) + res->m[i][j] = FP2_CONSTANTS[M->m[i][j]]; +} + +// Copy coordinate number (ind % 4) of T into res: 0 -> x, 1 -> y, 2 -> z, 3 -> t +// NOTE(review): for a negative ind, ind % 4 can be negative, in which case the +// default branch is taken; if assert is compiled out, src stays NULL there -- +// presumably callers only pass ind >= 0, confirm +static inline void +choose_index_theta_point(fp2_t *res, int ind, const theta_point_t *T) +{ + const fp2_t *src = NULL; + switch (ind % 4) { + case 0: + src = &T->x; + break; + case 1: + src = &T->y; + break; + case 2: + src = &T->z; + break; + case 3: + src = &T->t; + break; + default: + assert(0); + } + fp2_copy(res, src); +} + +// Set res = M * P, the matrix-vector product on the coordinates (x, y, z, t) of P. +// Same as apply_isomorphism, except that when Pt_not_zero is false the four +// products with P->t are skipped, which is cheaper when the caller knows that the +// t component of P is zero. The result is accumulated in a temporary and copied +// to res at the end, so res may alias P. +static void +apply_isomorphism_general(theta_point_t *res, + const basis_change_matrix_t *M, + const theta_point_t *P, + const bool Pt_not_zero) +{ + fp2_t x1; + theta_point_t temp; + + fp2_mul(&temp.x, &P->x, &M->m[0][0]); + fp2_mul(&x1, &P->y, &M->m[0][1]); + fp2_add(&temp.x, &temp.x, &x1); + fp2_mul(&x1, &P->z, &M->m[0][2]); + fp2_add(&temp.x, &temp.x, &x1); + + fp2_mul(&temp.y, &P->x, &M->m[1][0]); + fp2_mul(&x1, &P->y, &M->m[1][1]); + fp2_add(&temp.y, &temp.y, &x1); + fp2_mul(&x1, &P->z, &M->m[1][2]); + fp2_add(&temp.y, &temp.y, &x1); + + fp2_mul(&temp.z, &P->x, &M->m[2][0]); + fp2_mul(&x1, &P->y, &M->m[2][1]); + fp2_add(&temp.z, &temp.z, &x1); + fp2_mul(&x1, &P->z, &M->m[2][2]); + fp2_add(&temp.z, &temp.z, &x1); + + fp2_mul(&temp.t, &P->x, &M->m[3][0]); + fp2_mul(&x1, &P->y, &M->m[3][1]); + fp2_add(&temp.t, &temp.t, &x1); + fp2_mul(&x1, &P->z, &M->m[3][2]); + fp2_add(&temp.t, &temp.t, &x1); + + if (Pt_not_zero) { + fp2_mul(&x1, &P->t, &M->m[0][3]); + fp2_add(&temp.x, &temp.x, &x1); + + fp2_mul(&x1, &P->t, &M->m[1][3]); + fp2_add(&temp.y, &temp.y, &x1); + + fp2_mul(&x1, &P->t, &M->m[2][3]); + fp2_add(&temp.z, &temp.z, &x1); + + fp2_mul(&x1, &P->t, &M->m[3][3]); + fp2_add(&temp.t, &temp.t, &x1); + } + + fp2_copy(&res->x, &temp.x); +
fp2_copy(&res->y, &temp.y); + fp2_copy(&res->z, &temp.z); + fp2_copy(&res->t, &temp.t); +} + +static void +apply_isomorphism(theta_point_t *res, const basis_change_matrix_t *M, const theta_point_t *P) +{ + apply_isomorphism_general(res, M, P, true); +} + +// Set res = M1 * M2 (4x4 matrix multiplication). The product is accumulated in a +// local temporary and copied at the end, so res may alias M1 or M2. +static void +base_change_matrix_multiplication(basis_change_matrix_t *res, + const basis_change_matrix_t *M1, + const basis_change_matrix_t *M2) +{ + basis_change_matrix_t tmp; + fp2_t sum, m_ik, m_kj; + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + fp2_set_zero(&sum); + for (int k = 0; k < 4; k++) { + m_ik = M1->m[i][k]; + m_kj = M2->m[k][j]; + fp2_mul(&m_ik, &m_ik, &m_kj); + fp2_add(&sum, &sum, &m_ik); + } + tmp.m[i][j] = sum; + } + } + *res = tmp; +} + +// Compute the theta point corresponding to the couple of points T on an elliptic +// product: the four products of the x and z coordinates of T->P1 and T->P2 are +// formed and then mapped through the basis change matrix phi->M. +static void +base_change(theta_point_t *out, const theta_gluing_t *phi, const theta_couple_point_t *T) +{ + theta_point_t null_point; + + // null_point = (a : b : c : d) + // a = P1.x P2.x, b = P1.x P2.z, c = P1.z P2.x, d = P1.z P2.z + fp2_mul(&null_point.x, &T->P1.x, &T->P2.x); + fp2_mul(&null_point.y, &T->P1.x, &T->P2.z); + fp2_mul(&null_point.z, &T->P2.x, &T->P1.z); + fp2_mul(&null_point.t, &T->P1.z, &T->P2.z); + + // Apply the basis change + apply_isomorphism(out, &phi->M, &null_point); +} + +// Collect the two values that have to be inverted for the action by translation +// of P4 = (xij : zij) with P2 = (uij : wij): zij is stored in z_inv and +// detij = xij wij - uij zij in det_inv. Despite the parameter names nothing is +// inverted here; the caller inverts the stored values afterwards. +static void +action_by_translation_z_and_det(fp2_t *z_inv, fp2_t *det_inv, const ec_point_t *P4, const ec_point_t *P2) +{ + // Store the Z-coordinate to invert + fp2_copy(z_inv, &P4->z); + + // Then collect detij = xij wij - uij zij + fp2_t tmp; + fp2_mul(det_inv, &P4->x, &P2->z); + fp2_mul(&tmp, &P4->z, &P2->x); + fp2_sub(det_inv, det_inv, &tmp); +} + +// Fill the 2x2 translation matrix G from P4 = (xij : zij), P2 = (uij : wij) and +// the values z_inv and det_inv, which are used as 1/zij and 1/detij. +static void +action_by_translation_compute_matrix(translation_matrix_t *G, + const ec_point_t *P4, + const ec_point_t *P2, + const fp2_t *z_inv, + const fp2_t *det_inv) +{ + fp2_t tmp; + + // Gi.g10 = uij xij /detij - xij/zij + fp2_mul(&tmp, &P4->x, z_inv); +
fp2_mul(&G->g10, &P4->x, &P2->x); + fp2_mul(&G->g10, &G->g10, det_inv); + fp2_sub(&G->g10, &G->g10, &tmp); + + // Gi.g11 = uij zij / detij + fp2_mul(&G->g11, &P2->x, det_inv); + fp2_mul(&G->g11, &G->g11, &P4->z); + + // Gi.g00 = -Gi.g11 + fp2_neg(&G->g00, &G->g11); + + // Gi.g01 = - wij zij / detij + fp2_mul(&G->g01, &P2->z, det_inv); + fp2_mul(&G->g01, &G->g01, &P4->z); + fp2_neg(&G->g01, &G->g01); +} + +// Returns 1 if the basis is as expected and 0 otherwise: every component of +// K1_2 and K2_2 must be nonzero, K1_2 and K2_2 must differ on each curve, and +// all four components must double to zero. +// We only expect this to fail for malformed signatures, so +// do not require this to run in constant time. +static int +verify_two_torsion(const theta_couple_point_t *K1_2, const theta_couple_point_t *K2_2, const theta_couple_curve_t *E12) +{ + // First check if any point in K1_2 or K2_2 is zero, if they are then the points did not have + // order 8 when we started gluing + if (ec_is_zero(&K1_2->P1) | ec_is_zero(&K1_2->P2) | ec_is_zero(&K2_2->P1) | ec_is_zero(&K2_2->P2)) { + return 0; + } + + // Now ensure that P1, Q1 and P2, Q2 are independent. For points of order two this means + // that they're not the same + if (ec_is_equal(&K1_2->P1, &K2_2->P1) | ec_is_equal(&K1_2->P2, &K2_2->P2)) { + return 0; + } + + // Finally, double points to ensure all points have order exactly 2 + theta_couple_point_t O1, O2; + double_couple_point(&O1, K1_2, E12); + double_couple_point(&O2, K2_2, E12); + // If this check fails then the points had order 2*f for some f, and the kernel is malformed. + if (!(ec_is_zero(&O1.P1) & ec_is_zero(&O1.P2) & ec_is_zero(&O2.P1) & ec_is_zero(&O2.P2))) { + return 0; + } + + return 1; +} + +// Computes the action by translation for four points +// (P1, P2) and (Q1, Q2) on E1 x E2 simultaneously to +// save on inversions.
+// Gi must have room for four matrices; they are written in the order +// K1_4.P1, K1_4.P2, K2_4.P1, K2_4.P2. +// Returns 0 if the doubles of the input points fail verify_two_torsion +// (they are expected to have order 2) or if the batched inversion yields +// zero, and 1 otherwise +static int +action_by_translation(translation_matrix_t *Gi, + const theta_couple_point_t *K1_4, + const theta_couple_point_t *K2_4, + const theta_couple_curve_t *E12) +{ + // Compute points of order 2 from Ki_4 + theta_couple_point_t K1_2, K2_2; + double_couple_point(&K1_2, K1_4, E12); + double_couple_point(&K2_2, K2_4, E12); + + if (!verify_two_torsion(&K1_2, &K2_2, E12)) { + return 0; + } + + // We need to invert four Z coordinates and + // four determinants which we do with batched + // inversion + // inverses[0..3] hold the Z coordinates and inverses[4..7] the determinants + fp2_t inverses[8]; + action_by_translation_z_and_det(&inverses[0], &inverses[4], &K1_4->P1, &K1_2.P1); + action_by_translation_z_and_det(&inverses[1], &inverses[5], &K1_4->P2, &K1_2.P2); + action_by_translation_z_and_det(&inverses[2], &inverses[6], &K2_4->P1, &K2_2.P1); + action_by_translation_z_and_det(&inverses[3], &inverses[7], &K2_4->P2, &K2_2.P2); + + fp2_batched_inv(inverses, 8); + if (fp2_is_zero(&inverses[0])) + return 0; // something was wrong with our input (which somehow was not caught by + // verify_two_torsion) + + action_by_translation_compute_matrix(&Gi[0], &K1_4->P1, &K1_2.P1, &inverses[0], &inverses[4]); + action_by_translation_compute_matrix(&Gi[1], &K1_4->P2, &K1_2.P2, &inverses[1], &inverses[5]); + action_by_translation_compute_matrix(&Gi[2], &K2_4->P1, &K2_2.P1, &inverses[2], &inverses[6]); + action_by_translation_compute_matrix(&Gi[3], &K2_4->P2, &K2_2.P2, &inverses[3], &inverses[7]); + + return 1; +} + +// Given the appropriate four torsion, computes the +// change of basis to compute the correct theta null +// point.
+// The resulting 4x4 matrix is written to M. +// Returns 0 if action_by_translation rejects the input (the order of K1_4 or +// K2_4 is not 4) and 1 otherwise +static int +gluing_change_of_basis(basis_change_matrix_t *M, + const theta_couple_point_t *K1_4, + const theta_couple_point_t *K2_4, + const theta_couple_curve_t *E12) +{ + // Compute the four 2x2 matrices for the action by translation + // on the four points: + translation_matrix_t Gi[4]; + if (!action_by_translation(Gi, K1_4, K2_4, E12)) + return 0; + + // Computation of the 4x4 matrix from Mij, where the products below use + // M11 = Gi[0], M12 = Gi[1], M21 = Gi[2] and M22 = Gi[3] + // t001, t101 (resp t002, t102) first column of M11 * M21 (resp M12 * M22) + fp2_t t001, t101, t002, t102, tmp; + + fp2_mul(&t001, &Gi[0].g00, &Gi[2].g00); + fp2_mul(&tmp, &Gi[0].g01, &Gi[2].g10); + fp2_add(&t001, &t001, &tmp); + + fp2_mul(&t101, &Gi[0].g10, &Gi[2].g00); + fp2_mul(&tmp, &Gi[0].g11, &Gi[2].g10); + fp2_add(&t101, &t101, &tmp); + + fp2_mul(&t002, &Gi[1].g00, &Gi[3].g00); + fp2_mul(&tmp, &Gi[1].g01, &Gi[3].g10); + fp2_add(&t002, &t002, &tmp); + + fp2_mul(&t102, &Gi[1].g10, &Gi[3].g00); + fp2_mul(&tmp, &Gi[1].g11, &Gi[3].g10); + fp2_add(&t102, &t102, &tmp); + + // trace for the first row + fp2_set_one(&M->m[0][0]); + fp2_mul(&tmp, &t001, &t002); + fp2_add(&M->m[0][0], &M->m[0][0], &tmp); + fp2_mul(&tmp, &Gi[2].g00, &Gi[3].g00); + fp2_add(&M->m[0][0], &M->m[0][0], &tmp); + fp2_mul(&tmp, &Gi[0].g00, &Gi[1].g00); + fp2_add(&M->m[0][0], &M->m[0][0], &tmp); + + fp2_mul(&M->m[0][1], &t001, &t102); + fp2_mul(&tmp, &Gi[2].g00, &Gi[3].g10); + fp2_add(&M->m[0][1], &M->m[0][1], &tmp); + fp2_mul(&tmp, &Gi[0].g00, &Gi[1].g10); + fp2_add(&M->m[0][1], &M->m[0][1], &tmp); + + fp2_mul(&M->m[0][2], &t101, &t002); + fp2_mul(&tmp, &Gi[2].g10, &Gi[3].g00); + fp2_add(&M->m[0][2], &M->m[0][2], &tmp); + fp2_mul(&tmp, &Gi[0].g10, &Gi[1].g00); + fp2_add(&M->m[0][2], &M->m[0][2], &tmp); + + fp2_mul(&M->m[0][3], &t101, &t102); + fp2_mul(&tmp, &Gi[2].g10, &Gi[3].g10); + fp2_add(&M->m[0][3], &M->m[0][3], &tmp); + fp2_mul(&tmp, &Gi[0].g10, &Gi[1].g10); + fp2_add(&M->m[0][3], &M->m[0][3], &tmp); + + // Compute the action of
(0,out.K2_4.P2) for the second row + fp2_mul(&tmp, &Gi[3].g01, &M->m[0][1]); + fp2_mul(&M->m[1][0], &Gi[3].g00, &M->m[0][0]); + fp2_add(&M->m[1][0], &M->m[1][0], &tmp); + + fp2_mul(&tmp, &Gi[3].g11, &M->m[0][1]); + fp2_mul(&M->m[1][1], &Gi[3].g10, &M->m[0][0]); + fp2_add(&M->m[1][1], &M->m[1][1], &tmp); + + fp2_mul(&tmp, &Gi[3].g01, &M->m[0][3]); + fp2_mul(&M->m[1][2], &Gi[3].g00, &M->m[0][2]); + fp2_add(&M->m[1][2], &M->m[1][2], &tmp); + + fp2_mul(&tmp, &Gi[3].g11, &M->m[0][3]); + fp2_mul(&M->m[1][3], &Gi[3].g10, &M->m[0][2]); + fp2_add(&M->m[1][3], &M->m[1][3], &tmp); + + // compute the action of (K1_4.P1,0) for the third row + fp2_mul(&tmp, &Gi[0].g01, &M->m[0][2]); + fp2_mul(&M->m[2][0], &Gi[0].g00, &M->m[0][0]); + fp2_add(&M->m[2][0], &M->m[2][0], &tmp); + + fp2_mul(&tmp, &Gi[0].g01, &M->m[0][3]); + fp2_mul(&M->m[2][1], &Gi[0].g00, &M->m[0][1]); + fp2_add(&M->m[2][1], &M->m[2][1], &tmp); + + fp2_mul(&tmp, &Gi[0].g11, &M->m[0][2]); + fp2_mul(&M->m[2][2], &Gi[0].g10, &M->m[0][0]); + fp2_add(&M->m[2][2], &M->m[2][2], &tmp); + + fp2_mul(&tmp, &Gi[0].g11, &M->m[0][3]); + fp2_mul(&M->m[2][3], &Gi[0].g10, &M->m[0][1]); + fp2_add(&M->m[2][3], &M->m[2][3], &tmp); + + // compute the action of (K1_4.P1,K2_4.P2) for the final row + fp2_mul(&tmp, &Gi[0].g01, &M->m[1][2]); + fp2_mul(&M->m[3][0], &Gi[0].g00, &M->m[1][0]); + fp2_add(&M->m[3][0], &M->m[3][0], &tmp); + + fp2_mul(&tmp, &Gi[0].g01, &M->m[1][3]); + fp2_mul(&M->m[3][1], &Gi[0].g00, &M->m[1][1]); + fp2_add(&M->m[3][1], &M->m[3][1], &tmp); + + fp2_mul(&tmp, &Gi[0].g11, &M->m[1][2]); + fp2_mul(&M->m[3][2], &Gi[0].g10, &M->m[1][0]); + fp2_add(&M->m[3][2], &M->m[3][2], &tmp); + + fp2_mul(&tmp, &Gi[0].g11, &M->m[1][3]); + fp2_mul(&M->m[3][3], &Gi[0].g10, &M->m[1][1]); + fp2_add(&M->m[3][3], &M->m[3][3], &tmp); + + return 1; +} + +/** + * @brief Compute the gluing isogeny from an elliptic product + * + * @param out Output: the theta_gluing + * @param K1_8 a couple point + * @param E12 an elliptic curve product + * @param 
K2_8 a point in E2[8] + * + * out : E1xE2 -> A of kernel [4](K1_8,K2_8) + * if the kernel supplied has the incorrect order, or gluing seems malformed, + * returns 0, otherwise returns 1. + */ +static int +gluing_compute(theta_gluing_t *out, + const theta_couple_curve_t *E12, + const theta_couple_jac_point_t *xyK1_8, + const theta_couple_jac_point_t *xyK2_8, + bool verify) +{ + // Ensure that we have been given the eight torsion +#ifndef NDEBUG + { + int check = test_jac_order_twof(&xyK1_8->P1, &E12->E1, 3); + if (!check) + debug_print("xyK1_8->P1 does not have order 8"); + check = test_jac_order_twof(&xyK2_8->P1, &E12->E1, 3); + if (!check) + debug_print("xyK2_8->P1 does not have order 8"); + check = test_jac_order_twof(&xyK1_8->P2, &E12->E2, 3); + if (!check) + debug_print("xyK2_8->P1 does not have order 8"); + check = test_jac_order_twof(&xyK2_8->P2, &E12->E2, 3); + if (!check) + debug_print("xyK2_8->P2 does not have order 8"); + } +#endif + + out->xyK1_8 = *xyK1_8; + out->domain = *E12; + + // Given points in E[8] x E[8] we need the four torsion below + theta_couple_jac_point_t xyK1_4, xyK2_4; + + double_couple_jac_point(&xyK1_4, xyK1_8, E12); + double_couple_jac_point(&xyK2_4, xyK2_8, E12); + + // Convert from (X:Y:Z) coordinates to (X:Z) + theta_couple_point_t K1_8, K2_8; + theta_couple_point_t K1_4, K2_4; + + couple_jac_to_xz(&K1_8, xyK1_8); + couple_jac_to_xz(&K2_8, xyK2_8); + couple_jac_to_xz(&K1_4, &xyK1_4); + couple_jac_to_xz(&K2_4, &xyK2_4); + + // Set the basis change matrix, if we have not been given a valid K[8] for this computation + // gluing_change_of_basis will detect this and return 0 + if (!gluing_change_of_basis(&out->M, &K1_4, &K2_4, E12)) { + debug_print("gluing failed as kernel does not have correct order"); + return 0; + } + + // apply the base change to the kernel + theta_point_t TT1, TT2; + + base_change(&TT1, out, &K1_8); + base_change(&TT2, out, &K2_8); + + // compute the codomain + to_squared_theta(&TT1, &TT1); + to_squared_theta(&TT2, 
&TT2); + + // If the kernel is well formed then TT1.t and TT2.t are zero + // if they are not, we exit early as the signature we are validating + // is probably malformed + if (!(fp2_is_zero(&TT1.t) & fp2_is_zero(&TT2.t))) { + debug_print("gluing failed TT1.t or TT2.t is not zero"); + return 0; + } + // Test our projective factors are non zero + if (fp2_is_zero(&TT1.x) | fp2_is_zero(&TT2.x) | fp2_is_zero(&TT1.y) | fp2_is_zero(&TT2.z) | fp2_is_zero(&TT1.z)) + return 0; // invalid input + + // Projective factor: Ax + fp2_mul(&out->codomain.x, &TT1.x, &TT2.x); + fp2_mul(&out->codomain.y, &TT1.y, &TT2.x); + fp2_mul(&out->codomain.z, &TT1.x, &TT2.z); + fp2_set_zero(&out->codomain.t); + // Projective factor: ABCxz + fp2_mul(&out->precomputation.x, &TT1.y, &TT2.z); + fp2_copy(&out->precomputation.y, &out->codomain.z); + fp2_copy(&out->precomputation.z, &out->codomain.y); + fp2_set_zero(&out->precomputation.t); + + // Compute the two components of phi(K1_8) = (x:x:y:y). + fp2_mul(&out->imageK1_8.x, &TT1.x, &out->precomputation.x); + fp2_mul(&out->imageK1_8.y, &TT1.z, &out->precomputation.z); + + // If K1_8 and K2_8 are our 8-torsion points, this ensures that the + // 4-torsion points [2]K1_8 and [2]K2_8 are isotropic. 
+ if (verify) { + fp2_t t1, t2; + fp2_mul(&t1, &TT1.y, &out->precomputation.y); + if (!fp2_is_equal(&out->imageK1_8.x, &t1)) + return 0; + fp2_mul(&t1, &TT2.x, &out->precomputation.x); + fp2_mul(&t2, &TT2.z, &out->precomputation.z); + if (!fp2_is_equal(&t2, &t1)) + return 0; + } + + // compute the final codomain + hadamard(&out->codomain, &out->codomain); + return 1; +} + +// sub routine of the gluing eval +static void +gluing_eval_point(theta_point_t *image, const theta_couple_jac_point_t *P, const theta_gluing_t *phi) +{ + theta_point_t T1, T2; + add_components_t add_comp1, add_comp2; + + // Compute the cross addition components of P1+Q1 and P2+Q2 + jac_to_xz_add_components(&add_comp1, &P->P1, &phi->xyK1_8.P1, &phi->domain.E1); + jac_to_xz_add_components(&add_comp2, &P->P2, &phi->xyK1_8.P2, &phi->domain.E2); + + // Compute T1 and T2 derived from the cross addition components. + fp2_mul(&T1.x, &add_comp1.u, &add_comp2.u); // T1x = u1u2 + fp2_mul(&T2.t, &add_comp1.v, &add_comp2.v); // T2t = v1v2 + fp2_add(&T1.x, &T1.x, &T2.t); // T1x = u1u2 + v1v2 + fp2_mul(&T1.y, &add_comp1.u, &add_comp2.w); // T1y = u1w2 + fp2_mul(&T1.z, &add_comp1.w, &add_comp2.u); // T1z = w1u2 + fp2_mul(&T1.t, &add_comp1.w, &add_comp2.w); // T1t = w1w2 + fp2_add(&T2.x, &add_comp1.u, &add_comp1.v); // T2x = (u1+v1) + fp2_add(&T2.y, &add_comp2.u, &add_comp2.v); // T2y = (u2+v2) + fp2_mul(&T2.x, &T2.x, &T2.y); // T2x = (u1+v1)(u2+v2) + fp2_sub(&T2.x, &T2.x, &T1.x); // T1x = v1u2 + u1v2 + fp2_mul(&T2.y, &add_comp1.v, &add_comp2.w); // T2y = v1w2 + fp2_mul(&T2.z, &add_comp1.w, &add_comp2.v); // T2z = w1v2 + fp2_set_zero(&T2.t); // T2t = 0 + + // Apply the basis change and compute their respective square + // theta(P+Q) = M.T1 - M.T2 and theta(P-Q) = M.T1 + M.T2 + apply_isomorphism_general(&T1, &phi->M, &T1, true); + apply_isomorphism_general(&T2, &phi->M, &T2, false); + pointwise_square(&T1, &T1); + pointwise_square(&T2, &T2); + + // the difference between the two is therefore theta(P+Q)theta(P-Q) 
// whose hadamard transform is then the product of the dual
    // theta_points of phi(P) and phi(Q).
    fp2_sub(&T1.x, &T1.x, &T2.x);
    fp2_sub(&T1.y, &T1.y, &T2.y);
    fp2_sub(&T1.z, &T1.z, &T2.z);
    fp2_sub(&T1.t, &T1.t, &T2.t);
    hadamard(&T1, &T1);

    // Compute (x, y, z, t)
    // As imageK1_8 = (x:x:y:y), its inverse is (y:y:x:x).
    fp2_mul(&image->x, &T1.x, &phi->imageK1_8.y);
    fp2_mul(&image->y, &T1.y, &phi->imageK1_8.y);
    fp2_mul(&image->z, &T1.z, &phi->imageK1_8.x);
    fp2_mul(&image->t, &T1.t, &phi->imageK1_8.x);

    hadamard(image, image);
}

// Same as gluing_eval_point but in the very special case where we already know that the point will
// have a zero coordinate at the place where the zero coordinate of the dual_theta_nullpoint would
// have made the computation difficult
// Returns 1 on success and 0 if the coordinate expected to vanish does not.
static int
gluing_eval_point_special_case(theta_point_t *image, const theta_couple_point_t *P, const theta_gluing_t *phi)
{
    theta_point_t T;

    // Apply the basis change
    base_change(&T, phi, P);

    // Apply the to_squared_theta transform
    to_squared_theta(&T, &T);

    // This coordinate should always be 0 in a gluing because D=0.
    // If this is not the case, something went very wrong, so reject
    if (!fp2_is_zero(&T.t))
        return 0;

    // Compute (x, y, z, t)
    // (t is set to zero directly; precomputation.t is not used)
    fp2_mul(&image->x, &T.x, &phi->precomputation.x);
    fp2_mul(&image->y, &T.y, &phi->precomputation.y);
    fp2_mul(&image->z, &T.z, &phi->precomputation.z);
    fp2_set_zero(&image->t);

    hadamard(image, image);
    return 1;
}

/**
 * @brief Evaluate a gluing isogeny from an elliptic product on a basis
 *
 * @param image1 Output: the theta_point of the image of the first couple of points
 * @param image2 Output: the theta_point of the image of the second couple of points
 * @param xyT1 A pair of points (X : Y : Z) on E1E2 to glue using phi
 * @param xyT2 A pair of points (X : Y : Z) on E1E2 to glue using phi
 * @param phi a gluing isogeny E1 x E2 -> A
 *
 * Thin wrapper: calls gluing_eval_point once per input couple.
 **/
static void
gluing_eval_basis(theta_point_t *image1,
                  theta_point_t *image2,
                  const theta_couple_jac_point_t *xyT1,
                  const theta_couple_jac_point_t *xyT2,
                  const theta_gluing_t *phi)
{
    gluing_eval_point(image1, xyT1, phi);
    gluing_eval_point(image2, xyT2, phi);
}

/**
 * @brief Compute a (2,2) isogeny in dimension 2 in the theta_model
 *
 * @param out Output: the theta_isogeny
 * @param A a theta null point for the domain
 * @param T1_8 a point in A[8]
 * @param T2_8 a point in A[8]
 * @param hadamard_bool_1 a boolean used for the last two steps of the chain
 * @param hadamard_bool_2 a boolean used for the last two steps of the chain
 * @param verify add extra sanity checks to ensure our 8-torsion points are
 *        coherent with the isogeny
 *
 * @return 1 on success; 0 if one of the tested projective factors is zero or,
 *         when verify is set, one of the coherence checks fails
 *
 * out : A -> B of kernel [4](T1_8,T2_8)
 * hadamard_bool_1 controls if the domain is in standard or dual coordinates
 * hadamard_bool_2 controls if the codomain is in standard or dual coordinates
 *
 */
static int
theta_isogeny_compute(theta_isogeny_t *out,
                      const theta_structure_t *A,
                      const theta_point_t *T1_8,
                      const theta_point_t *T2_8,
                      bool hadamard_bool_1,
                      bool hadamard_bool_2,
                      bool verify)
{
    out->hadamard_bool_1 = hadamard_bool_1;
    out->hadamard_bool_2 = hadamard_bool_2;
    out->domain = *A;
    out->T1_8 = *T1_8;
    out->T2_8 = *T2_8;
    // the codomain's precomputed products are not filled in here
    out->codomain.precomputation = false;

    theta_point_t TT1, TT2;

    if (hadamard_bool_1) {
        hadamard(&TT1, T1_8);
        to_squared_theta(&TT1, &TT1);
        hadamard(&TT2, T2_8);
        to_squared_theta(&TT2, &TT2);
    } else {
        to_squared_theta(&TT1, T1_8);
        to_squared_theta(&TT2, T2_8);
    }

    fp2_t t1, t2;

    // Test that our projective factor ABCDxzw is non zero, where
    // TT1=(Ax, Bx, Cy, Dy), TT2=(Az, Bw, Cz, Dw)
    // But ABCDxzw=0 can only happen if we had an unexpected splitting in
    // the isogeny chain.
    // In either case reject
    // (this is not strictly necessary, we could just return (0:0:0:0))
    if (fp2_is_zero(&TT2.x) | fp2_is_zero(&TT2.y) | fp2_is_zero(&TT2.z) | fp2_is_zero(&TT2.t) | fp2_is_zero(&TT1.x) |
        fp2_is_zero(&TT1.y))
        return 0;

    fp2_mul(&t1, &TT1.x, &TT2.y);
    fp2_mul(&t2, &TT1.y, &TT2.x);
    fp2_mul(&out->codomain.null_point.x, &TT2.x, &t1);
    fp2_mul(&out->codomain.null_point.y, &TT2.y, &t2);
    fp2_mul(&out->codomain.null_point.z, &TT2.z, &t1);
    fp2_mul(&out->codomain.null_point.t, &TT2.t, &t2);
    fp2_t t3;
    fp2_mul(&t3, &TT2.z, &TT2.t);
    fp2_mul(&out->precomputation.x, &t3, &TT1.y);
    fp2_mul(&out->precomputation.y, &t3, &TT1.x);
    // precomputation.z/.t are the codomain's t/z coordinates (swapped)
    fp2_copy(&out->precomputation.z, &out->codomain.null_point.t);
    fp2_copy(&out->precomputation.t, &out->codomain.null_point.z);

    // If T1_8 and T2_8 are our 8-torsion points, this ensures that the
    // 4-torsion points 2T1_8 and 2T2_8 are isotropic.
    if (verify) {
        fp2_mul(&t1, &TT1.x, &out->precomputation.x);
        fp2_mul(&t2, &TT1.y, &out->precomputation.y);
        if (!fp2_is_equal(&t1, &t2))
            return 0;
        fp2_mul(&t1, &TT1.z, &out->precomputation.z);
        fp2_mul(&t2, &TT1.t, &out->precomputation.t);
        if (!fp2_is_equal(&t1, &t2))
            return 0;
        fp2_mul(&t1, &TT2.x, &out->precomputation.x);
        fp2_mul(&t2, &TT2.z, &out->precomputation.z);
        if (!fp2_is_equal(&t1, &t2))
            return 0;
        fp2_mul(&t1, &TT2.y, &out->precomputation.y);
        fp2_mul(&t2, &TT2.t, &out->precomputation.t);
        if (!fp2_is_equal(&t1, &t2))
            return 0;
    }

    if (hadamard_bool_2) {
        hadamard(&out->codomain.null_point, &out->codomain.null_point);
    }
    return 1;
}

/**
 * @brief Compute a (2,2) isogeny when only the 4 torsion above the kernel is known and not the 8
 * torsion
 *
 * @param out Output: the theta_isogeny
 * @param A a theta null point for the domain
 * @param T1_4 a point in A[4]
 * @param T2_4 a point in A[4] (only stored in out; not used in the computation)
 * @param hadamard_bool_1 a boolean
 * @param hadamard_bool_2 a boolean
 *
 * out : A -> B of kernel [2](T1_4,T2_4)
 * hadamard_bool_1 controls if the domain is in standard or dual coordinates
 * hadamard_bool_2 controls if the codomain is in standard or dual coordinates
 *
 */
static void
theta_isogeny_compute_4(theta_isogeny_t *out,
                        const theta_structure_t *A,
                        const theta_point_t *T1_4,
                        const theta_point_t *T2_4,
                        bool hadamard_bool_1,
                        bool hadamard_bool_2)
{
    out->hadamard_bool_1 = hadamard_bool_1;
    out->hadamard_bool_2 = hadamard_bool_2;
    out->domain = *A;
    // the T1_8/T2_8 fields are reused to hold the 4-torsion points
    out->T1_8 = *T1_4;
    out->T2_8 = *T2_4;
    out->codomain.precomputation = false;

    theta_point_t TT1, TT2;
    // we will compute:
    // TT1 = (xAB, _ , xCD, _)
    // TT2 = (AA,BB,CC,DD)

    if (hadamard_bool_1) {
        hadamard(&TT1, T1_4);
        to_squared_theta(&TT1, &TT1);

        hadamard(&TT2, &A->null_point);
        to_squared_theta(&TT2, &TT2);
    } else {
        to_squared_theta(&TT1, T1_4);
        to_squared_theta(&TT2, &A->null_point);
    }

    fp2_t sqaabb, sqaacc;
    fp2_mul(&sqaabb, &TT2.x, &TT2.y);
    fp2_mul(&sqaacc, &TT2.x, &TT2.z);
    // No need to check the square roots, only used for signing.
    // sqaabb = sqrt(AA*BB)
    fp2_sqrt(&sqaabb);
    // sqaacc = sqrt(AA*CC)
    fp2_sqrt(&sqaacc);

    // we compute
    //   out->codomain.null_point = (xAB * AA * sqaacc,
    //                               xAB * sqaabb * sqaacc,
    //                               xAB * AA * CC,
    //                               xCD * sqaabb * AA)
    //   out->precomputation      = (xAB * BB * CC * DD,
    //                               xAB * CC * DD * sqaabb,
    //                               xAB * BB * DD * sqaacc,
    //                               xCD * sqaabb * sqaacc * BB)

    fp2_mul(&out->codomain.null_point.y, &sqaabb, &sqaacc);
    fp2_mul(&out->precomputation.t, &out->codomain.null_point.y, &TT1.z);
    fp2_mul(&out->codomain.null_point.y, &out->codomain.null_point.y,
            &TT1.x); // done for out->codomain.null_point.y

    fp2_mul(&out->codomain.null_point.t, &TT1.z, &sqaabb);
    fp2_mul(&out->codomain.null_point.t, &out->codomain.null_point.t,
            &TT2.x); // done for out->codomain.null_point.t

    fp2_mul(&out->codomain.null_point.x, &TT1.x, &TT2.x);
    fp2_mul(&out->codomain.null_point.z, &out->codomain.null_point.x,
            &TT2.z); // done for out->codomain.null_point.z
    fp2_mul(&out->codomain.null_point.x, &out->codomain.null_point.x,
            &sqaacc); // done for out->codomain.null_point.x

    fp2_mul(&out->precomputation.x, &TT1.x, &TT2.t);
    fp2_mul(&out->precomputation.z, &out->precomputation.x, &TT2.y);
    fp2_mul(&out->precomputation.x, &out->precomputation.x, &TT2.z);
    fp2_mul(&out->precomputation.y, &out->precomputation.x, &sqaabb); // done for out->precomputation.y
    fp2_mul(&out->precomputation.x, &out->precomputation.x, &TT2.y);  // done for out->precomputation.x
    fp2_mul(&out->precomputation.z, &out->precomputation.z, &sqaacc); // done for out->precomputation.z
    fp2_mul(&out->precomputation.t, &out->precomputation.t, &TT2.y);  // done for out->precomputation.t

    if (hadamard_bool_2) {
        hadamard(&out->codomain.null_point, &out->codomain.null_point);
    }
}

/**
 * @brief Compute a (2,2) isogeny when only the kernel is known and not the 8 or 4 torsion above
 *
 * @param out Output: the theta_isogeny
 * @param A a theta null point for the domain
 * @param T1_2 a point in A[2] (only stored in out; not used in the computation)
 * @param T2_2 a point in A[2] (only stored in out; not used in the computation)
 * @param hadamard_bool_1 a boolean
 * @param hadamard_bool_2 a boolean
 *
 * out : A -> B of kernel (T1_2,T2_2)
 * hadamard_bool_1 controls if the domain is in standard or dual coordinates
 * hadamard_bool_2 controls if the codomain is in standard or dual coordinates
 *
 */
static void
theta_isogeny_compute_2(theta_isogeny_t *out,
                        const theta_structure_t *A,
                        const theta_point_t *T1_2,
                        const theta_point_t *T2_2,
                        bool hadamard_bool_1,
                        bool hadamard_bool_2)
{
    out->hadamard_bool_1 = hadamard_bool_1;
    out->hadamard_bool_2 = hadamard_bool_2;
    out->domain = *A;
    // the T1_8/T2_8 fields are reused to hold the 2-torsion points
    out->T1_8 = *T1_2;
    out->T2_8 = *T2_2;
    out->codomain.precomputation = false;

    theta_point_t TT2;
    // we will compute:
    // TT2 = (AA,BB,CC,DD)

    if (hadamard_bool_1) {
        hadamard(&TT2, &A->null_point);
        to_squared_theta(&TT2, &TT2);
    } else {
        to_squared_theta(&TT2, &A->null_point);
    }

    // we compute out->codomain.null_point = (AA,sqaabb, sqaacc, sqaadd)
    // out->precomputation = ( BB * CC *DD , sqaabb * CC * DD , sqaacc * BB* DD , sqaadd * BB * CC)
    fp2_copy(&out->codomain.null_point.x, &TT2.x);
    fp2_mul(&out->codomain.null_point.y, &TT2.x, &TT2.y);
    fp2_mul(&out->codomain.null_point.z, &TT2.x, &TT2.z);
    fp2_mul(&out->codomain.null_point.t, &TT2.x, &TT2.t);
    // No need to check the square roots, only used for signing.
+ fp2_sqrt(&out->codomain.null_point.y); + fp2_sqrt(&out->codomain.null_point.z); + fp2_sqrt(&out->codomain.null_point.t); + + fp2_mul(&out->precomputation.x, &TT2.z, &TT2.t); + fp2_mul(&out->precomputation.y, + &out->precomputation.x, + &out->codomain.null_point.y); // done for out->precomputation.y + fp2_mul(&out->precomputation.x, &out->precomputation.x, &TT2.y); // done for out->precomputation.x + fp2_mul(&out->precomputation.z, &TT2.t, &out->codomain.null_point.z); + fp2_mul(&out->precomputation.z, &out->precomputation.z, &TT2.y); // done for out->precomputation.z + fp2_mul(&out->precomputation.t, &TT2.z, &out->codomain.null_point.t); + fp2_mul(&out->precomputation.t, &out->precomputation.t, &TT2.y); // done for out->precomputation.t + + if (hadamard_bool_2) { + hadamard(&out->codomain.null_point, &out->codomain.null_point); + } +} + +static void +theta_isogeny_eval(theta_point_t *out, const theta_isogeny_t *phi, const theta_point_t *P) +{ + if (phi->hadamard_bool_1) { + hadamard(out, P); + to_squared_theta(out, out); + } else { + to_squared_theta(out, P); + } + fp2_mul(&out->x, &out->x, &phi->precomputation.x); + fp2_mul(&out->y, &out->y, &phi->precomputation.y); + fp2_mul(&out->z, &out->z, &phi->precomputation.z); + fp2_mul(&out->t, &out->t, &phi->precomputation.t); + + if (phi->hadamard_bool_2) { + hadamard(out, out); + } +} + +#if defined(ENABLE_SIGN) +// Sample a random secret index in [0, 5] to select one of the 6 normalisation +// matrices for the normalisation of the output of the (2,2)-chain during +// splitting +static unsigned char +sample_random_index(void) +{ + // To avoid bias in reduction we should only consider integers smaller + // than 2^32 which are a multiple of 6, so we only reduce bytes with a + // value in [0, 4294967292-1]. + // We have 4294967292/2^32 = ~99.9999999% chance that the first try is "good". 
+ unsigned char seed_arr[4]; + uint32_t seed; + + do { + randombytes(seed_arr, 4); + seed = (seed_arr[0] | (seed_arr[1] << 8) | (seed_arr[2] << 16) | (seed_arr[3] << 24)); + } while (seed >= 4294967292U); + + uint32_t secret_index = seed - (((uint64_t)seed * 2863311531U) >> 34) * 6; + assert(secret_index == seed % 6); // ensure the constant time trick above works + return (unsigned char)secret_index; +} +#endif + +static bool +splitting_compute(theta_splitting_t *out, const theta_structure_t *A, int zero_index, bool randomize) + +{ + // init + uint32_t ctl; + uint32_t count = 0; + fp2_t U_cst, t1, t2; + + memset(&out->M, 0, sizeof(basis_change_matrix_t)); + + // enumerate through all indices + for (int i = 0; i < 10; i++) { + fp2_set_zero(&U_cst); + for (int t = 0; t < 4; t++) { + // Iterate through the null point + choose_index_theta_point(&t2, t, &A->null_point); + choose_index_theta_point(&t1, t ^ EVEN_INDEX[i][1], &A->null_point); + + // Compute t1 * t2 + fp2_mul(&t1, &t1, &t2); + // If CHI_EVAL(i,t) is +1 we want ctl to be 0 and + // If CHI_EVAL(i,t) is -1 we want ctl to be 0xFF..FF + ctl = (uint32_t)(CHI_EVAL[EVEN_INDEX[i][0]][t] >> 1); + assert(ctl == 0 || ctl == 0xffffffff); + + fp2_neg(&t2, &t1); + fp2_select(&t1, &t1, &t2, ctl); + + // Then we compute U_cst ± (t1 * t2) + fp2_add(&U_cst, &U_cst, &t1); + } + + // If U_cst is 0 then update the splitting matrix + ctl = fp2_is_zero(&U_cst); + count -= ctl; + select_base_change_matrix(&out->M, &out->M, &SPLITTING_TRANSFORMS[i], ctl); + if (zero_index != -1 && i == zero_index && + !ctl) { // extra checks if we know exactly where the 0 index should be + return 0; + } + } + +#if defined(ENABLE_SIGN) + // Pick a random normalization matrix + if (randomize) { + unsigned char secret_index = sample_random_index(); + basis_change_matrix_t Mrandom; + + set_base_change_matrix_from_precomp(&Mrandom, &NORMALIZATION_TRANSFORMS[0]); + + // Use a constant time selection to pick the index we want + for (unsigned char i = 1; i 
< 6; i++) { + // When i == secret_index, mask == 0 and 0xFF..FF otherwise + int32_t mask = i - secret_index; + mask = (mask | -mask) >> 31; + select_base_change_matrix(&Mrandom, &Mrandom, &NORMALIZATION_TRANSFORMS[i], ~mask); + } + base_change_matrix_multiplication(&out->M, &Mrandom, &out->M); + } +#else + assert(!randomize); +#endif + + // apply the isomorphism to ensure the null point is compatible with splitting + apply_isomorphism(&out->B.null_point, &out->M, &A->null_point); + + // splitting was successful only if exactly one zero was identified + return count == 1; +} + +static int +theta_product_structure_to_elliptic_product(theta_couple_curve_t *E12, theta_structure_t *A) +{ + fp2_t xx, yy; + + // This should be true from our computations in splitting_compute + // but still check this for sanity + if (!is_product_theta_point(&A->null_point)) + return 0; + + ec_curve_init(&(E12->E1)); + ec_curve_init(&(E12->E2)); + + // A valid elliptic theta null point has no zero coordinate + if (fp2_is_zero(&A->null_point.x) | fp2_is_zero(&A->null_point.y) | fp2_is_zero(&A->null_point.z)) + return 0; + + // xx = x², yy = y² + fp2_sqr(&xx, &A->null_point.x); + fp2_sqr(&yy, &A->null_point.y); + // xx = x^4, yy = y^4 + fp2_sqr(&xx, &xx); + fp2_sqr(&yy, &yy); + + // A2 = -2(x^4+y^4)/(x^4-y^4) + fp2_add(&E12->E2.A, &xx, &yy); + fp2_sub(&E12->E2.C, &xx, &yy); + fp2_add(&E12->E2.A, &E12->E2.A, &E12->E2.A); + fp2_neg(&E12->E2.A, &E12->E2.A); + + // same with x,z + fp2_sqr(&xx, &A->null_point.x); + fp2_sqr(&yy, &A->null_point.z); + fp2_sqr(&xx, &xx); + fp2_sqr(&yy, &yy); + + // A1 = -2(x^4+z^4)/(x^4-z^4) + fp2_add(&E12->E1.A, &xx, &yy); + fp2_sub(&E12->E1.C, &xx, &yy); + fp2_add(&E12->E1.A, &E12->E1.A, &E12->E1.A); + fp2_neg(&E12->E1.A, &E12->E1.A); + + if (fp2_is_zero(&E12->E1.C) | fp2_is_zero(&E12->E2.C)) + return 0; + + return 1; +} + +static int +theta_point_to_montgomery_point(theta_couple_point_t *P12, const theta_point_t *P, const theta_structure_t *A) +{ + fp2_t temp; + 
const fp2_t *x, *z;

    if (!is_product_theta_point(P))
        return 0;

    // Use the (x, y) pair for the second component, falling back to (z, t)
    // when both are zero.
    x = &P->x;
    z = &P->y;
    if (fp2_is_zero(x) & fp2_is_zero(z)) {
        x = &P->z;
        z = &P->t;
    }
    if (fp2_is_zero(x) & fp2_is_zero(z)) {
        return 0; // at this point P=(0:0:0:0) so is invalid
    }
    // P2.X = A.null_point.y * P.x + A.null_point.x * P.y
    // P2.Z = - A.null_point.y * P.x + A.null_point.x * P.y
    fp2_mul(&P12->P2.x, &A->null_point.y, x);
    fp2_mul(&temp, &A->null_point.x, z);
    fp2_sub(&P12->P2.z, &temp, &P12->P2.x);
    fp2_add(&P12->P2.x, &P12->P2.x, &temp);

    // Use the (x, z) pair for the first component, falling back to (y, t)
    // when both are zero.
    x = &P->x;
    z = &P->z;
    if (fp2_is_zero(x) & fp2_is_zero(z)) {
        x = &P->y;
        z = &P->t;
    }
    // P1.X = A.null_point.z * P.x + A.null_point.x * P.z
    // P1.Z = -A.null_point.z * P.x + A.null_point.x * P.z
    fp2_mul(&P12->P1.x, &A->null_point.z, x);
    fp2_mul(&temp, &A->null_point.x, z);
    fp2_sub(&P12->P1.z, &temp, &P12->P1.x);
    fp2_add(&P12->P1.x, &P12->P1.x, &temp);
    return 1;
}

// Shared implementation behind the three theta_chain_compute_and_eval*
// entry points below.
//
// Computes a chain of (2,2)-isogenies starting from the elliptic product E12
// with kernel described by ker, writes the codomain product to E34 and
// replaces each of the numP couple points in P12 by its image.
//
//   n             length parameter of the chain
//   extra_torsion whether ker carries HD_extra_torsion additional bits of
//                 2-power torsion above the kernel
//   verify        forwarded to gluing_compute / theta_isogeny_compute to
//                 enable their extra coherence checks
//   randomize     forwarded to splitting_compute
//
// Returns 1 on success and 0 as soon as any step rejects its input.
static int
_theta_chain_compute_impl(unsigned n,
                          theta_couple_curve_t *E12,
                          const theta_kernel_couple_points_t *ker,
                          bool extra_torsion,
                          theta_couple_curve_t *E34,
                          theta_couple_point_t *P12,
                          size_t numP,
                          bool verify,
                          bool randomize)
{
    theta_structure_t theta;

    // lift the basis
    theta_couple_jac_point_t xyT1, xyT2;

    ec_basis_t bas1 = { .P = ker->T1.P1, .Q = ker->T2.P1, .PmQ = ker->T1m2.P1 };
    ec_basis_t bas2 = { .P = ker->T1.P2, .Q = ker->T2.P2, .PmQ = ker->T1m2.P2 };
    if (!lift_basis(&xyT1.P1, &xyT2.P1, &bas1, &E12->E1))
        return 0;
    if (!lift_basis(&xyT1.P2, &xyT2.P2, &bas2, &E12->E2))
        return 0;

    const unsigned extra = HD_extra_torsion * extra_torsion;

#ifndef NDEBUG
    assert(extra == 0 || extra == 2); // only cases implemented
    if (!test_point_order_twof(&bas2.P, &E12->E2, n + extra))
        debug_print("bas2.P does not have correct order");

    if (!test_jac_order_twof(&xyT2.P2, &E12->E2, n + extra))
        debug_print("xyT2.P2 does not have correct order");
#endif

    // at least one element so the array length is never zero
    theta_point_t pts[numP ? numP : 1];

    // space = 1 + number of doublings of i needed to reach n: the depth of
    // the stacks of intermediate kernel points used below
    int space = 1;
    for (unsigned i = 1; i < n; i *= 2)
        ++space;

    // todo[k] = number of isogeny steps still to be computed from the
    // kernel points stored at stack level k
    // NOTE(review): n - 2 + extra must be >= 1 for the loops below to
    // terminate sensibly; small n is not rejected here — confirm callers
    // guarantee it.
    uint16_t todo[space];
    todo[0] = n - 2 + extra;

    int current = 0;

    // kernel points for the gluing isogeny
    theta_couple_jac_point_t jacQ1[space], jacQ2[space];
    jacQ1[0] = xyT1;
    jacQ2[0] = xyT2;
    while (todo[current] != 1) {
        assert(todo[current] >= 2);
        ++current;
        assert(current < space);
        // the gluing isogeny is quite a bit more expensive than the others,
        // so we adjust the usual splitting rule here a little bit: towards
        // the end of the doubling chain it will be cheaper to recompute the
        // doublings after evaluation than to push the intermediate points.
        const unsigned num_dbls = todo[current - 1] >= 16 ? todo[current - 1] / 2 : todo[current - 1] - 1;
        assert(num_dbls && num_dbls < todo[current - 1]);
        double_couple_jac_point_iter(&jacQ1[current], num_dbls, &jacQ1[current - 1], E12);
        double_couple_jac_point_iter(&jacQ2[current], num_dbls, &jacQ2[current - 1], E12);
        todo[current] = todo[current - 1] - num_dbls;
    }

    // kernel points for the remaining isogeny steps
    theta_point_t thetaQ1[space], thetaQ2[space];

    // the gluing step
    theta_gluing_t first_step;
    {
        assert(todo[current] == 1);

        // compute the gluing isogeny
        if (!gluing_compute(&first_step, E12, &jacQ1[current], &jacQ2[current], verify))
            return 0;

        // evaluate
        for (unsigned j = 0; j < numP; ++j) {
            assert(ec_is_zero(&P12[j].P1) || ec_is_zero(&P12[j].P2));
            if (!gluing_eval_point_special_case(&pts[j], &P12[j], &first_step))
                return 0;
        }

        // push kernel points through gluing isogeny
        // (only the levels below `current`; the top level was consumed)
        for (int j = 0; j < current; ++j) {
            gluing_eval_basis(&thetaQ1[j], &thetaQ2[j], &jacQ1[j], &jacQ2[j], &first_step);
            --todo[j];
        }

        --current;
    }

    // set-up the theta_structure for the first codomain
    theta.null_point = first_step.codomain;
    theta.precomputation = 0;
    theta_precomputation(&theta);

    theta_isogeny_t step;

    // and now we do the remaining steps
    for (unsigned i = 1; current >= 0 && todo[current]; ++i) {
        assert(current < space);
        while (todo[current] != 1) {
            assert(todo[current] >= 2);
            ++current;
            assert(current < space);
            const unsigned num_dbls = todo[current - 1] / 2;
            assert(num_dbls && num_dbls < todo[current - 1]);
            double_iter(&thetaQ1[current], &theta, &thetaQ1[current - 1], num_dbls);
            double_iter(&thetaQ2[current], &theta, &thetaQ2[current - 1], num_dbls);
            todo[current] = todo[current - 1] - num_dbls;
        }

        // computing the next step
        // (the two boolean arguments are hadamard_bool_1 and hadamard_bool_2)
        int ret;
        if (i == n - 2) // penultimate step
            ret = theta_isogeny_compute(&step, &theta, &thetaQ1[current], &thetaQ2[current], 0, 0, verify);
        else if (i == n - 1) // ultimate step
            ret = theta_isogeny_compute(&step, &theta, &thetaQ1[current], &thetaQ2[current], 1, 0, false);
        else
            ret = theta_isogeny_compute(&step, &theta, &thetaQ1[current], &thetaQ2[current], 0, 1, verify);
        if (!ret)
            return 0;

        for (unsigned j = 0; j < numP; ++j)
            theta_isogeny_eval(&pts[j], &step, &pts[j]);

        // updating the codomain
        theta = step.codomain;

        // pushing the kernel
        assert(todo[current] == 1);
        for (int j = 0; j < current; ++j) {
            theta_isogeny_eval(&thetaQ1[j], &step, &thetaQ1[j]);
            theta_isogeny_eval(&thetaQ2[j], &step, &thetaQ2[j]);
            assert(todo[j]);
            --todo[j];
        }

        --current;
    }

    assert(current == -1);

    if (!extra_torsion) {
        // NOTE(review): for n == 3 the loop above never runs (todo[0] == 1 is
        // consumed by the gluing and nothing is pushed through it), so `step`
        // and thetaQ1[0]/thetaQ2[0] look unset when read below — confirm
        // callers never use n == 3 without extra torsion.
        if (n >= 3) {
            // in the last step we've skipped pushing the kernel since current was == 0, let's do it now
            theta_isogeny_eval(&thetaQ1[0], &step, &thetaQ1[0]);
            theta_isogeny_eval(&thetaQ2[0], &step, &thetaQ2[0]);
        }

        // penultimate step
        theta_isogeny_compute_4(&step, &theta, &thetaQ1[0], &thetaQ2[0], 0, 0);
        for (unsigned j = 0; j < numP; ++j)
            theta_isogeny_eval(&pts[j], &step, &pts[j]);
        theta = step.codomain;
        theta_isogeny_eval(&thetaQ1[0], &step, &thetaQ1[0]);
        theta_isogeny_eval(&thetaQ2[0], &step, &thetaQ2[0]);

        // ultimate step
        theta_isogeny_compute_2(&step, &theta, &thetaQ1[0], &thetaQ2[0], 1, 0);
        for (unsigned j = 0; j < numP; ++j)
            theta_isogeny_eval(&pts[j], &step, &pts[j]);
        theta = step.codomain;
    }

    // final splitting step
    theta_splitting_t last_step;

    // with extra torsion the vanishing even index is expected at position 8
    bool is_split = splitting_compute(&last_step, &theta, extra_torsion ? 8 : -1, randomize);

    if (!is_split) {
        debug_print("kernel did not generate an isogeny between elliptic products");
        return 0;
    }

    if (!theta_product_structure_to_elliptic_product(E34, &last_step.B))
        return 0;

    // evaluate
    for (size_t j = 0; j < numP; ++j) {
        apply_isomorphism(&pts[j], &last_step.M, &pts[j]);
        if (!theta_point_to_montgomery_point(&P12[j], &pts[j], &last_step.B))
            return 0;
    }

    return 1;
}

// Compute the chain and evaluate it on P12, without the extra verification
// checks and without randomisation of the splitting.
int
theta_chain_compute_and_eval(unsigned n,
                             /*const*/ theta_couple_curve_t *E12,
                             const theta_kernel_couple_points_t *ker,
                             bool extra_torsion,
                             theta_couple_curve_t *E34,
                             theta_couple_point_t *P12,
                             size_t numP)
{
    return _theta_chain_compute_impl(n, E12, ker, extra_torsion, E34, P12, numP, false, false);
}

// Like theta_chain_compute_and_eval, adding extra verification checks;
// used in the signature verification
int
theta_chain_compute_and_eval_verify(unsigned n,
                                    /*const*/ theta_couple_curve_t *E12,
                                    const theta_kernel_couple_points_t *ker,
                                    bool extra_torsion,
                                    theta_couple_curve_t *E34,
                                    theta_couple_point_t *P12,
                                    size_t numP)
{
    return _theta_chain_compute_impl(n, E12, ker, extra_torsion, E34, P12, numP, true, false);
}

// Like theta_chain_compute_and_eval, but asks splitting_compute to apply a
// randomly chosen normalisation transform (requires an ENABLE_SIGN build).
int
theta_chain_compute_and_eval_randomized(unsigned n,
                                        /*const*/ theta_couple_curve_t *E12,
                                        const theta_kernel_couple_points_t *ker,
                                        bool extra_torsion,
                                        theta_couple_curve_t *E34,
                                        theta_couple_point_t *P12,
                                        size_t numP)
{
    return _theta_chain_compute_impl(n, E12, ker, extra_torsion, E34, P12, numP, false, true);
}
diff --git a/src/pqm4/sqisign_lvl5/ref/theta_isogenies.h b/src/pqm4/sqisign_lvl5/ref/theta_isogenies.h new file mode 100644 index 0000000..d151811 --- /dev/null +++
b/src/pqm4/sqisign_lvl5/ref/theta_isogenies.h @@ -0,0 +1,18 @@ +/** @file + * + * @authors Antonin Leroux + * + * @brief the theta isogeny header + */ + +#ifndef THETA_ISOGENY_H +#define THETA_ISOGENY_H + +#include +#include +#include +#include "theta_structure.h" +#include +#include + +#endif diff --git a/src/pqm4/sqisign_lvl5/ref/theta_structure.c b/src/pqm4/sqisign_lvl5/ref/theta_structure.c new file mode 100644 index 0000000..ce97ac6 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/theta_structure.c @@ -0,0 +1,78 @@ +#include "theta_structure.h" +#include + +void +theta_precomputation(theta_structure_t *A) +{ + + if (A->precomputation) { + return; + } + + theta_point_t A_dual; + to_squared_theta(&A_dual, &A->null_point); + + fp2_t t1, t2; + fp2_mul(&t1, &A_dual.x, &A_dual.y); + fp2_mul(&t2, &A_dual.z, &A_dual.t); + fp2_mul(&A->XYZ0, &t1, &A_dual.z); + fp2_mul(&A->XYT0, &t1, &A_dual.t); + fp2_mul(&A->YZT0, &t2, &A_dual.y); + fp2_mul(&A->XZT0, &t2, &A_dual.x); + + fp2_mul(&t1, &A->null_point.x, &A->null_point.y); + fp2_mul(&t2, &A->null_point.z, &A->null_point.t); + fp2_mul(&A->xyz0, &t1, &A->null_point.z); + fp2_mul(&A->xyt0, &t1, &A->null_point.t); + fp2_mul(&A->yzt0, &t2, &A->null_point.y); + fp2_mul(&A->xzt0, &t2, &A->null_point.x); + + A->precomputation = true; +} + +void +double_point(theta_point_t *out, theta_structure_t *A, const theta_point_t *in) +{ + to_squared_theta(out, in); + fp2_sqr(&out->x, &out->x); + fp2_sqr(&out->y, &out->y); + fp2_sqr(&out->z, &out->z); + fp2_sqr(&out->t, &out->t); + + if (!A->precomputation) { + theta_precomputation(A); + } + fp2_mul(&out->x, &out->x, &A->YZT0); + fp2_mul(&out->y, &out->y, &A->XZT0); + fp2_mul(&out->z, &out->z, &A->XYT0); + fp2_mul(&out->t, &out->t, &A->XYZ0); + + hadamard(out, out); + + fp2_mul(&out->x, &out->x, &A->yzt0); + fp2_mul(&out->y, &out->y, &A->xzt0); + fp2_mul(&out->z, &out->z, &A->xyt0); + fp2_mul(&out->t, &out->t, &A->xyz0); +} + +void +double_iter(theta_point_t *out, theta_structure_t *A, const 
theta_point_t *in, int exp) +{ + if (exp == 0) { + *out = *in; + } else { + double_point(out, A, in); + for (int i = 1; i < exp; i++) { + double_point(out, A, out); + } + } +} + +uint32_t +is_product_theta_point(const theta_point_t *P) +{ + fp2_t t1, t2; + fp2_mul(&t1, &P->x, &P->t); + fp2_mul(&t2, &P->y, &P->z); + return fp2_is_equal(&t1, &t2); +} diff --git a/src/pqm4/sqisign_lvl5/ref/theta_structure.h b/src/pqm4/sqisign_lvl5/ref/theta_structure.h new file mode 100644 index 0000000..fc630b7 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/theta_structure.h @@ -0,0 +1,135 @@ +/** @file + * + * @authors Antonin Leroux + * + * @brief the theta structure header + */ + +#ifndef THETA_STRUCTURE_H +#define THETA_STRUCTURE_H + +#include +#include +#include + +/** @internal + * @ingroup hd_module + * @defgroup hd_theta Functions for theta structures + * @{ + */ + +/** + * @brief Perform the Hadamard transform on a theta point + * + * @param out Output: the theta_point + * @param in a theta point + * in = (x,y,z,t) + * out = (x+y+z+t, x-y+z-t, x+y-z-t, x-y-z+t) + * out may alias in: all coordinates of in are read before out is written + */ +static inline void +hadamard(theta_point_t *out, const theta_point_t *in) +{ + fp2_t t1, t2, t3, t4; + + // t1 = x + y + fp2_add(&t1, &in->x, &in->y); + // t2 = x - y + fp2_sub(&t2, &in->x, &in->y); + // t3 = z + t + fp2_add(&t3, &in->z, &in->t); + // t4 = z - t + fp2_sub(&t4, &in->z, &in->t); + // out = (t1 + t3, t2 + t4, t1 - t3, t2 - t4) + fp2_add(&out->x, &t1, &t3); + fp2_add(&out->y, &t2, &t4); + fp2_sub(&out->z, &t1, &t3); + fp2_sub(&out->t, &t2, &t4); +} + +/** + * @brief Square the coordinates of a theta point + * @param out Output: the theta_point + * @param in a theta point + * in = (x,y,z,t) + * out = (x^2, y^2, z^2, t^2) + * out may alias in: each coordinate is squared independently + */ +static inline void +pointwise_square(theta_point_t *out, const theta_point_t *in) +{ + fp2_sqr(&out->x, &in->x); + fp2_sqr(&out->y, &in->y); + fp2_sqr(&out->z, &in->z); + fp2_sqr(&out->t, &in->t); +} + +/** + * @brief Square the coordinates and then perform the Hadamard transform + * + * @param out Output: the
theta_point + * @param in a theta point + * in = (x,y,z,t) + * out = (x^2+y^2+z^2+t^2, x^2-y^2+z^2-t^2, x^2+y^2-z^2-t^2, x^2-y^2-z^2+t^2) + * + */ +static inline void +to_squared_theta(theta_point_t *out, const theta_point_t *in) +{ + pointwise_square(out, in); + hadamard(out, out); +} + +/** + * @brief Perform the theta structure precomputation + * + * @param A the theta structure; its precomputed constants are filled in + * + * With A.null_point = (x,y,z,t) + * and (X,Y,Z,T) = to_squared_theta(A.null_point), stores in A + * xyz0,xyt0,yzt0,xzt0 = x*y*z, x*y*t, y*z*t, x*z*t and + * XYZ0,XYT0,YZT0,XZT0 = X*Y*Z, X*Y*T, Y*Z*T, X*Z*T (no-op if already done) + */ +void theta_precomputation(theta_structure_t *A); + +/** + * @brief Compute the double of the theta point in on the theta structure A + * + * @param out Output: the theta_point + * @param A a theta structure + * @param in a theta point in the theta structure A + * in = (x,y,z,t) + * out = [2] (x,y,z,t) + * /!\ assumes that no coordinate is zero; the precomputation of A is run here if not done yet + * + */ +void double_point(theta_point_t *out, theta_structure_t *A, const theta_point_t *in); + +/** + * @brief Compute the iterated double of the theta point in on the theta structure A + * + * @param out Output: the theta_point + * @param A a theta structure + * @param in a theta point in the theta structure A + * @param exp the exponent + * in = (x,y,z,t) + * out = [2^exp] (x,y,z,t), a plain copy of in when exp == 0 + * /!\ assumes that no coordinate is zero; the precomputation of A is run on demand by double_point + * + */ +void double_iter(theta_point_t *out, theta_structure_t *A, const theta_point_t *in, int exp); + +/** + * @brief Check if a theta point is a product theta point + * For P = (x,y,z,t) this tests whether x*t == y*z + * @param P a theta point + * @return 0xFFFFFFFF if true, zero otherwise + */ +uint32_t is_product_theta_point(const theta_point_t *P); + +// end hd_theta +/** + * @} + */ + +#endif diff --git a/src/pqm4/sqisign_lvl5/ref/tools.h b/src/pqm4/sqisign_lvl5/ref/tools.h new file mode 100644 index 0000000..5a6a505 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/tools.h @@ -0,0 +1,49 @@ + +#ifndef TOOLS_H
+#define TOOLS_H + +#include + +// Debug printing: +// https://stackoverflow.com/questions/1644868/define-macro-for-debug-printing-in-c +#ifndef NDEBUG +#define DEBUG_PRINT 1 +#else +#define DEBUG_PRINT 0 +#endif + +#ifndef __FILE_NAME__ +#define __FILE_NAME__ "NA" +#endif + +#ifndef __LINE__ +#define __LINE__ 0 +#endif + +// __func__ is a predefined identifier (C99 6.4.2.2), not a macro: guarding it with +// #ifndef always replaced it by "NA" and redefined a reserved name, so the fallback +// is dropped and debug_print now reports the real enclosing function name. + +#define debug_print(fmt) \ + do { \ + if (DEBUG_PRINT) \ + printf("warning: %s, file %s, line %d, function %s().\n", \ + fmt, \ + __FILE_NAME__, \ + __LINE__, \ + __func__); \ + } while (0) + + +clock_t tic(void); +float tac(void); /* time in ms since last tic */ +float TAC(const char *str); /* same, but prints it with label 'str' */ +float toc(const clock_t t); /* time in ms since t */ +float TOC(const clock_t t, const char *str); /* same, but prints it with label 'str' */ +float TOC_clock(const clock_t t, const char *str); + +clock_t dclock(const clock_t t); // return the clock cycle diff between now and t +float clock_to_time(const clock_t t, + const char *str); // convert the number of clock cycles t to time +float clock_print(const clock_t t, const char *str); +#endif diff --git a/src/pqm4/sqisign_lvl5/ref/tutil.h b/src/pqm4/sqisign_lvl5/ref/tutil.h new file mode 100644 index 0000000..59f1620 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/tutil.h @@ -0,0 +1,36 @@ +#ifndef TUTIL_H +#define TUTIL_H + +#include +#include +// Byte-swap helpers: compiler builtins when available, shift/mask fallbacks otherwise (the fallbacks evaluate i more than once). +#if defined(__GNUC__) || defined(__clang__) +#define BSWAP16(i) __builtin_bswap16((i)) +#define BSWAP32(i) __builtin_bswap32((i)) +#define BSWAP64(i) __builtin_bswap64((i)) +#define UNUSED __attribute__((unused)) +#else +#define BSWAP16(i) ((((i) >> 8) & 0xff) | (((i) & 0xff) << 8)) +#define BSWAP32(i) \ + ((((i) >> 24) & 0xff) | (((i) >> 8) & 0xff00) | (((i) & 0xff00) << 8) | ((i) << 24)) +#define BSWAP64(i) ((BSWAP32((i) >> 32) & 0xffffffff) | (BSWAP32(i) << 32)) +#define UNUSED +#endif + +#if defined(RADIX_64) +#define digit_t uint64_t +#define sdigit_t int64_t +#define
RADIX 64 +#define LOG2RADIX 6 +#define BSWAP_DIGIT(i) BSWAP64(i) +#elif defined(RADIX_32) +#define digit_t uint32_t +#define sdigit_t int32_t +#define RADIX 32 +#define LOG2RADIX 5 +#define BSWAP_DIGIT(i) BSWAP32(i) +#else +#error "Radix must be 32bit or 64 bit" +#endif + +#endif diff --git a/src/pqm4/sqisign_lvl5/ref/verification.h b/src/pqm4/sqisign_lvl5/ref/verification.h new file mode 100644 index 0000000..af67469 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/verification.h @@ -0,0 +1,123 @@ +/** @file + * + * @brief The verification protocol + */ + +#ifndef VERIFICATION_H +#define VERIFICATION_H + +#include +#include + +/** @defgroup verification SQIsignHD verification protocol + * @{ + */ + +/** @defgroup verification_t Types for SQIsignHD verification protocol + * @{ + */ + +typedef digit_t scalar_t[NWORDS_ORDER]; // multiprecision scalar stored as NWORDS_ORDER digits +typedef scalar_t scalar_mtx_2x2_t[2][2]; // 2x2 matrix of such scalars + +/** @brief Type for the signature + * + * @typedef signature_t + * + * @struct signature + * + */ +typedef struct signature +{ + fp2_t E_aux_A; // the Montgomery A-coefficient for the auxiliary curve + uint8_t backtracking; // the challenge isogeny has length TORSION_EVEN_POWER - backtracking + uint8_t two_resp_length; // length of the short 2^n-isogeny applied to E_chall during verification + scalar_mtx_2x2_t mat_Bchall_can_to_B_chall; // change-of-basis matrix applied to the canonical basis of E_chall + scalar_t chall_coeff; // challenge scalar: the challenge kernel is P + [chall_coeff]Q + uint8_t hint_aux; // hint used to recompute the canonical basis of the auxiliary curve + uint8_t hint_chall; // hint used to recompute the canonical basis of the challenge curve +} signature_t; + +/** @brief Type for the public keys + * + * @typedef public_key_t + * + * @struct public_key + * + */ +typedef struct public_key +{ + ec_curve_t curve; // the normalized A-coefficient of the Montgomery curve + uint8_t hint_pk; // hint used to recompute the canonical basis of the public-key curve +} public_key_t; + +/** @} + */ + +/*************************** Functions *****************************/ + +void public_key_init(public_key_t *pk); +void public_key_finalize(public_key_t *pk); + +void hash_to_challenge(scalar_t *scalar, + const public_key_t *pk, + const ec_curve_t *com_curve, + const unsigned char *message, + size_t length); + +/** + * @brief Verification + * + * @param sig signature + * @param pk public key + * @param m message + * @param l size + *
@returns 1 if the signature verifies, 0 otherwise + */ +int protocols_verify(signature_t *sig, const public_key_t *pk, const unsigned char *m, size_t l); + +/*************************** Encoding *****************************/ + +/** @defgroup encoding Encoding and decoding functions + * @{ + */ + +/** + * @brief Encodes a signature as a byte array + * + * @param enc : Byte array to encode the signature in + * @param sig : Signature to encode + */ +void signature_to_bytes(unsigned char *enc, const signature_t *sig); + +/** + * @brief Decodes a signature from a byte array + * + * @param sig : Structure to decode the signature in + * @param enc : Byte array to decode + */ +void signature_from_bytes(signature_t *sig, const unsigned char *enc); + +/** + * @brief Encodes a public key as a byte array + * + * @param enc : Byte array to encode the public key in + * @param pk : Public key to encode + */ +unsigned char *public_key_to_bytes(unsigned char *enc, const public_key_t *pk); + +/** + * @brief Decodes a public key from a byte array + * + * @param pk : Structure to decode the public key in + * @param enc : Byte array to decode + */ +const unsigned char *public_key_from_bytes(public_key_t *pk, const unsigned char *enc); + +/** @} + */ + +/** @} + */ + +#endif diff --git a/src/pqm4/sqisign_lvl5/ref/verify.c b/src/pqm4/sqisign_lvl5/ref/verify.c new file mode 100644 index 0000000..b5f78ad --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/verify.c @@ -0,0 +1,309 @@ +#include +#include +#include +#include +#include + +// Check that the basis change matrix elements are canonical representatives modulo +// 2^(SQIsign_response_length + HD_extra_torsion - backtracking). Returns 1 if so, 0 otherwise. +static int +check_canonical_basis_change_matrix(const signature_t *sig) +{ + // This works as long as all values in sig->mat_Bchall_can_to_B_chall are + // positive integers.
+ int ret = 1; + scalar_t aux; + // aux = 1 shifted left by SQIsign_response_length + HD_extra_torsion - backtracking bits + memset(aux, 0, NWORDS_ORDER * sizeof(digit_t)); + aux[0] = 0x1; + multiple_mp_shiftl(aux, SQIsign_response_length + HD_extra_torsion - (int)sig->backtracking, NWORDS_ORDER); + // All four entries are scanned without early exit; presumably mp_compare(a, b) <= 0 means a <= b - TODO confirm + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 2; j++) { + if (mp_compare(aux, sig->mat_Bchall_can_to_B_chall[i][j], NWORDS_ORDER) <= 0) { + ret = 0; + } + } + } + + return ret; +} + +// Compute the 2^n isogeny from the signature with kernel P + [chall_coeff]Q and +// store the codomain in E_chall. Returns 1 on success, 0 on failure. +static int +compute_challenge_verify(ec_curve_t *E_chall, const signature_t *sig, const ec_curve_t *Epk, const uint8_t hint_pk) +{ + ec_basis_t bas_EA; + ec_isog_even_t phi_chall; + + // Set domain and length of 2^n isogeny + copy_curve(&phi_chall.curve, Epk); + phi_chall.length = TORSION_EVEN_POWER - sig->backtracking; + + // Compute the basis from the supplied hint + if (!ec_curve_to_basis_2f_from_hint(&bas_EA, &phi_chall.curve, TORSION_EVEN_POWER, hint_pk)) // canonical + return 0; + + // recovering the exact challenge + { + if (!ec_ladder3pt(&phi_chall.kernel, sig->chall_coeff, &bas_EA.P, &bas_EA.Q, &bas_EA.PmQ, &phi_chall.curve)) { + return 0; + }; + } + + // Double the kernel until it has the correct order + ec_dbl_iter(&phi_chall.kernel, sig->backtracking, &phi_chall.kernel, &phi_chall.curve); + + // Compute the codomain (a nonzero return from ec_eval_even is treated as failure) + copy_curve(E_chall, &phi_chall.curve); + if (ec_eval_even(E_chall, &phi_chall, NULL, 0)) + return 0; + return 1; +} + +// same as matrix_application_even_basis() in id2iso.c, with some modifications: +// - this version works with a matrix of scalars (not ibz_t). +// - reduction modulo 2^f of matrix elements is removed here, because it is +// assumed that the elements are already canonical representatives modulo +// 2^f; this is ensured by calling check_canonical_basis_change_matrix() at +// the beginning of protocols_verify().
+static int +matrix_scalar_application_even_basis(ec_basis_t *bas, const ec_curve_t *E, scalar_mtx_2x2_t *mat, int f) +{ + scalar_t scalar0, scalar1; + memset(scalar0, 0, NWORDS_ORDER * sizeof(digit_t)); + memset(scalar1, 0, NWORDS_ORDER * sizeof(digit_t)); + // Work on a copy: bas is overwritten below while the original P, Q, PmQ are still needed. + ec_basis_t tmp_bas; + copy_basis(&tmp_bas, bas); + + // For a matrix [[a, c], [b, d]] we compute: + // + // first basis element R = [a]P + [b]Q + if (!ec_biscalar_mul(&bas->P, (*mat)[0][0], (*mat)[1][0], f, &tmp_bas, E)) + return 0; + // second basis element S = [c]P + [d]Q + if (!ec_biscalar_mul(&bas->Q, (*mat)[0][1], (*mat)[1][1], f, &tmp_bas, E)) + return 0; + // Their difference R - S = [a - c]P + [b - d]Q + mp_sub(scalar0, (*mat)[0][0], (*mat)[0][1], NWORDS_ORDER); + mp_mod_2exp(scalar0, f, NWORDS_ORDER); + mp_sub(scalar1, (*mat)[1][0], (*mat)[1][1], NWORDS_ORDER); + mp_mod_2exp(scalar1, f, NWORDS_ORDER); + return ec_biscalar_mul(&bas->PmQ, scalar0, scalar1, f, &tmp_bas, E); +} + +// Compute the bases for the challenge and auxiliary curve from +// the canonical bases. Challenge basis is reconstructed from the +// change-of-basis matrix stored in the signature. Returns 1 on success, 0 on failure.
+static int +challenge_and_aux_basis_verify(ec_basis_t *B_chall_can, + ec_basis_t *B_aux_can, + ec_curve_t *E_chall, + ec_curve_t *E_aux, + signature_t *sig, + const int pow_dim2_deg_resp) +{ + + // recovering the canonical basis as TORSION_EVEN_POWER for consistency with signing + if (!ec_curve_to_basis_2f_from_hint(B_chall_can, E_chall, TORSION_EVEN_POWER, sig->hint_chall)) + return 0; + + // setting to the right order + ec_dbl_iter_basis(B_chall_can, + TORSION_EVEN_POWER - pow_dim2_deg_resp - HD_extra_torsion - sig->two_resp_length, + B_chall_can, + E_chall); + + if (!ec_curve_to_basis_2f_from_hint(B_aux_can, E_aux, TORSION_EVEN_POWER, sig->hint_aux)) + return 0; + + // setting to the right order + ec_dbl_iter_basis(B_aux_can, TORSION_EVEN_POWER - pow_dim2_deg_resp - HD_extra_torsion, B_aux_can, E_aux); + +#ifndef NDEBUG + if (!test_basis_order_twof(B_chall_can, E_chall, HD_extra_torsion + pow_dim2_deg_resp + sig->two_resp_length)) + debug_print("canonical basis has wrong order, expect something to fail"); +#endif + + // applying the change matrix on the basis of E_chall + return matrix_scalar_application_even_basis(B_chall_can, + E_chall, + &sig->mat_Bchall_can_to_B_chall, + pow_dim2_deg_resp + HD_extra_torsion + sig->two_resp_length); +} + +// When two_resp_length is non-zero, we must compute a small 2^n-isogeny +// updating E_chall as the codomain as well as push the basis on E_chall +// through this isogeny +static int +two_response_isogeny_verify(ec_curve_t *E_chall, ec_basis_t *B_chall_can, const signature_t *sig, int pow_dim2_deg_resp) +{ + ec_point_t ker, points[3]; + + // choosing the right point for the small two_isogenies + if (mp_is_even(sig->mat_Bchall_can_to_B_chall[0][0], NWORDS_ORDER) && + mp_is_even(sig->mat_Bchall_can_to_B_chall[1][0], NWORDS_ORDER)) { + copy_point(&ker, &B_chall_can->Q); + } else { + copy_point(&ker, &B_chall_can->P); + } + + copy_point(&points[0], &B_chall_can->P); + copy_point(&points[1], &B_chall_can->Q); + 
copy_point(&points[2], &B_chall_can->PmQ); + + ec_dbl_iter(&ker, pow_dim2_deg_resp + HD_extra_torsion, &ker, E_chall); + +#ifndef NDEBUG + if (!test_point_order_twof(&ker, E_chall, sig->two_resp_length)) + debug_print("kernel does not have order 2^(two_resp_length"); +#endif + + if (ec_eval_small_chain(E_chall, &ker, sig->two_resp_length, points, 3, false)) { + return 0; + } + +#ifndef NDEBUG + if (!test_point_order_twof(&points[0], E_chall, HD_extra_torsion + pow_dim2_deg_resp)) + debug_print("points[0] does not have order 2^(HD_extra_torsion + pow_dim2_deg_resp"); + if (!test_point_order_twof(&points[1], E_chall, HD_extra_torsion + pow_dim2_deg_resp)) + debug_print("points[1] does not have order 2^(HD_extra_torsion + pow_dim2_deg_resp"); + if (!test_point_order_twof(&points[2], E_chall, HD_extra_torsion + pow_dim2_deg_resp)) + debug_print("points[2] does not have order 2^(HD_extra_torsion + pow_dim2_deg_resp"); +#endif + + copy_point(&B_chall_can->P, &points[0]); + copy_point(&B_chall_can->Q, &points[1]); + copy_point(&B_chall_can->PmQ, &points[2]); + return 1; +} + +// The commitment curve can be recovered from the codomain of the 2D +// isogeny built from the bases computed during verification. 
+static int +compute_commitment_curve_verify(ec_curve_t *E_com, + const ec_basis_t *B_chall_can, + const ec_basis_t *B_aux_can, + const ec_curve_t *E_chall, + const ec_curve_t *E_aux, + int pow_dim2_deg_resp) + +{ +#ifndef NDEBUG + // Check all the points are the correct order + if (!test_basis_order_twof(B_chall_can, E_chall, HD_extra_torsion + pow_dim2_deg_resp)) + debug_print("B_chall_can does not have order 2^(HD_extra_torsion + pow_dim2_deg_resp"); + + if (!test_basis_order_twof(B_aux_can, E_aux, HD_extra_torsion + pow_dim2_deg_resp)) + debug_print("B_aux_can does not have order 2^(HD_extra_torsion + pow_dim2_deg_resp"); +#endif + + // now compute the dim2 isogeny from Echall x E_aux -> E_com x E_aux' + // of kernel B_chall_can x B_aux_can + + // first we set-up the kernel + theta_couple_curve_t EchallxEaux; + copy_curve(&EchallxEaux.E1, E_chall); + copy_curve(&EchallxEaux.E2, E_aux); + + theta_kernel_couple_points_t dim_two_ker; + copy_bases_to_kernel(&dim_two_ker, B_chall_can, B_aux_can); + + // computing the isogeny + theta_couple_curve_t codomain; + int codomain_splits; + ec_curve_init(&codomain.E1); + ec_curve_init(&codomain.E2); + // handling the special case where we don't need to perform any dim2 computation + if (pow_dim2_deg_resp == 0) { + codomain_splits = 1; + copy_curve(&codomain.E1, &EchallxEaux.E1); + copy_curve(&codomain.E2, &EchallxEaux.E2); + // We still need to check that E_chall is supersingular + // This assumes that HD_extra_torsion == 2 + if (!ec_is_basis_four_torsion(B_chall_can, E_chall)) { + return 0; + } + } else { + codomain_splits = theta_chain_compute_and_eval_verify( + pow_dim2_deg_resp, &EchallxEaux, &dim_two_ker, true, &codomain, NULL, 0); + } + + // computing the commitment curve + // its always the first one because of our (2^n,2^n)-isogeny formulae + copy_curve(E_com, &codomain.E1); + + return codomain_splits; +} + +// SQIsign verification +int +protocols_verify(signature_t *sig, const public_key_t *pk, const unsigned char 
*m, size_t l) +{ + int verify; + + if (!check_canonical_basis_change_matrix(sig)) + return 0; + + // Computation of the length of the dim 2 2^n isogeny + int pow_dim2_deg_resp = SQIsign_response_length - (int)sig->two_resp_length - (int)sig->backtracking; + + // basic sanity test: checking that the response is not too long + if (pow_dim2_deg_resp < 0) + return 0; + // The dim 2 isogeny embeds a dim 1 isogeny of odd degree, so a chain of length 1 is rejected. + // NOTE(review): this comment used to say "never be of length 2" while the test is == 1 - confirm intent. + if (pow_dim2_deg_resp == 1) + return 0; + + // check the public curve is valid + if (!ec_curve_verify_A(&(pk->curve).A)) + return 0; + + // Set auxiliary curve from the A-coefficient within the signature + ec_curve_t E_aux; + if (!ec_curve_init_from_A(&E_aux, &sig->E_aux_A)) + return 0; // invalid curve + + // checking that we are given A-coefficients and no precomputation + assert(fp2_is_one(&pk->curve.C) == 0xFFFFFFFF && !pk->curve.is_A24_computed_and_normalized); + + // computation of the challenge + ec_curve_t E_chall; + if (!compute_challenge_verify(&E_chall, sig, &pk->curve, pk->hint_pk)) { + return 0; + } + + // Computation of the canonical bases for the challenge and aux curve + ec_basis_t B_chall_can, B_aux_can; + + if (!challenge_and_aux_basis_verify(&B_chall_can, &B_aux_can, &E_chall, &E_aux, sig, pow_dim2_deg_resp)) { + return 0; + } + + // When two_resp_length != 0 we need to compute a second, short 2^r-isogeny + if (sig->two_resp_length > 0) { + if (!two_response_isogeny_verify(&E_chall, &B_chall_can, sig, pow_dim2_deg_resp)) { + return 0; + } + } + + // We can recover the commitment curve with a 2D isogeny. + // If the supplied signature does not yield an isogeny between elliptic products, + // it is definitely an invalid signature.
+ ec_curve_t E_com; + if (!compute_commitment_curve_verify(&E_com, &B_chall_can, &B_aux_can, &E_chall, &E_aux, pow_dim2_deg_resp)) + return 0; + + scalar_t chk_chall; + + // recomputing the challenge vector + hash_to_challenge(&chk_chall, pk, &E_com, m, l); + + // final check: accept only if the recomputed challenge equals sig->chall_coeff + verify = mp_compare(sig->chall_coeff, chk_chall, NWORDS_ORDER) == 0; + + return verify; +} diff --git a/src/pqm4/sqisign_lvl5/ref/xeval.c b/src/pqm4/sqisign_lvl5/ref/xeval.c new file mode 100644 index 0000000..7fc7170 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/xeval.c @@ -0,0 +1,64 @@ +#include "isog.h" +#include "ec.h" +#include + +// ----------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------------------- + +// Degree-2 isogeny evaluation with kernel generated by P != (0, 0); maps Q[0..lenQ-1] to R[0..lenQ-1] +void +xeval_2(ec_point_t *R, ec_point_t *const Q, const int lenQ, const ec_kps2_t *kps) +{ + fp2_t t0, t1, t2; + for (int j = 0; j < lenQ; j++) { + fp2_add(&t0, &Q[j].x, &Q[j].z); + fp2_sub(&t1, &Q[j].x, &Q[j].z); + fp2_mul(&t2, &kps->K.x, &t1); + fp2_mul(&t1, &kps->K.z, &t0); + fp2_add(&t0, &t2, &t1); + fp2_sub(&t1, &t2, &t1); + fp2_mul(&R[j].x, &Q[j].x, &t0); + fp2_mul(&R[j].z, &Q[j].z, &t1); + } +} + +void +xeval_2_singular(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_kps2_t *kps) +{ + fp2_t t0, t1; + for (int i = 0; i < lenQ; i++) { + fp2_mul(&t0, &Q[i].x, &Q[i].z); + fp2_mul(&t1, &kps->K.x, &Q[i].z); + fp2_add(&t1, &t1, &Q[i].x); + fp2_mul(&t1, &t1, &Q[i].x); + fp2_sqr(&R[i].x, &Q[i].z); + fp2_add(&R[i].x, &R[i].x, &t1); + fp2_mul(&R[i].z, &t0, &kps->K.z); + } +} + +// Degree-4 isogeny evaluation with kernel generated by P such that [2]P != (0, 0) +void +xeval_4(ec_point_t *R, const ec_point_t *Q, const int lenQ, const ec_kps4_t *kps) +{ + const ec_point_t *K = kps->K; + + fp2_t t0, t1; + + for (int i = 0; i < lenQ; i++) { + fp2_add(&t0, &Q[i].x, &Q[i].z); + fp2_sub(&t1,
&Q[i].x, &Q[i].z); + fp2_mul(&(R[i].x), &t0, &K[1].x); + fp2_mul(&(R[i].z), &t1, &K[2].x); + fp2_mul(&t0, &t0, &t1); + fp2_mul(&t0, &t0, &K[0].x); + fp2_add(&t1, &(R[i].x), &(R[i].z)); + fp2_sub(&(R[i].z), &(R[i].x), &(R[i].z)); + fp2_sqr(&t1, &t1); + fp2_sqr(&(R[i].z), &(R[i].z)); + fp2_add(&(R[i].x), &t0, &t1); + fp2_sub(&t0, &t0, &(R[i].z)); + fp2_mul(&(R[i].x), &(R[i].x), &t1); + fp2_mul(&(R[i].z), &(R[i].z), &t0); + } +} diff --git a/src/pqm4/sqisign_lvl5/ref/xisog.c b/src/pqm4/sqisign_lvl5/ref/xisog.c new file mode 100644 index 0000000..7242d29 --- /dev/null +++ b/src/pqm4/sqisign_lvl5/ref/xisog.c @@ -0,0 +1,61 @@ +#include "isog.h" +#include "ec.h" +#include + +// ------------------------------------------------------------------------- +// ------------------------------------------------------------------------- + +// Degree-2 isogeny with kernel generated by P != (0 ,0) +// Outputs the curve coefficient in the form A24=(A+2C:4C) +void +xisog_2(ec_kps2_t *kps, ec_point_t *B, const ec_point_t P) +{ + fp2_sqr(&B->x, &P.x); + fp2_sqr(&B->z, &P.z); + fp2_sub(&B->x, &B->z, &B->x); + fp2_add(&kps->K.x, &P.x, &P.z); + fp2_sub(&kps->K.z, &P.x, &P.z); +} + +void +xisog_2_singular(ec_kps2_t *kps, ec_point_t *B24, ec_point_t A24) +{ + // No need to check the square root, only used for signing. 
+ fp2_t t0, four; + fp2_set_small(&four, 4); + fp2_add(&t0, &A24.x, &A24.x); + fp2_sub(&t0, &t0, &A24.z); + fp2_add(&t0, &t0, &t0); + fp2_inv(&A24.z); + fp2_mul(&t0, &t0, &A24.z); + fp2_copy(&kps->K.x, &t0); + fp2_add(&B24->x, &t0, &t0); + fp2_sqr(&t0, &t0); + fp2_sub(&t0, &t0, &four); + fp2_sqrt(&t0); + fp2_neg(&kps->K.z, &t0); + fp2_add(&B24->z, &t0, &t0); + fp2_add(&B24->x, &B24->x, &B24->z); + fp2_add(&B24->z, &B24->z, &B24->z); +} + +// Degree-4 isogeny with kernel generated by P such that [2]P != (0 ,0) +// Outputs the curve coefficient in the form A24=(A+2C:4C) +void +xisog_4(ec_kps4_t *kps, ec_point_t *B, const ec_point_t P) +{ + ec_point_t *K = kps->K; + + fp2_sqr(&K[0].x, &P.x); + fp2_sqr(&K[0].z, &P.z); + fp2_add(&K[1].x, &K[0].z, &K[0].x); + fp2_sub(&K[1].z, &K[0].z, &K[0].x); + fp2_mul(&B->x, &K[1].x, &K[1].z); + fp2_sqr(&B->z, &K[0].z); + + // Constants for xeval_4 + fp2_add(&K[2].x, &P.x, &P.z); + fp2_sub(&K[1].x, &P.x, &P.z); + fp2_add(&K[0].x, &K[0].z, &K[0].z); + fp2_add(&K[0].x, &K[0].x, &K[0].x); +}