@@ -7,400 +7,270 @@
/* File-local CTR_DRBG state (Key, V, reseed_counter) shared by all routines below. */
static AES256_CTR_DRBG_struct DRBG_ctx;
// 优化1: 改进S-box实现, 减少内存操作
static inline uint32_t AES_sbox_x4 ( uint32_t in ) {
static inline uint32_t
AES_sbox_x4 ( uint32_t in )
{
uint8x16_t sbox_val = vreinterpretq_u8_u32 ( vdupq_n_u32 ( in ) ) ;
uint8x16_t sbox_val = vreinterpretq_u8_u32 ( vdupq_n_u32 ( in ) ) ;
sbox_val = vaeseq_u8 ( sbox_val , vdupq_n_u8 ( 0 ) ) ;
sbox_val = vaeseq_u8 ( sbox_val , vdupq_n_u8 ( 0 ) ) ;
return vgetq_lane_u32 ( vreinterpretq_u32_u8 ( sbox_val ) , 0 ) ;
return vgetq_lane_u32 ( vreinterpretq_u32_u8 ( sbox_val ) , 0 ) ;
}
}
/*
 * Rotate the 32-bit value `x` right by `n` bits.
 * `n` must be in the range 1..31 (a shift by 32 would be undefined).
 * Both arguments are parenthesized so expressions can be passed safely.
 */
#define ROTR32(x, n) \
    ((uint32_t)(((uint32_t)(x) << (32 - (n))) | ((uint32_t)(x) >> (n))))
/*
 * AES-256 expanded key: 15 round keys of 16 bytes each, addressable either
 * byte-wise (u8[round][byte]) or as four 32-bit words per round key
 * (u32[round][word]).
 */
typedef union {
    uint8_t u8[15][16];
    uint32_t u32[15][4];
} subkeys_t;
/*
 * Expand a 256-bit AES key into the 15 round keys of AES-256.
 *
 * subkeys - output: 15 round keys of 16 bytes each (240 bytes in total)
 * key     - input: the 32-byte cipher key
 *
 * The expansion works on 60 32-bit words held in a local array, so the byte
 * buffer supplied by the caller is never accessed through a wider type.
 * Words are loaded with memcpy and the round constant is XORed into the low
 * byte, i.e. the code assumes a little-endian target.
 */
static void AES256_key_schedule(uint8_t subkeys[15][16], const uint8_t *key) {
    uint32_t w[60];
    uint32_t rcon = 1;

    /* The first two round keys are the cipher key itself. */
    memcpy(w, key, 32);

    for (int i = 8; i < 60; i++) {
        uint32_t t = w[i - 1];

        if (i % 8 == 0) {
            /* SubWord(RotWord(t)) ^ Rcon; RotWord is a right rotation by 8
             * for a little-endian word. */
            const uint32_t s = AES_sbox_x4(t);
            t = ROTR32(s, 8) ^ rcon;
            /* Only 0x01..0x40 are consumed by AES-256, so a plain shift is
             * enough and no reduction is needed. */
            rcon <<= 1;
        } else if (i % 8 == 4) {
            /* AES-256 applies SubWord (without rotation) mid-way. */
            t = AES_sbox_x4(t);
        }
        w[i] = w[i - 8] ^ t;
    }

    memcpy(subkeys, w, sizeof w);
}
// 优化4: 改进 AES- 256 ECB实现,减少循环开销
# define AES256_ ECB_XWAYS(ways, vsubkeys, ctr, out) \
static inline void
do { \
AES256_ECB_XWAYS_OPTIMIZED ( int ways , const uint8x16_t vsubkeys [ 15 ] , uint8x16_t state[ ] , unsigned char * out )
uint8x16_t state[ways]; \
{
\
// 第一轮: AddRoundKey
for (int j = 0; j < ways; j++) { \
for ( int j = 0 ; j < ways ; j + + ) {
state[j] = vaeseq_u8(ctr[j], vsubkeys[0]); \
state[ j ] = vaese q_u8( state [ j ] , vsubkeys [ 0 ] ) ;
state[j] = vaesmc q_u8(state[j]); \
state [ j ] = vaesmcq_u8 ( state [ j ] ) ;
} \
\
for (int i = 1; i < 13; i++) { \
for (int j = 0; j < ways; j++) { \
state[j] = vaeseq_u8(state[j], vsubkeys[i]); \
state[j] = vaesmcq_u8(state[j]); \
} \
} \
\
for (int j = 0; j < ways; j++) { \
state[j] = vaeseq_u8(state[j], vsubkeys[13]); \
state[j] = veorq_u8(state[j], vsubkeys[14]); \
vst1q_u8(out + j * 16, state[j]); \
} \
} while (0);
// subkeys - subkeys for AES-256
// ctr - a 128-bit plaintext value
// buffer - a 128-bit ciphertext value
static void AES256_ECB ( uint8x16_t vsubkeys [ 15 ] , uint8x16_t ctr ,
unsigned char * buffer ) {
AES256_ECB_XWAYS ( 1 , vsubkeys , ( & ctr ) , buffer ) ;
}
}
// 中间轮: SubBytes, ShiftRows, MixColumns, AddRoundKey
// vsubkeys - subkeys for AES-256
for ( int i = 1 ; i < 13 ; i + + ) {
// ctr - an array of 3 x 128-bit plaintext value
uint8x16_t subkey = vsubkeys [ i ] ;
// buffer - an array of 3 x 128-bit ciphertext value
for ( int j = 0 ; j < ways ; j + + ) {
static void AES256_ECB_x3 ( u int8x16_t vsubkeys [ 15 ] , uint8x16_t ctr [ 3 ] ,
state [ j ] = vaeseq_u8 ( state [ j ] , subkey ) ;
unsigned char * buffer ) {
state [ j ] = vaesmcq_u8 ( state [ j ] ) ;
AES256_ECB_XWAYS ( 3 , vsubkeys , ctr , buffer ) ;
}
}
}
// 最后一轮: SubBytes, ShiftRows, AddRoundKey
static void bswap128 ( __uint128_t * x ) {
for ( int j = 0 ; j < ways ; j + + ) {
uint64_t * x64 = ( uint64_t * ) x ;
state [ j ] = vaeseq_u8 ( state [ j ] , vsubkeys [ 13 ] ) ;
state [ j ] = veorq_u8 ( state [ j ] , vsubkeys [ 14 ] ) ;
uint64_t t = x64 [ 0 ] ;
vst1q_u8 ( out + j * 16 , state [ j ] ) ;
x64 [ 0 ] = x64 [ 1 ] ;
}
x64 [ 1 ] = t ;
x64 [ 0 ] = __builtin_bswap64 ( x64 [ 0 ] ) ;
x64 [ 1 ] = __builtin_bswap64 ( x64 [ 1 ] ) ;
}
}
// 优化5: 使用向量化的字节交换函数
static void add_to_V ( unsigned char V [ ] , int incr ) {
static inline void
__uint128_t * V128 = ( __uint128_t * ) V ;
bswap128_vectorized ( uint8x16_t * v )
bswap128( V128 ) ;
{
( * V128 ) + = incr ;
// 使用vrev64q_u8和vtrn1q_u8等指令优化字节交换
bswap128 ( V128 ) ;
uint8x16_t reversed = vrev64q_u8 ( * v ) ;
uint8x8x2_t halves = vtrn_u8 ( vget_low_u8 ( reversed ) , vget_high_u8 ( reversed ) ) ;
* v = vcombine_u8 ( halves . val [ 1 ] , halves . val [ 0 ] ) ;
}
}
// 优化6: 改进计数器增量函数
static void AES256_CTR_DRBG_Update ( unsigned char * provided_data ,
static inli ne void
uint8x16_t vsubkeys [ 15 ] , unsig ned char * Key ,
add_to_V_optimized ( unsigned char V [ ] , int incr )
unsigned char * V ) {
{
// 使用向量化操作增加计数器
uint8x16_t vV = vld1q_u8 ( V ) ;
uint64x2_t vV64 = vreinterpretq_u64_u8 ( vV ) ;
// 处理64位增量
uint64x2_t incr64 = vdupq_n_u64 ( ( uint64_t ) incr ) ;
vV64 = vaddq_u64 ( vV64 , incr64 ) ;
// 如果低64位溢出, 增加高64位
uint64_t low = vgetq_lane_u64 ( vV64 , 0 ) ;
if ( low < ( uint64_t ) incr ) {
uint64_t high = vgetq_lane_u64 ( vV64 , 1 ) ;
vV64 = vsetq_lane_u64 ( high + 1 , vV64 , 1 ) ;
}
vV = vreinterpretq_u8_u64 ( vV64 ) ;
bswap128_vectorized ( & vV ) ;
vst1q_u8 ( V , vV ) ;
}
/*
 * Choose how many AES blocks to process per batch for a request of
 * `data_size` bytes.
 *
 *   data_size <  256   -> 4
 *   data_size < 1024   -> 6
 *   otherwise          -> 8
 *
 * NOTE(review): the thresholds look like tuning placeholders; confirm them
 * with measurements before relying on them.
 */
static int
determine_optimal_ways ( unsigned long long data_size )
{
    int ways = 8 ;

    if ( data_size < 256 ) {
        ways = 4 ;
    } else if ( data_size < 1024 ) {
        ways = 6 ;
    }

    return ways ;
}
/*
 * CTR_DRBG update step: derive a new Key and V from the current state.
 *
 * provided_data - 48 bytes XORed into the freshly generated block, or NULL
 * vsubkeys      - round keys expanded from the current Key
 * Key           - output: receives the new 32-byte key
 * V             - in/out: 16-byte big-endian counter
 *
 * The counter is incremented before each of the three encryptions. After the
 * new V has been written it is incremented once more, so the stored value is
 * the next counter to be encrypted.
 */
static void AES256_CTR_DRBG_Update_Optimized(unsigned char *provided_data,
                                             const uint8x16_t vsubkeys[15],
                                             unsigned char *Key,
                                             unsigned char *V) {
    unsigned char temp[48];
    uint8x16_t ctr[3];
    __uint128_t counter;

    /* Do the arithmetic on a native integer; V itself is big-endian. */
    memcpy(&counter, V, sizeof counter);
    bswap128(&counter);

    for (int j = 0; j < 3; j++) {
        __uint128_t be;

        counter++;
        be = counter;
        bswap128(&be);
        ctr[j] = vld1q_u8((const uint8_t *)&be);
    }

    AES256_ECB_XWAYS_OPTIMIZED(3, vsubkeys, ctr, temp);

    if (provided_data != NULL) {
        for (int i = 0; i < 48; i++) {
            temp[i] ^= provided_data[i];
        }
    }

    memcpy(Key, temp, 32);
    memcpy(V, temp + 32, 16);
    add_to_V(V, 1);
}

/*
 * Same contract as AES256_CTR_DRBG_Update_Optimized; this is the name used by
 * callers that hold a non-const round-key array.
 */
static void AES256_CTR_DRBG_Update(unsigned char *provided_data,
                                   uint8x16_t vsubkeys[15], unsigned char *Key,
                                   unsigned char *V) {
    AES256_CTR_DRBG_Update_Optimized(provided_data, vsubkeys, Key, V);
}
// 优化8: 改进初始化函数
void randombytes_init_arm64crypto ( unsigned char * entropy_input ,
void
randombytes_init_arm64crypto_optimized ( unsigned char * entropy_input ,
unsigned char * personalization_string ,
unsigned char * personalization_string ,
int security_strength )
int security_strength ) {
{
( void ) security_strength ;
( void ) security_strength ;
unsigned char seed_material [ 48 ] ;
unsigned char seed_material [ 48 ] ;
uint8_t subkeys [ 15 ] [ 16 ] ;
uint8_t subkeys [ 15 ] [ 16 ] ;
uint8x16_t vsubkeys [ 15 ] ;
uint8x16_t vsubkeys [ 15 ] ;
// 使用向量化操作初始化种子材料
if ( personalization_string ) {
uint8x16_t vEntropy = vld1q_u8 ( entropy_input ) ;
uint8x16_t vPersonal = vld1q_u8 ( personalization_string ) ;
vst1q_u8 ( seed_material , veorq_u8 ( vEntropy , vPersonal ) ) ;
vEntropy = vld1q_u8 ( entropy_input + 16 ) ;
vPersonal = vld1q_u8 ( personalization_string + 16 ) ;
vst1q_u8 ( seed_material + 16 , veorq_u8 ( vEntropy , vPersonal ) ) ;
vEntropy = vld1q_u8 ( entropy_input + 32 ) ;
vPersonal = vld1q_u8 ( personalization_string + 32 ) ;
vst1q_u8 ( seed_material + 32 , veorq_u8 ( vEntropy , vPersonal ) ) ;
} else {
memcpy ( seed_material , entropy_input , 48 ) ;
memcpy ( seed_material , entropy_input , 48 ) ;
}
if ( personalization_string )
for ( int i = 0 ; i < 48 ; i + + )
seed_material [ i ] ^ = personalization_string [ i ] ;
memset ( DRBG_ctx . Key , 0x00 , 32 ) ;
memset ( DRBG_ctx . V , 0x00 , 16 ) ;
// 初始化密钥和V为零
uint8x16_t vZero = vdupq_n_u8 ( 0 ) ;
vst1q_u8 ( DRBG_ctx . Key , vZero ) ;
vst1q_u8 ( DRBG_ctx . Key + 16 , vZero ) ;
vst1q_u8 ( DRBG_ctx . V , vZero ) ;
// 生成子密钥
AES256_key_schedule ( subkeys , DRBG_ctx . Key ) ;
AES256_key_schedule ( subkeys , DRBG_ctx . Key ) ;
for ( int i = 0 ; i < 15 ; i + + ) {
for ( int i = 0 ; i < 15 ; i + + ) {
vsubkeys [ i ] = vld1q_u8 ( subkeys [ i ] ) ;
vsubkeys [ i ] = vld1q_u8 ( subkeys [ i ] ) ;
}
}
// 更新DRBG状态
AES256_CTR_DRBG_Update ( seed_material , vsubkeys , DRBG_ctx . Key , DRBG_ctx . V ) ;
AES256_CTR_DRBG_Update_Optimized ( seed_material , vsubkeys , DRBG_ctx . Key , DRBG_ctx . V ) ;
DRBG_ctx . reseed_counter = 1 ;
DRBG_ctx . reseed_counter = 1 ;
}
}
// 优化9: 动态选择WAYS值的主随机数生成函数
# define WAYS 4
int
randombytes_arm64crypto_optimized ( unsigned char * x , unsigned long long xlen )
int randombytes_arm64crypto ( unsigned char * x , unsigned long long xlen ) {
{
uint8_t subkeys [ 15 ] [ 16 ] ;
uint8_t subkeys [ 15 ] [ 16 ] ;
unsigned char block [ 16 ] ;
unsigned char block [ 16 ] ;
__uint128_t V [ WAYS ] , Vle [ WAYS ] ;
uint8x16x4_t vV ;
uint8x16_t vsubkeys [ 15 ] ;
uint8x16_t vsubkeys [ 15 ] ;
// 预先计算子密钥
AES256_key_schedule ( subkeys , DRBG_ctx . Key ) ;
AES256_key_schedule ( subkeys , DRBG_ctx . Key ) ;
for ( int j = 0 ; j < 15 ; j + + ) {
for ( int j = 0 ; j < 15 ; j + + ) {
vsubkeys [ j ] = vld1q_u8 ( subkeys [ j ] ) ;
vsubkeys [ j ] = vld1q_u8 ( subkeys [ j ] ) ;
}
}
// 根据数据大小动态确定最优的WAYS值
memcpy ( & Vle [ 0 ] , DRBG_ctx . V , sizeof ( Vle [ 0 ] ) ) ;
int ways = determine_optimal_ways ( xlen ) ;
V [ 0 ] = Vle [ 0 ] ;
vV . val [ 0 ] = vld1q_u8 ( ( uint8_t * ) & V [ 0 ] ) ;
// 处理大块数据( 使用动态确定的WAYS值)
bswap128 ( & Vle [ 0 ] ) ;
if ( xlen > = ways * 16 ) {
for ( int j = 1 ; j < WAYS ; j + + ) {
// 使用动态分配的数组来适应不同的WAYS值
Vle [ j ] = Vle [ j - 1 ] + 1 ;
uint8x16_t vV_array [ 12 ] ; // 最多支持12路并行
V [ j ] = Vle [ j ] ;
uint8x16_t vV = vld1q_u8 ( DRBG_ctx . V ) ;
bswap128 ( & V [ j ] ) ;
vV . val [ j ] = vld1q_u8 ( ( uint8_t * ) & V [ j ] ) ;
// 初始化计数器值
vV_array [ 0 ] = vV ;
for ( int j = 1 ; j < ways ; j + + ) {
uint64x2_t vV64 = vreinterpretq_u64_u8 ( vV ) ;
uint64x2_t inc = vdupq_n_u64 ( j ) ;
vV64 = vaddq_u64 ( vV64 , inc ) ;
vV_array [ j ] = vreinterpretq_u8_u64 ( vV64 ) ;
}
}
// 处理大块数据
int entered_fast_path = ( xlen > = WAYS * 16 ) ? 1 : 0 ;
while ( xlen > = ways * 16 ) {
// 批量AES加密
AES256_ECB_XWAYS_OPTIMIZED ( ways , vsubkeys , vV_array , x ) ;
// 更新计数器值
while ( xlen > = WAYS * 16 ) {
uint64x2_t vV64 = vreinterpretq_u64_u8 ( vV_array [ ways - 1 ] ) ;
for ( int j = 0 ; j < WAYS ; j + + ) {
uint64x2_t inc = vdupq_n_u64 ( ways ) ;
Vle [ j ] + = 4 ;
vV64 = vaddq_u64 ( vV64 , inc ) ;
for ( int j = 0 ; j < ways ; j + + ) {
uint64x2_t current = vreinterpretq_u64_u8 ( vV_array [ j ] ) ;
current = vaddq_u64 ( current , inc ) ;
vV_array [ j ] = vreinterpretq_u8_u64 ( current ) ;
}
}
x + = ways * 16 ;
for ( int j = 0 ; j < WAYS ; j + + ) {
xlen - = ways * 16 ;
vV . val [ j ] = vaeseq_u8 ( vV . val [ j ] , vsubkeys [ 0 ] ) ;
vV . val [ j ] = vaesmcq_u8 ( vV . val [ j ] ) ;
}
}
// 更新V为最后一个计数器值
for ( int i = 1 ; i < 13 ; i + + ) {
vV = vV_array [ ways - 1 ] ;
for ( int j = 0 ; j < WAYS ; j + + ) {
vst1q_u8 ( DRBG_ctx . V , vV ) ;
vV . val [ j ] = vaeseq_u8 ( vV . val [ j ] , vsubkeys [ i ] ) ;
vV . val [ j ] = vaesmcq_u8 ( vV . val [ j ] ) ;
}
}
for ( int j = 0 ; j < WAYS ; j + + ) {
vV . val [ j ] = vaeseq_u8 ( vV . val [ j ] , vsubkeys [ 13 ] ) ;
vV . val [ j ] = veorq_u8 ( vV . val [ j ] , vsubkeys [ 14 ] ) ;
vst1q_u8 ( x + j * 16 , vV . val [ j ] ) ;
}
for ( int j = 0 ; j < WAYS ; j + + ) {
V [ j ] = Vle [ j ] ;
bswap128 ( & V [ j ] ) ;
}
vV = vld1q_u8_x4 ( ( uint8_t * ) V ) ;
x + = WAYS * 16 ;
xlen - = WAYS * 16 ;
}
if ( entered_fast_path & & xlen = = 0 ) {
asm volatile ( " " : " +r,m " ( Vle [ 3 ] ) : : " memory " ) ;
V [ 0 ] = Vle [ 3 ] - 4 ;
bswap128 ( & V [ 0 ] ) ;
}
}
// 处理剩余数据(小量数据)
while ( xlen > 0 ) {
while ( xlen > 0 ) {
uint8x16_t vV = vld1q_u8 ( DRBG_ctx . V ) ;
if ( xlen > 16 ) {
if ( xlen > 16 ) {
uint8x16 _t state = vV ;
AES256_ECB ( vsubkeys , vld1q_u8 ( ( uint8_t * ) & V [ 0 ] ) , x ) ;
AES256_ECB_XWAYS_OPTIMIZED ( 1 , vsubkeys , & state , x ) ;
x + = 16 ;
x + = 16 ;
xlen - = 16 ;
xlen - = 16 ;
Vle [ 0 ] + + ;
V [ 0 ] = Vle [ 0 ] ;
bswap128 ( & V [ 0 ] ) ;
} else {
} else {
uint8x16 _t state = vV ;
AES256_ECB ( vsubkeys , vld1q_u8 ( ( uint8_t * ) & V [ 0 ] ) , block ) ;
AES256_ECB_XWAYS_OPTIMIZED ( 1 , vsubkeys , & state , block ) ;
memcpy ( x , block , xlen ) ;
memcpy ( x , block , xlen ) ;
xlen = 0 ;
xlen = 0 ;
}
}
// 增量V
add_to_V_optimized ( DRBG_ctx . V , 1 ) ;
}
}
// 更新DRBG状态
memcpy ( DRBG_ctx . V , & V [ 0 ] , sizeof ( V [ 0 ] ) ) ;
AES256_CTR_DRBG_Update_Optimized ( NULL , vsubkeys , DRBG_ctx . Key , DRBG_ctx . V ) ;
AES256_CTR_DRBG_Update ( NULL , vsubkeys , DRBG_ctx . Key , DRBG_ctx . V ) ;
DRBG_ctx . reseed_counter + + ;
DRBG_ctx . reseed_counter + + ;
return RNG_SUCCESS ;
return RNG_SUCCESS ;
}
}
// // 高级版本:带有自适应学习能力的随机数生成函数
// int
// randombytes_arm64crypto_adaptive(unsigned char *x, unsigned long long xlen)
// {
// // 静态变量用于记录历史性能数据
// static unsigned long long total_bytes_processed = 0;
// static unsigned long long total_time_used = 0; // 假设有时间测量机制
// uint8_t subkeys[15][16];
// uint8x16_t vsubkeys[15];
// // 预先计算子密钥
// AES256_key_schedule(subkeys, DRBG_ctx.Key);
// for (int j = 0; j < 15; j++) {
// vsubkeys[j] = vld1q_u8(subkeys[j]);
// }
// // 基于历史性能数据自适应选择WAYS值
// int ways;
// if (total_bytes_processed > 1024 * 1024) { // 如果已经处理了1MB以上数据
// // 基于历史平均性能选择最优WAYS
// // 这里简化为基于历史平均值的选择,实际中可以更复杂
// unsigned long long avg_bytes_per_time = total_bytes_processed / (total_time_used ? total_time_used : 1);
// if (avg_bytes_per_time > 1000) { // 假设阈值
// ways = (xlen > 4096) ? 12 : 8; // 高性能情况下使用更高并行度
// } else {
// ways = (xlen > 1024) ? 8 : 6; // 普通情况
// }
// } else {
// // 初始阶段使用基本规则
// ways = determine_optimal_ways(xlen);
// }
// // 确保不超过最大支持的并行度
// ways = (ways > 12) ? 12 : ways;
// // 这里开始实际的处理, 与前面函数类似, 但使用动态确定的ways值
// // ... (实现与randombytes_arm64crypto_optimized类似)
// // 更新历史统计
// total_bytes_processed += xlen;
// // total_time_used += elapsed_time; // 需要实际测量时间
// return RNG_SUCCESS;
// }
// Public wrapper functions
# ifdef RANDOMBYTES_ARM64CRYPTO
# ifdef RANDOMBYTES_ARM64CRYPTO
/*
 * Fill random_array with nbytes bytes from the ARM64-crypto CTR_DRBG.
 *
 * Returns the status reported by randombytes_arm64crypto.
 *
 * When ENABLE_CT_TESTING is defined, the whole output buffer is marked as
 * undefined for Valgrind; the length passed is nbytes, since the return
 * value is a status code and not a byte count.
 */
int randombytes(unsigned char *random_array, unsigned long long nbytes) {
    int ret = randombytes_arm64crypto(random_array, nbytes);
#ifdef ENABLE_CT_TESTING
    VALGRIND_MAKE_MEM_UNDEFINED(random_array, nbytes);
#endif
    return ret;
}
/*
 * Seed the generator; forwards all arguments unchanged to
 * randombytes_init_arm64crypto.
 */
void randombytes_init(unsigned char *entropy_input,
                      unsigned char *personalization_string,
                      int security_strength) {
    randombytes_init_arm64crypto(entropy_input, personalization_string,
                                 security_strength);
}
# endif
# endif