mirror of git://git.gnupg.org/gnupg.git

Improved AES performance.

Werner Koch 2008-03-22 17:01:37 +00:00
parent a614eabba9
commit 537cada38e
10 changed files with 392 additions and 242 deletions

ChangeLog

@@ -1,3 +1,13 @@
2008-03-22 Werner Koch <wk@g10code.com>
* cipher.c (struct cipher_handle_s): Make sure IV is u32
aligned. Change all users of IV.
(do_cfb_encrypt): Optimize and use bulk code for AES.
(do_cfb_decrypt): Ditto.
* rijndael.c (do_encrypt): Remove.
(do_encrypt_aligned, do_encrypt): New. Taken from libgcrypt.
(rijndael_cfb_enc, rijndael_cfb_dec): New.
2007-12-12 Werner Koch <wk@g10code.com>
* pubkey.c (pubkey_encrypt, pubkey_decrypt): Allow type 20 keys.


@@ -118,8 +118,6 @@ twofish_get_info( int algo, size_t *keylen,
void (**decryptf)( void *c, byte *outbuf, const byte *inbuf )
);
/* this is just a kludge for the time we have not yet changed the cipher
* stuff to the scheme we use for random and digests */
const char *
rijndael_get_info( int algo, size_t *keylen,
size_t *blocksize, size_t *contextsize,
@@ -127,6 +125,12 @@ rijndael_get_info( int algo, size_t *keylen,
void (**encryptf)(void *c, byte *outbuf, const byte *inbuf),
void (**decryptf)(void *c, byte *outbuf, const byte *inbuf)
);
void rijndael_cfb_enc (void *context, unsigned char *iv,
void *outbuf_arg, const void *inbuf_arg,
unsigned int nblocks);
void rijndael_cfb_dec (void *context, unsigned char *iv,
void *outbuf_arg, const void *inbuf_arg,
unsigned int nblocks);
const char *
idea_get_info( int algo, size_t *keylen,

cipher.c

@@ -1,6 +1,6 @@
/* cipher.c - cipher dispatcher
* Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
* 2007 Free Software Foundation, Inc.
* 2007, 2008 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
@@ -52,17 +52,26 @@ static struct cipher_table_s cipher_table[TABLE_SIZE];
static int disabled_algos[TABLE_SIZE];
struct cipher_handle_s {
int algo;
int mode;
size_t blocksize;
byte iv[MAX_BLOCKSIZE]; /* (this should be ulong aligned) */
byte lastiv[MAX_BLOCKSIZE];
int unused; /* in IV */
int (*setkey)( void *c, const byte *key, unsigned keylen );
void (*encrypt)( void *c, byte *outbuf, const byte *inbuf );
void (*decrypt)( void *c, byte *outbuf, const byte *inbuf );
PROPERLY_ALIGNED_TYPE context;
struct cipher_handle_s
{
int algo;
int mode;
size_t blocksize;
/* The initialization vector. To help code optimization we make
sure that it is aligned on an unsigned long and u32 boundary. */
union {
unsigned long dummy_ul_iv;
u32 dummy_u32_iv;
unsigned char iv[MAX_BLOCKSIZE];
} u_iv;
byte lastiv[MAX_BLOCKSIZE];
int unused; /* in IV */
int (*setkey)( void *c, const byte *key, unsigned keylen );
void (*encrypt)( void *c, byte *outbuf, const byte *inbuf );
void (*decrypt)( void *c, byte *outbuf, const byte *inbuf );
PROPERLY_ALIGNED_TYPE context;
};
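The union above is the portable pre-C11 way to force alignment: a union is aligned for its strictest member, and every member, including the byte array, shares that alignment. A minimal standalone sketch of the same idiom (illustrative names only; C11's alignof is used purely to demonstrate the effect and is not needed by the committed code):

#include <stdio.h>
#include <stdalign.h>

#define MAX_BLOCKSIZE 16

/* The byte array inherits the alignment of the widest member, so
   word-wise access through a (u32 *) or (unsigned long *) cast is
   safe even on strict-alignment CPUs. */
union aligned_iv {
  unsigned long dummy_ul;
  unsigned char iv[MAX_BLOCKSIZE];
};

int main (void)
{
  /* A bare byte array only promises an alignment of 1; the union
     promises at least that of unsigned long. */
  printf ("alignof(unsigned char[16]) = %zu\n",
          alignof (unsigned char[16]));
  printf ("alignof(union aligned_iv)  = %zu\n",
          alignof (union aligned_iv));
  return 0;
}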
@@ -459,14 +468,14 @@ cipher_setkey( CIPHER_HANDLE c, byte *key, unsigned keylen )
void
cipher_setiv( CIPHER_HANDLE c, const byte *iv, unsigned ivlen )
{
memset( c->iv, 0, c->blocksize );
memset( c->u_iv.iv, 0, c->blocksize );
if( iv ) {
if( ivlen != c->blocksize )
log_info("WARNING: cipher_setiv: ivlen=%u blklen=%u\n",
ivlen, (unsigned)c->blocksize );
if( ivlen > c->blocksize )
ivlen = c->blocksize;
memcpy( c->iv, iv, ivlen );
memcpy( c->u_iv.iv, iv, ivlen );
}
c->unused = 0;
}
@@ -507,10 +516,10 @@ do_cbc_encrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nblocks )
/* fixme: the xor should work on words and not on
* bytes. Maybe it is a good idea to enhance the cipher backend
* API to allow for CBC handling in the backend */
for(ivp=c->iv,i=0; i < blocksize; i++ )
for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
outbuf[i] = inbuf[i] ^ *ivp++;
(*c->encrypt)( &c->context.c, outbuf, outbuf );
memcpy(c->iv, outbuf, blocksize );
memcpy(c->u_iv.iv, outbuf, blocksize );
inbuf += c->blocksize;
outbuf += c->blocksize;
}
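The fixme above asks for a word-wise XOR. A sketch of what that could look like, assuming GnuPG's u32 and byte typedefs from types.h and callers that guarantee u32 alignment of all three buffers (xor_block_u32 is a hypothetical helper, not part of this commit):

/* XOR BLOCKSIZE bytes word-wise instead of byte-wise. Valid only
   when OUT, IN and IV are all u32 aligned (e.g. via the IV union
   above) and blocksize is a multiple of sizeof(u32). */
static void
xor_block_u32 (byte *out, const byte *in, const byte *iv,
               size_t blocksize)
{
  u32 *o = (u32 *)out;
  const u32 *i = (const u32 *)in;
  const u32 *v = (const u32 *)iv;
  size_t n;

  for (n = 0; n < blocksize / sizeof (u32); n++)
    o[n] = i[n] ^ v[n];
}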
@@ -530,9 +539,9 @@ do_cbc_decrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nblocks )
* for this here because it is not used otherwise */
memcpy(c->lastiv, inbuf, blocksize );
(*c->decrypt)( &c->context.c, outbuf, inbuf );
for(ivp=c->iv,i=0; i < blocksize; i++ )
for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
outbuf[i] ^= *ivp++;
memcpy(c->iv, c->lastiv, blocksize );
memcpy(c->u_iv.iv, c->lastiv, blocksize );
inbuf += c->blocksize;
outbuf += c->blocksize;
}
@@ -542,119 +551,181 @@ do_cbc_decrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nblocks )
static void
do_cfb_encrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nbytes )
{
byte *ivp;
size_t blocksize = c->blocksize;
byte *ivp;
size_t blocksize = c->blocksize;
size_t blocksize_x_2 = blocksize + blocksize;
if( nbytes <= c->unused ) {
/* short enough to be encoded by the remaining XOR mask */
/* XOR the input with the IV and store input into IV */
for(ivp=c->iv+c->blocksize - c->unused; nbytes; nbytes--, c->unused-- )
if ( nbytes <= c->unused )
{
/* Short enough to be encoded by the remaining XOR mask. XOR
the input with the IV and store input into IV. */
for (ivp=c->u_iv.iv+c->blocksize - c->unused; nbytes;
nbytes--, c->unused-- )
*outbuf++ = (*ivp++ ^= *inbuf++);
return;
}
if( c->unused ) {
/* XOR the input with the IV and store input into IV */
nbytes -= c->unused;
for(ivp=c->iv+blocksize - c->unused; c->unused; c->unused-- )
*outbuf++ = (*ivp++ ^= *inbuf++);
if ( c->unused )
{
/* XOR the input with the IV and store input into IV. */
nbytes -= c->unused;
for (ivp=c->u_iv.iv+blocksize - c->unused; c->unused; c->unused-- )
*outbuf++ = (*ivp++ ^= *inbuf++);
}
/* Now we can process complete blocks. */
#if 0
/* Experimental code. We may only use this for standard CFB
because for Phil's mode we need to save the IV before the
last encryption - we don't want to do this in the fast CFB
encryption routine. */
if (c->algo == CIPHER_ALGO_AES
&& nbytes >= blocksize
&& c->mode != CIPHER_MODE_PHILS_CFB) {
size_t n;
/* Now we can process complete blocks. We use a loop as long as we
have at least 2 blocks and use conditions for the rest. This
also allows us to use a bulk encryption function if available. */
#ifdef USE_AES
if (nbytes >= blocksize_x_2
&& (c->algo == CIPHER_ALGO_AES
|| c->algo == CIPHER_ALGO_AES256
|| c->algo == CIPHER_ALGO_AES192))
{
unsigned int nblocks = nbytes / blocksize;
rijndael_cfb_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
outbuf += nblocks * blocksize;
inbuf += nblocks * blocksize;
nbytes -= nblocks * blocksize;
}
else
#endif /*USE_AES*/
{
while ( nbytes >= blocksize_x_2 )
{
int i;
/* Encrypt the IV. */
c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
/* XOR the input with the IV and store input into IV. */
for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
*outbuf++ = (*ivp++ ^= *inbuf++);
nbytes -= blocksize;
}
}
memcpy( c->lastiv, c->iv, blocksize );
n = (nbytes / blocksize) * blocksize;
rijndael_cfb_encrypt (&c->context.c, c->iv, outbuf, inbuf, n);
inbuf += n;
outbuf += n;
nbytes -= n;
if ( nbytes >= blocksize )
{
int i;
/* Save the current IV and then encrypt the IV. */
memcpy( c->lastiv, c->u_iv.iv, blocksize );
c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
/* XOR the input with the IV and store input into IV */
for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
*outbuf++ = (*ivp++ ^= *inbuf++);
nbytes -= blocksize;
}
#endif
while( nbytes >= blocksize ) {
int i;
/* encrypt the IV (and save the current one) */
memcpy( c->lastiv, c->iv, blocksize );
(*c->encrypt)( &c->context.c, c->iv, c->iv );
/* XOR the input with the IV and store input into IV */
for(ivp=c->iv,i=0; i < blocksize; i++ )
*outbuf++ = (*ivp++ ^= *inbuf++);
nbytes -= blocksize;
}
if( nbytes ) { /* process the remaining bytes */
/* encrypt the IV (and save the current one) */
memcpy( c->lastiv, c->iv, blocksize );
(*c->encrypt)( &c->context.c, c->iv, c->iv );
c->unused = blocksize;
/* and apply the xor */
c->unused -= nbytes;
for(ivp=c->iv; nbytes; nbytes-- )
*outbuf++ = (*ivp++ ^= *inbuf++);
if ( nbytes )
{
/* Save the current IV and then encrypt the IV. */
memcpy (c->lastiv, c->u_iv.iv, blocksize );
c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
c->unused = blocksize;
/* Apply the XOR. */
c->unused -= nbytes;
for(ivp=c->u_iv.iv; nbytes; nbytes-- )
*outbuf++ = (*ivp++ ^= *inbuf++);
}
}
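For reference, CFB encryption computes C[i] = P[i] XOR E_k(C[i-1]), with the IV standing in for C[-1]; decryption runs the block cipher in the same encrypt direction, which is why the bulk helpers only ever need the encryption primitive. A self-contained sketch of the full-block pattern used above (the callback type is illustrative, standing in for do_encrypt_aligned or c->encrypt):

#include <stddef.h>

#define BLOCKSIZE 16

/* Stand-in for a block cipher's encrypt primitive. */
typedef void (*block_encrypt_fn)(void *ctx, unsigned char *out,
                                 const unsigned char *in);

/* Full-block CFB encryption: C[i] = P[i] ^ E_k(C[i-1]), with the
   IV acting as C[-1]. The freshly produced ciphertext block is
   kept in IV for the next iteration, which is exactly what the
   `*outbuf++ = (*ivp++ ^= *inbuf++)` idiom above does in place. */
static void
cfb_encrypt_blocks (void *ctx, block_encrypt_fn enc, unsigned char *iv,
                    unsigned char *out, const unsigned char *in,
                    size_t nblocks)
{
  size_t i;

  for (; nblocks; nblocks--)
    {
      enc (ctx, iv, iv);             /* keystream = E_k(previous C) */
      for (i = 0; i < BLOCKSIZE; i++)
        out[i] = (iv[i] ^= in[i]);   /* emit C; it is also the next IV */
      out += BLOCKSIZE;
      in  += BLOCKSIZE;
    }
}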
static void
do_cfb_decrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nbytes )
{
byte *ivp;
ulong temp;
size_t blocksize = c->blocksize;
if( nbytes <= c->unused ) {
/* short enough to be encoded by the remaining XOR mask */
/* XOR the input with the IV and store input into IV */
for(ivp=c->iv+blocksize - c->unused; nbytes; nbytes--,c->unused--){
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
return;
unsigned char *ivp;
unsigned long temp;
int i;
size_t blocksize = c->blocksize;
size_t blocksize_x_2 = blocksize + blocksize;
if (nbytes <= c->unused)
{
/* Short enough to be encoded by the remaining XOR mask. */
/* XOR the input with the IV and store input into IV. */
for (ivp=c->u_iv.iv+blocksize - c->unused;
nbytes;
nbytes--, c->unused--)
{
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
return;
}
if (c->unused)
{
/* XOR the input with the IV and store input into IV. */
nbytes -= c->unused;
for (ivp=c->u_iv.iv+blocksize - c->unused; c->unused; c->unused-- )
{
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
}
/* Now we can process complete blocks. We use a loop as long as we
have at least 2 blocks and use conditions for the rest. This
also allows us to use a bulk encryption function if available. */
#ifdef USE_AES
if (nbytes >= blocksize_x_2
&& (c->algo == CIPHER_ALGO_AES
|| c->algo == CIPHER_ALGO_AES256
|| c->algo == CIPHER_ALGO_AES192))
{
unsigned int nblocks = nbytes / blocksize;
rijndael_cfb_dec (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
outbuf += nblocks * blocksize;
inbuf += nblocks * blocksize;
nbytes -= nblocks * blocksize;
}
else
#endif /*USE_AES*/
{
while (nbytes >= blocksize_x_2 )
{
/* Encrypt the IV. */
c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
/* XOR the input with the IV and store input into IV. */
for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
{
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
nbytes -= blocksize;
}
}
if( c->unused ) {
/* XOR the input with the IV and store input into IV */
nbytes -= c->unused;
for(ivp=c->iv+blocksize - c->unused; c->unused; c->unused-- ) {
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
if (nbytes >= blocksize )
{
/* Save the current IV and then encrypt the IV. */
memcpy ( c->lastiv, c->u_iv.iv, blocksize);
c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
/* XOR the input with the IV and store input into IV */
for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
{
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
nbytes -= blocksize;
}
/* now we can process complete blocks */
while( nbytes >= blocksize ) {
int i;
/* encrypt the IV (and save the current one) */
memcpy( c->lastiv, c->iv, blocksize );
(*c->encrypt)( &c->context.c, c->iv, c->iv );
/* XOR the input with the IV and store input into IV */
for(ivp=c->iv,i=0; i < blocksize; i++ ) {
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
nbytes -= blocksize;
}
if( nbytes ) { /* process the remaining bytes */
/* encrypt the IV (and save the current one) */
memcpy( c->lastiv, c->iv, blocksize );
(*c->encrypt)( &c->context.c, c->iv, c->iv );
c->unused = blocksize;
/* and apply the xor */
c->unused -= nbytes;
for(ivp=c->iv; nbytes; nbytes-- ) {
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
if (nbytes)
{
/* Save the current IV and then encrypt the IV. */
memcpy ( c->lastiv, c->u_iv.iv, blocksize );
c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
c->unused = blocksize;
/* Apply the XOR. */
c->unused -= nbytes;
for (ivp=c->u_iv.iv; nbytes; nbytes-- )
{
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
}
}
@@ -732,8 +803,8 @@ void
cipher_sync( CIPHER_HANDLE c )
{
if( c->mode == CIPHER_MODE_PHILS_CFB && c->unused ) {
memmove(c->iv + c->unused, c->iv, c->blocksize - c->unused );
memcpy(c->iv, c->lastiv + c->blocksize - c->unused, c->unused);
memmove(c->u_iv.iv + c->unused, c->u_iv.iv, c->blocksize - c->unused );
memcpy(c->u_iv.iv, c->lastiv + c->blocksize - c->unused, c->unused);
c->unused = 0;
}
}

rijndael.c

@@ -1,5 +1,5 @@
/* Rijndael (AES) for GnuPG
* Copyright (C) 2000, 2001 Free Software Foundation, Inc.
* Copyright (C) 2000, 2001, 2008 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
@@ -1866,86 +1866,119 @@ prepare_decryption( RIJNDAEL_context *ctx )
/* Encrypt one block. A and B may be the same. */
/* Encrypt one block. A and B need to be aligned on a 4 byte
boundary. A and B may be the same. */
static void
do_encrypt (const RIJNDAEL_context *ctx, byte *b, const byte *a)
do_encrypt_aligned (const RIJNDAEL_context *ctx,
unsigned char *b, const unsigned char *a)
{
int r;
byte temp[4][4];
int ROUNDS = ctx->ROUNDS;
#define rk (ctx->keySched)
int ROUNDS = ctx->ROUNDS;
int r;
union
{
u32 tempu32[4]; /* Force correct alignment. */
byte temp[4][4];
} u;
*((u32*)temp[0]) = *((u32*)(a )) ^ *((u32*)rk[0][0]);
*((u32*)temp[1]) = *((u32*)(a+ 4)) ^ *((u32*)rk[0][1]);
*((u32*)temp[2]) = *((u32*)(a+ 8)) ^ *((u32*)rk[0][2]);
*((u32*)temp[3]) = *((u32*)(a+12)) ^ *((u32*)rk[0][3]);
*((u32*)(b )) = *((u32*)T1[temp[0][0]])
^ *((u32*)T2[temp[1][1]])
^ *((u32*)T3[temp[2][2]])
^ *((u32*)T4[temp[3][3]]);
*((u32*)(b + 4)) = *((u32*)T1[temp[1][0]])
^ *((u32*)T2[temp[2][1]])
^ *((u32*)T3[temp[3][2]])
^ *((u32*)T4[temp[0][3]]);
*((u32*)(b + 8)) = *((u32*)T1[temp[2][0]])
^ *((u32*)T2[temp[3][1]])
^ *((u32*)T3[temp[0][2]])
^ *((u32*)T4[temp[1][3]]);
*((u32*)(b +12)) = *((u32*)T1[temp[3][0]])
^ *((u32*)T2[temp[0][1]])
^ *((u32*)T3[temp[1][2]])
^ *((u32*)T4[temp[2][3]]);
for (r = 1; r < ROUNDS-1; r++) {
*((u32*)temp[0]) = *((u32*)(b )) ^ *((u32*)rk[r][0]);
*((u32*)temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[r][1]);
*((u32*)temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[r][2]);
*((u32*)temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[r][3]);
*((u32*)u.temp[0]) = *((u32*)(a )) ^ *((u32*)rk[0][0]);
*((u32*)u.temp[1]) = *((u32*)(a+ 4)) ^ *((u32*)rk[0][1]);
*((u32*)u.temp[2]) = *((u32*)(a+ 8)) ^ *((u32*)rk[0][2]);
*((u32*)u.temp[3]) = *((u32*)(a+12)) ^ *((u32*)rk[0][3]);
*((u32*)(b )) = (*((u32*)T1[u.temp[0][0]])
^ *((u32*)T2[u.temp[1][1]])
^ *((u32*)T3[u.temp[2][2]])
^ *((u32*)T4[u.temp[3][3]]));
*((u32*)(b + 4)) = (*((u32*)T1[u.temp[1][0]])
^ *((u32*)T2[u.temp[2][1]])
^ *((u32*)T3[u.temp[3][2]])
^ *((u32*)T4[u.temp[0][3]]));
*((u32*)(b + 8)) = (*((u32*)T1[u.temp[2][0]])
^ *((u32*)T2[u.temp[3][1]])
^ *((u32*)T3[u.temp[0][2]])
^ *((u32*)T4[u.temp[1][3]]));
*((u32*)(b +12)) = (*((u32*)T1[u.temp[3][0]])
^ *((u32*)T2[u.temp[0][1]])
^ *((u32*)T3[u.temp[1][2]])
^ *((u32*)T4[u.temp[2][3]]));
*((u32*)(b )) = *((u32*)T1[temp[0][0]])
^ *((u32*)T2[temp[1][1]])
^ *((u32*)T3[temp[2][2]])
^ *((u32*)T4[temp[3][3]]);
*((u32*)(b + 4)) = *((u32*)T1[temp[1][0]])
^ *((u32*)T2[temp[2][1]])
^ *((u32*)T3[temp[3][2]])
^ *((u32*)T4[temp[0][3]]);
*((u32*)(b + 8)) = *((u32*)T1[temp[2][0]])
^ *((u32*)T2[temp[3][1]])
^ *((u32*)T3[temp[0][2]])
^ *((u32*)T4[temp[1][3]]);
*((u32*)(b +12)) = *((u32*)T1[temp[3][0]])
^ *((u32*)T2[temp[0][1]])
^ *((u32*)T3[temp[1][2]])
^ *((u32*)T4[temp[2][3]]);
for (r = 1; r < ROUNDS-1; r++)
{
*((u32*)u.temp[0]) = *((u32*)(b )) ^ *((u32*)rk[r][0]);
*((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[r][1]);
*((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[r][2]);
*((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[r][3]);
*((u32*)(b )) = (*((u32*)T1[u.temp[0][0]])
^ *((u32*)T2[u.temp[1][1]])
^ *((u32*)T3[u.temp[2][2]])
^ *((u32*)T4[u.temp[3][3]]));
*((u32*)(b + 4)) = (*((u32*)T1[u.temp[1][0]])
^ *((u32*)T2[u.temp[2][1]])
^ *((u32*)T3[u.temp[3][2]])
^ *((u32*)T4[u.temp[0][3]]));
*((u32*)(b + 8)) = (*((u32*)T1[u.temp[2][0]])
^ *((u32*)T2[u.temp[3][1]])
^ *((u32*)T3[u.temp[0][2]])
^ *((u32*)T4[u.temp[1][3]]));
*((u32*)(b +12)) = (*((u32*)T1[u.temp[3][0]])
^ *((u32*)T2[u.temp[0][1]])
^ *((u32*)T3[u.temp[1][2]])
^ *((u32*)T4[u.temp[2][3]]));
}
/* last round is special */
*((u32*)temp[0]) = *((u32*)(b )) ^ *((u32*)rk[ROUNDS-1][0]);
*((u32*)temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[ROUNDS-1][1]);
*((u32*)temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[ROUNDS-1][2]);
*((u32*)temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[ROUNDS-1][3]);
b[ 0] = T1[temp[0][0]][1];
b[ 1] = T1[temp[1][1]][1];
b[ 2] = T1[temp[2][2]][1];
b[ 3] = T1[temp[3][3]][1];
b[ 4] = T1[temp[1][0]][1];
b[ 5] = T1[temp[2][1]][1];
b[ 6] = T1[temp[3][2]][1];
b[ 7] = T1[temp[0][3]][1];
b[ 8] = T1[temp[2][0]][1];
b[ 9] = T1[temp[3][1]][1];
b[10] = T1[temp[0][2]][1];
b[11] = T1[temp[1][3]][1];
b[12] = T1[temp[3][0]][1];
b[13] = T1[temp[0][1]][1];
b[14] = T1[temp[1][2]][1];
b[15] = T1[temp[2][3]][1];
*((u32*)(b )) ^= *((u32*)rk[ROUNDS][0]);
*((u32*)(b+ 4)) ^= *((u32*)rk[ROUNDS][1]);
*((u32*)(b+ 8)) ^= *((u32*)rk[ROUNDS][2]);
*((u32*)(b+12)) ^= *((u32*)rk[ROUNDS][3]);
/* Last round is special. */
*((u32*)u.temp[0]) = *((u32*)(b )) ^ *((u32*)rk[ROUNDS-1][0]);
*((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[ROUNDS-1][1]);
*((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[ROUNDS-1][2]);
*((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[ROUNDS-1][3]);
b[ 0] = T1[u.temp[0][0]][1];
b[ 1] = T1[u.temp[1][1]][1];
b[ 2] = T1[u.temp[2][2]][1];
b[ 3] = T1[u.temp[3][3]][1];
b[ 4] = T1[u.temp[1][0]][1];
b[ 5] = T1[u.temp[2][1]][1];
b[ 6] = T1[u.temp[3][2]][1];
b[ 7] = T1[u.temp[0][3]][1];
b[ 8] = T1[u.temp[2][0]][1];
b[ 9] = T1[u.temp[3][1]][1];
b[10] = T1[u.temp[0][2]][1];
b[11] = T1[u.temp[1][3]][1];
b[12] = T1[u.temp[3][0]][1];
b[13] = T1[u.temp[0][1]][1];
b[14] = T1[u.temp[1][2]][1];
b[15] = T1[u.temp[2][3]][1];
*((u32*)(b )) ^= *((u32*)rk[ROUNDS][0]);
*((u32*)(b+ 4)) ^= *((u32*)rk[ROUNDS][1]);
*((u32*)(b+ 8)) ^= *((u32*)rk[ROUNDS][2]);
*((u32*)(b+12)) ^= *((u32*)rk[ROUNDS][3]);
#undef rk
}
static void
do_encrypt (const RIJNDAEL_context *ctx,
unsigned char *bx, const unsigned char *ax)
{
/* BX and AX are not necessarily aligned correctly. Thus we need to
copy them here. */
union
{
u32 dummy[4];
byte a[16];
} a;
union
{
u32 dummy[4];
byte b[16];
} b;
memcpy (a.a, ax, 16);
do_encrypt_aligned (ctx, b.b, a.a);
memcpy (bx, b.b, 16);
}
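The do_encrypt wrapper above always pays for two 16-byte copies so that do_encrypt_aligned never sees a misaligned pointer. A conceivable refinement, not part of this commit (it echoes the alignment check in the removed experimental code further down), is a runtime dispatch that skips the copies when both buffers already happen to be 4-byte aligned:

/* Hypothetical fast path (not in this commit): dispatch on actual
   pointer alignment instead of always copying. */
static void
do_encrypt_checked (const RIJNDAEL_context *ctx,
                    unsigned char *bx, const unsigned char *ax)
{
  if ((((unsigned long)ax | (unsigned long)bx) & 3) == 0)
    do_encrypt_aligned (ctx, bx, ax);  /* both 4-byte aligned */
  else
    do_encrypt (ctx, bx, ax);          /* copy through aligned unions */
}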
static void
rijndael_encrypt (void *ctx, byte *b, const byte *a)
{
@@ -1953,56 +1986,6 @@ rijndael_encrypt (void *ctx, byte *b, const byte *a)
burn_stack (16 + 2*sizeof(int));
}
#if 0
/* Experimental code. Needs to be generalized and we might want to
have variants for all possible sizes of the largest scalar type.
Also need to make sure that INBUF and OUTBUF are properly
aligned. */
void
rijndael_cfb_encrypt (void *ctx, byte *iv,
byte *outbuf, const byte *inbuf, size_t nbytes)
{
/* if ( ((unsigned long)inbuf & 3) || ((unsigned long)outbuf & 3) ) */
/* { */
/* Not properly aligned, use the slow version. Actually the
compiler might even optimize this pretty well if the
target CPU has relaxed alignment requirements. Thus it is
questionable whether we should go to the hassle of doing
alignment-wise optimizations ourselves. A quick test
with gcc 4.0 on ia32 did not show any advantages. */
byte *ivp;
int i;
while (nbytes >= 16)
{
do_encrypt (ctx, iv, iv);
for (i=0, ivp = iv; i < 16; i++)
*outbuf++ = (*ivp++ ^= *inbuf++);
nbytes -= 16;
}
/* } */
/* else */
/* { */
/* u32 *ivp; */
/* u32 *ob = (u32*)outbuf; */
/* const u32 *ib = (const u32*)inbuf; */
/* while (nbytes >= 16) */
/* { */
/* do_encrypt (ctx, iv, iv); */
/* ivp = iv; */
/* *ob++ = (*ivp++ ^= *ib++); */
/* *ob++ = (*ivp++ ^= *ib++); */
/* *ob++ = (*ivp++ ^= *ib++); */
/* *ob++ = (*ivp ^= *ib++); */
/* nbytes -= 16; */
/* } */
/* } */
burn_stack (16 + 2*sizeof(int));
}
#endif
/* Decrypt one block. a and b may be the same. */
@@ -2097,6 +2080,67 @@ rijndael_decrypt (void *ctx, byte *b, const byte *a)
do_decrypt (ctx, b, a);
burn_stack (16+2*sizeof(int));
}
/* Bulk encryption of complete blocks in CFB mode. Caller needs to
make sure that IV is aligned on an unsigned long boundary. This
function is only intended for the bulk encryption feature of
cipher.c. */
void
rijndael_cfb_enc (void *context, unsigned char *iv,
void *outbuf_arg, const void *inbuf_arg,
unsigned int nblocks)
{
RIJNDAEL_context *ctx = context;
unsigned char *outbuf = outbuf_arg;
const unsigned char *inbuf = inbuf_arg;
unsigned char *ivp;
int i;
for ( ;nblocks; nblocks-- )
{
/* Encrypt the IV. */
do_encrypt_aligned (ctx, iv, iv);
/* XOR the input with the IV and store input into IV. */
for (ivp=iv,i=0; i < 16; i++ )
*outbuf++ = (*ivp++ ^= *inbuf++);
}
burn_stack (16 + 2*sizeof(int));
}
/* Bulk decryption of complete blocks in CFB mode. Caller needs to
make sure that IV is aligned on an unsigned long boundary. This
function is only intended for the bulk encryption feature of
cipher.c. */
void
rijndael_cfb_dec (void *context, unsigned char *iv,
void *outbuf_arg, const void *inbuf_arg,
unsigned int nblocks)
{
RIJNDAEL_context *ctx = context;
unsigned char *outbuf = outbuf_arg;
const unsigned char *inbuf = inbuf_arg;
unsigned char *ivp;
unsigned char temp;
int i;
for ( ;nblocks; nblocks-- )
{
do_encrypt_aligned (ctx, iv, iv);
for (ivp=iv,i=0; i < 16; i++ )
{
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
}
burn_stack (16 + 2*sizeof(int));
}
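A quick way to exercise the two bulk routines together is a CFB round trip: after processing the same data, the plaintext must survive and both IV copies must hold the last ciphertext block. A hypothetical smoke test, assuming a RIJNDAEL_context already initialized by the key-schedule code (not shown in this hunk):

#include <string.h>

/* Hypothetical smoke test for the bulk CFB pair. Returns 1 on
   success. The unions mirror the cipher.c trick to satisfy the
   alignment requirement documented above. */
static int
cfb_roundtrip_ok (RIJNDAEL_context *ctx)
{
  union { unsigned long ul; unsigned char iv[16]; } iv_e, iv_d;
  unsigned char pt[64], ct[64], back[64];
  int i;

  memset (iv_e.iv, 0, 16);
  memset (iv_d.iv, 0, 16);
  for (i = 0; i < 64; i++)
    pt[i] = i;

  rijndael_cfb_enc (ctx, iv_e.iv, ct, pt, 4);   /* 4 blocks */
  rijndael_cfb_dec (ctx, iv_d.iv, back, ct, 4);

  /* Plaintext must survive, and both IVs must now hold the last
     ciphertext block. */
  return !memcmp (pt, back, 64) && !memcmp (iv_e.iv, iv_d.iv, 16);
}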
/* Test a single encryption and decryption with each key size. */