Improved AES performance.

Werner Koch 2008-03-22 17:01:37 +00:00
parent a614eabba9
commit 537cada38e
10 changed files with 392 additions and 242 deletions

NEWS
View File

@ -1,3 +1,11 @@
Noteworthy changes in version 1.4.9 (unreleased)
------------------------------------------------
* Improved AES encryption performance by more than 20% (on ia32).
Decryption is also a bit faster.
Noteworthy changes in version 1.4.8 (2007-12-20)
------------------------------------------------

View File

@ -1,3 +1,13 @@
2008-03-22 Werner Koch <wk@g10code.com>
* cipher.c (struct cipher_handle_s): Make sure IV is u32
aligned. Change all users of IV.
(do_cfb_encrypt): Optimize and use bulk code for AES.
(do_cfb_decrypt): Ditto.
* rijndael.c (do_encrypt): Remove.
(do_encrypt_aligned, do_encrypt): New. Taken from libgcrypt.
(rijndael_cfb_enc, rijndael_cfb_dec): New.
2007-12-12 Werner Koch <wk@g10code.com>
* pubkey.c (pubkey_encrypt, pubkey_decrypt): Allow type 20 keys.
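
The IV alignment change recorded above uses a standard C idiom: the byte array is wrapped in a union with wider integer members so that the array inherits their alignment. A minimal, self-contained sketch of that idiom (the union and variable names here are illustrative, not taken from the GnuPG sources):

#include <stdio.h>
#include <string.h>

typedef unsigned int u32;   /* Assumes int is 32 bits, as GnuPG's u32 is. */

/* A 16-byte buffer forced onto an unsigned long and u32 boundary by
   the union, mirroring the u_iv union added to cipher_handle_s.  */
union aligned_iv
{
  unsigned long dummy_ul;
  u32 dummy_u32;
  unsigned char iv[16];
};

int
main (void)
{
  union aligned_iv u;

  memset (u.iv, 0, sizeof u.iv);
  /* Safe even on strict-alignment CPUs because the union guarantees
     the alignment; this is what lets optimized code touch the IV a
     word at a time instead of byte by byte.  */
  *(u32 *)u.iv ^= 0x01020304;
  printf ("IV lives at %p\n", (void *)u.iv);
  return 0;
}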

View File

@ -118,8 +118,6 @@ twofish_get_info( int algo, size_t *keylen,
void (**decryptf)( void *c, byte *outbuf, const byte *inbuf )
);
/* this is just a kludge for the time we have not yet changed the cipher
* stuff to the scheme we use for random and digests */
const char *
rijndael_get_info( int algo, size_t *keylen,
size_t *blocksize, size_t *contextsize,
@ -127,6 +125,12 @@ rijndael_get_info( int algo, size_t *keylen,
void (**encryptf)(void *c, byte *outbuf, const byte *inbuf),
void (**decryptf)(void *c, byte *outbuf, const byte *inbuf)
);
void rijndael_cfb_enc (void *context, unsigned char *iv,
void *outbuf_arg, const void *inbuf_arg,
unsigned int nblocks);
void rijndael_cfb_dec (void *context, unsigned char *iv,
void *outbuf_arg, const void *inbuf_arg,
unsigned int nblocks);
const char *
idea_get_info( int algo, size_t *keylen,
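
These *_get_info declarations share one contract: the dispatcher asks an algorithm module for its parameters and receives the work functions through pointer out-arguments. A rough sketch of that contract with an invented module (algorithm id 42 and every toy_* name are hypothetical, and the setkey argument of the real interface is omitted for brevity):

#include <stddef.h>
#include <stdio.h>

typedef unsigned char byte;

static void
toy_encrypt (void *c, byte *outbuf, const byte *inbuf)
{
  (void)c;
  *outbuf = *inbuf ^ 0xff;      /* Stand-in for a real cipher. */
}

static void
toy_decrypt (void *c, byte *outbuf, const byte *inbuf)
{
  (void)c;
  *outbuf = *inbuf ^ 0xff;
}

/* Shape of a *_get_info function: report key, block and context sizes
   and hand back the encrypt/decrypt entry points, or return NULL for
   an unknown algorithm id.  */
static const char *
toy_get_info (int algo, size_t *keylen, size_t *blocksize,
              size_t *contextsize,
              void (**encryptf)(void *c, byte *outbuf, const byte *inbuf),
              void (**decryptf)(void *c, byte *outbuf, const byte *inbuf))
{
  if (algo != 42)
    return NULL;
  *keylen = 128;
  *blocksize = 16;
  *contextsize = 64;
  *encryptf = toy_encrypt;
  *decryptf = toy_decrypt;
  return "TOY";
}

int
main (void)
{
  size_t k, b, c;
  void (*enc)(void *c, byte *outbuf, const byte *inbuf);
  void (*dec)(void *c, byte *outbuf, const byte *inbuf);
  const char *name = toy_get_info (42, &k, &b, &c, &enc, &dec);
  byte in = 'x', out;

  if (name)
    {
      enc (NULL, &out, &in);
      printf ("%s encrypts %c to %02x\n", name, in, out);
    }
  return 0;
}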

View File

@ -1,6 +1,6 @@
/* cipher.c - cipher dispatcher
* Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
* 2007 Free Software Foundation, Inc.
* 2007, 2008 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
@ -52,17 +52,26 @@ static struct cipher_table_s cipher_table[TABLE_SIZE];
static int disabled_algos[TABLE_SIZE];
struct cipher_handle_s {
int algo;
int mode;
size_t blocksize;
byte iv[MAX_BLOCKSIZE]; /* (this should be ulong aligned) */
byte lastiv[MAX_BLOCKSIZE];
int unused; /* in IV */
int (*setkey)( void *c, const byte *key, unsigned keylen );
void (*encrypt)( void *c, byte *outbuf, const byte *inbuf );
void (*decrypt)( void *c, byte *outbuf, const byte *inbuf );
PROPERLY_ALIGNED_TYPE context;
struct cipher_handle_s
{
int algo;
int mode;
size_t blocksize;
/* The initialization vector. To help code optimization we make
sure that it is aligned on an unsigned long and u32 boundary. */
union {
unsigned long dummy_ul_iv;
u32 dummy_u32_iv;
unsigned char iv[MAX_BLOCKSIZE];
} u_iv;
byte lastiv[MAX_BLOCKSIZE];
int unused; /* in IV */
int (*setkey)( void *c, const byte *key, unsigned keylen );
void (*encrypt)( void *c, byte *outbuf, const byte *inbuf );
void (*decrypt)( void *c, byte *outbuf, const byte *inbuf );
PROPERLY_ALIGNED_TYPE context;
};
@ -459,14 +468,14 @@ cipher_setkey( CIPHER_HANDLE c, byte *key, unsigned keylen )
void
cipher_setiv( CIPHER_HANDLE c, const byte *iv, unsigned ivlen )
{
memset( c->iv, 0, c->blocksize );
memset( c->u_iv.iv, 0, c->blocksize );
if( iv ) {
if( ivlen != c->blocksize )
log_info("WARNING: cipher_setiv: ivlen=%u blklen=%u\n",
ivlen, (unsigned)c->blocksize );
if( ivlen > c->blocksize )
ivlen = c->blocksize;
memcpy( c->iv, iv, ivlen );
memcpy( c->u_iv.iv, iv, ivlen );
}
c->unused = 0;
}
@ -507,10 +516,10 @@ do_cbc_encrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nblocks )
/* fixme: the xor should work on words and not on
* bytes. Maybe it is a good idea to enhance the cipher backend
* API to allow for CBC handling in the backend */
for(ivp=c->iv,i=0; i < blocksize; i++ )
for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
outbuf[i] = inbuf[i] ^ *ivp++;
(*c->encrypt)( &c->context.c, outbuf, outbuf );
memcpy(c->iv, outbuf, blocksize );
memcpy(c->u_iv.iv, outbuf, blocksize );
inbuf += c->blocksize;
outbuf += c->blocksize;
}
@ -530,9 +539,9 @@ do_cbc_decrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nblocks )
* for this here because it is not used otherwise */
memcpy(c->lastiv, inbuf, blocksize );
(*c->decrypt)( &c->context.c, outbuf, inbuf );
for(ivp=c->iv,i=0; i < blocksize; i++ )
for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
outbuf[i] ^= *ivp++;
memcpy(c->iv, c->lastiv, blocksize );
memcpy(c->u_iv.iv, c->lastiv, blocksize );
inbuf += c->blocksize;
outbuf += c->blocksize;
}
@ -542,119 +551,181 @@ do_cbc_decrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nblocks )
static void
do_cfb_encrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nbytes )
{
byte *ivp;
size_t blocksize = c->blocksize;
byte *ivp;
size_t blocksize = c->blocksize;
size_t blocksize_x_2 = blocksize + blocksize;
if( nbytes <= c->unused ) {
/* short enough to be encoded by the remaining XOR mask */
/* XOR the input with the IV and store input into IV */
for(ivp=c->iv+c->blocksize - c->unused; nbytes; nbytes--, c->unused-- )
if ( nbytes <= c->unused )
{
/* Short enough to be encoded by the remaining XOR mask. XOR
the input with the IV and store input into IV. */
for (ivp=c->u_iv.iv+c->blocksize - c->unused; nbytes;
nbytes--, c->unused-- )
*outbuf++ = (*ivp++ ^= *inbuf++);
return;
}
if( c->unused ) {
/* XOR the input with the IV and store input into IV */
nbytes -= c->unused;
for(ivp=c->iv+blocksize - c->unused; c->unused; c->unused-- )
*outbuf++ = (*ivp++ ^= *inbuf++);
if ( c->unused )
{
/* XOR the input with the IV and store input into IV. */
nbytes -= c->unused;
for (ivp=c->u_iv.iv+blocksize - c->unused; c->unused; c->unused-- )
*outbuf++ = (*ivp++ ^= *inbuf++);
}
/* Now we can process complete blocks. */
#if 0
/* Experimental code. We may only use this for standard CFB
because for Phil's mode we need to save the IV from before the
last encryption - we don't want to do this in the fast CFB
encryption routine. */
if (c->algo == CIPHER_ALGO_AES
&& nbytes >= blocksize
&& c->mode != CIPHER_MODE_PHILS_CFB) {
size_t n;
/* Now we can process complete blocks. We use a loop as long as we
have at least 2 blocks and use conditions for the rest. This
also allows us to use a bulk encryption function if available. */
#ifdef USE_AES
if (nbytes >= blocksize_x_2
&& (c->algo == CIPHER_ALGO_AES
|| c->algo == CIPHER_ALGO_AES256
|| c->algo == CIPHER_ALGO_AES192))
{
unsigned int nblocks = nbytes / blocksize;
rijndael_cfb_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
outbuf += nblocks * blocksize;
inbuf += nblocks * blocksize;
nbytes -= nblocks * blocksize;
}
else
#endif /*USE_AES*/
{
while ( nbytes >= blocksize_x_2 )
{
int i;
/* Encrypt the IV. */
c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
/* XOR the input with the IV and store input into IV. */
for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
*outbuf++ = (*ivp++ ^= *inbuf++);
nbytes -= blocksize;
}
}
memcpy( c->lastiv, c->iv, blocksize );
n = (nbytes / blocksize) * blocksize;
rijndael_cfb_encrypt (&c->context.c, c->iv, outbuf, inbuf, n);
inbuf += n;
outbuf += n;
nbytes -= n;
if ( nbytes >= blocksize )
{
int i;
/* Save the current IV and then encrypt the IV. */
memcpy( c->lastiv, c->u_iv.iv, blocksize );
c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
/* XOR the input with the IV and store input into IV */
for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
*outbuf++ = (*ivp++ ^= *inbuf++);
nbytes -= blocksize;
}
#endif
while( nbytes >= blocksize ) {
int i;
/* encrypt the IV (and save the current one) */
memcpy( c->lastiv, c->iv, blocksize );
(*c->encrypt)( &c->context.c, c->iv, c->iv );
/* XOR the input with the IV and store input into IV */
for(ivp=c->iv,i=0; i < blocksize; i++ )
*outbuf++ = (*ivp++ ^= *inbuf++);
nbytes -= blocksize;
}
if( nbytes ) { /* process the remaining bytes */
/* encrypt the IV (and save the current one) */
memcpy( c->lastiv, c->iv, blocksize );
(*c->encrypt)( &c->context.c, c->iv, c->iv );
c->unused = blocksize;
/* and apply the xor */
c->unused -= nbytes;
for(ivp=c->iv; nbytes; nbytes-- )
*outbuf++ = (*ivp++ ^= *inbuf++);
if ( nbytes )
{
/* Save the current IV and then encrypt the IV. */
memcpy (c->lastiv, c->u_iv.iv, blocksize );
c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
c->unused = blocksize;
/* Apply the XOR. */
c->unused -= nbytes;
for(ivp=c->u_iv.iv; nbytes; nbytes-- )
*outbuf++ = (*ivp++ ^= *inbuf++);
}
}
static void
do_cfb_decrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nbytes )
{
byte *ivp;
ulong temp;
size_t blocksize = c->blocksize;
if( nbytes <= c->unused ) {
/* short enough to be encoded by the remaining XOR mask */
/* XOR the input with the IV and store input into IV */
for(ivp=c->iv+blocksize - c->unused; nbytes; nbytes--,c->unused--){
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
return;
unsigned char *ivp;
unsigned long temp;
int i;
size_t blocksize = c->blocksize;
size_t blocksize_x_2 = blocksize + blocksize;
if (nbytes <= c->unused)
{
/* Short enough to be encoded by the remaining XOR mask. */
/* XOR the input with the IV and store input into IV. */
for (ivp=c->u_iv.iv+blocksize - c->unused;
nbytes;
nbytes--, c->unused--)
{
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
return;
}
if (c->unused)
{
/* XOR the input with the IV and store input into IV. */
nbytes -= c->unused;
for (ivp=c->u_iv.iv+blocksize - c->unused; c->unused; c->unused-- )
{
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
}
/* Now we can process complete blocks. We use a loop as long as we
have at least 2 blocks and use conditions for the rest. This
also allows us to use a bulk encryption function if available. */
#ifdef USE_AES
if (nbytes >= blocksize_x_2
&& (c->algo == CIPHER_ALGO_AES
|| c->algo == CIPHER_ALGO_AES256
|| c->algo == CIPHER_ALGO_AES192))
{
unsigned int nblocks = nbytes / blocksize;
rijndael_cfb_dec (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
outbuf += nblocks * blocksize;
inbuf += nblocks * blocksize;
nbytes -= nblocks * blocksize;
}
else
#endif /*USE_AES*/
{
while (nbytes >= blocksize_x_2 )
{
/* Encrypt the IV. */
c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
/* XOR the input with the IV and store input into IV. */
for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
{
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
nbytes -= blocksize;
}
}
if( c->unused ) {
/* XOR the input with the IV and store input into IV */
nbytes -= c->unused;
for(ivp=c->iv+blocksize - c->unused; c->unused; c->unused-- ) {
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
if (nbytes >= blocksize )
{
/* Save the current IV and then encrypt the IV. */
memcpy ( c->lastiv, c->u_iv.iv, blocksize);
c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
/* XOR the input with the IV and store input into IV */
for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
{
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
nbytes -= blocksize;
}
/* now we can process complete blocks */
while( nbytes >= blocksize ) {
int i;
/* encrypt the IV (and save the current one) */
memcpy( c->lastiv, c->iv, blocksize );
(*c->encrypt)( &c->context.c, c->iv, c->iv );
/* XOR the input with the IV and store input into IV */
for(ivp=c->iv,i=0; i < blocksize; i++ ) {
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
nbytes -= blocksize;
}
if( nbytes ) { /* process the remaining bytes */
/* encrypt the IV (and save the current one) */
memcpy( c->lastiv, c->iv, blocksize );
(*c->encrypt)( &c->context.c, c->iv, c->iv );
c->unused = blocksize;
/* and apply the xor */
c->unused -= nbytes;
for(ivp=c->iv; nbytes; nbytes-- ) {
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
if (nbytes)
{
/* Save the current IV and then encrypt the IV. */
memcpy ( c->lastiv, c->u_iv.iv, blocksize );
c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
c->unused = blocksize;
/* Apply the XOR. */
c->unused -= nbytes;
for (ivp=c->u_iv.iv; nbytes; nbytes-- )
{
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
}
}
@ -732,8 +803,8 @@ void
cipher_sync( CIPHER_HANDLE c )
{
if( c->mode == CIPHER_MODE_PHILS_CFB && c->unused ) {
memmove(c->iv + c->unused, c->iv, c->blocksize - c->unused );
memcpy(c->iv, c->lastiv + c->blocksize - c->unused, c->unused);
memmove(c->u_iv.iv + c->unused, c->u_iv.iv, c->blocksize - c->unused );
memcpy(c->u_iv.iv, c->lastiv + c->blocksize - c->unused, c->unused);
c->unused = 0;
}
}
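
To see the control flow of do_cfb_encrypt and do_cfb_decrypt without the partial-block and Phil's-mode bookkeeping, here is a self-contained sketch of CFB over whole blocks only. toy_encrypt is a placeholder so the sketch runs without AES; only the feedback structure corresponds to the code above:

#include <stdio.h>
#include <string.h>

#define BLOCKSIZE 16

/* Placeholder block cipher: XOR with a fixed pad.  Not secure; it
   merely gives the CFB loops something invertible to call.  */
static void
toy_encrypt (unsigned char *out, const unsigned char *in)
{
  int i;
  for (i = 0; i < BLOCKSIZE; i++)
    out[i] = in[i] ^ 0x5a;
}

/* CFB encryption of whole blocks: encrypt the IV, XOR with the input,
   and feed the resulting ciphertext back as the next IV.  */
static void
cfb_encrypt_blocks (unsigned char *iv, unsigned char *out,
                    const unsigned char *in, unsigned int nblocks)
{
  int i;
  for (; nblocks; nblocks--)
    {
      toy_encrypt (iv, iv);
      for (i = 0; i < BLOCKSIZE; i++)
        out[i] = (iv[i] ^= in[i]);
      out += BLOCKSIZE;
      in += BLOCKSIZE;
    }
}

/* CFB decryption also runs the cipher forward; the old ciphertext
   byte must be saved in a temporary because it becomes the next IV.  */
static void
cfb_decrypt_blocks (unsigned char *iv, unsigned char *out,
                    const unsigned char *in, unsigned int nblocks)
{
  unsigned char t;
  int i;
  for (; nblocks; nblocks--)
    {
      toy_encrypt (iv, iv);
      for (i = 0; i < BLOCKSIZE; i++)
        {
          t = in[i];
          out[i] = iv[i] ^ t;
          iv[i] = t;
        }
      out += BLOCKSIZE;
      in += BLOCKSIZE;
    }
}

int
main (void)
{
  unsigned char iv1[BLOCKSIZE] = {0}, iv2[BLOCKSIZE] = {0};
  unsigned char msg[2*BLOCKSIZE] = "two blocks of sample text......";
  unsigned char ct[2*BLOCKSIZE], pt[2*BLOCKSIZE];

  cfb_encrypt_blocks (iv1, ct, msg, 2);
  cfb_decrypt_blocks (iv2, pt, ct, 2);
  printf ("round trip %s\n", memcmp (msg, pt, sizeof msg) ? "FAILED" : "ok");
  return 0;
}

This is the shape that rijndael_cfb_enc and rijndael_cfb_dec below give to AES: the whole loop runs inside one call, instead of re-entering the dispatcher and copying IVs for every 16-byte block.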

View File

@ -1,5 +1,5 @@
/* Rijndael (AES) for GnuPG
* Copyright (C) 2000, 2001 Free Software Foundation, Inc.
* Copyright (C) 2000, 2001, 2008 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
@ -1866,86 +1866,119 @@ prepare_decryption( RIJNDAEL_context *ctx )
/* Encrypt one block. A and B may be the same. */
/* Encrypt one block. A and B need to be aligned on a 4 byte
boundary. A and B may be the same. */
static void
do_encrypt (const RIJNDAEL_context *ctx, byte *b, const byte *a)
do_encrypt_aligned (const RIJNDAEL_context *ctx,
unsigned char *b, const unsigned char *a)
{
int r;
byte temp[4][4];
int ROUNDS = ctx->ROUNDS;
#define rk (ctx->keySched)
int ROUNDS = ctx->ROUNDS;
int r;
union
{
u32 tempu32[4]; /* Force correct alignment. */
byte temp[4][4];
} u;
*((u32*)temp[0]) = *((u32*)(a )) ^ *((u32*)rk[0][0]);
*((u32*)temp[1]) = *((u32*)(a+ 4)) ^ *((u32*)rk[0][1]);
*((u32*)temp[2]) = *((u32*)(a+ 8)) ^ *((u32*)rk[0][2]);
*((u32*)temp[3]) = *((u32*)(a+12)) ^ *((u32*)rk[0][3]);
*((u32*)(b )) = *((u32*)T1[temp[0][0]])
^ *((u32*)T2[temp[1][1]])
^ *((u32*)T3[temp[2][2]])
^ *((u32*)T4[temp[3][3]]);
*((u32*)(b + 4)) = *((u32*)T1[temp[1][0]])
^ *((u32*)T2[temp[2][1]])
^ *((u32*)T3[temp[3][2]])
^ *((u32*)T4[temp[0][3]]);
*((u32*)(b + 8)) = *((u32*)T1[temp[2][0]])
^ *((u32*)T2[temp[3][1]])
^ *((u32*)T3[temp[0][2]])
^ *((u32*)T4[temp[1][3]]);
*((u32*)(b +12)) = *((u32*)T1[temp[3][0]])
^ *((u32*)T2[temp[0][1]])
^ *((u32*)T3[temp[1][2]])
^ *((u32*)T4[temp[2][3]]);
for (r = 1; r < ROUNDS-1; r++) {
*((u32*)temp[0]) = *((u32*)(b )) ^ *((u32*)rk[r][0]);
*((u32*)temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[r][1]);
*((u32*)temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[r][2]);
*((u32*)temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[r][3]);
*((u32*)u.temp[0]) = *((u32*)(a )) ^ *((u32*)rk[0][0]);
*((u32*)u.temp[1]) = *((u32*)(a+ 4)) ^ *((u32*)rk[0][1]);
*((u32*)u.temp[2]) = *((u32*)(a+ 8)) ^ *((u32*)rk[0][2]);
*((u32*)u.temp[3]) = *((u32*)(a+12)) ^ *((u32*)rk[0][3]);
*((u32*)(b )) = (*((u32*)T1[u.temp[0][0]])
^ *((u32*)T2[u.temp[1][1]])
^ *((u32*)T3[u.temp[2][2]])
^ *((u32*)T4[u.temp[3][3]]));
*((u32*)(b + 4)) = (*((u32*)T1[u.temp[1][0]])
^ *((u32*)T2[u.temp[2][1]])
^ *((u32*)T3[u.temp[3][2]])
^ *((u32*)T4[u.temp[0][3]]));
*((u32*)(b + 8)) = (*((u32*)T1[u.temp[2][0]])
^ *((u32*)T2[u.temp[3][1]])
^ *((u32*)T3[u.temp[0][2]])
^ *((u32*)T4[u.temp[1][3]]));
*((u32*)(b +12)) = (*((u32*)T1[u.temp[3][0]])
^ *((u32*)T2[u.temp[0][1]])
^ *((u32*)T3[u.temp[1][2]])
^ *((u32*)T4[u.temp[2][3]]));
*((u32*)(b )) = *((u32*)T1[temp[0][0]])
^ *((u32*)T2[temp[1][1]])
^ *((u32*)T3[temp[2][2]])
^ *((u32*)T4[temp[3][3]]);
*((u32*)(b + 4)) = *((u32*)T1[temp[1][0]])
^ *((u32*)T2[temp[2][1]])
^ *((u32*)T3[temp[3][2]])
^ *((u32*)T4[temp[0][3]]);
*((u32*)(b + 8)) = *((u32*)T1[temp[2][0]])
^ *((u32*)T2[temp[3][1]])
^ *((u32*)T3[temp[0][2]])
^ *((u32*)T4[temp[1][3]]);
*((u32*)(b +12)) = *((u32*)T1[temp[3][0]])
^ *((u32*)T2[temp[0][1]])
^ *((u32*)T3[temp[1][2]])
^ *((u32*)T4[temp[2][3]]);
for (r = 1; r < ROUNDS-1; r++)
{
*((u32*)u.temp[0]) = *((u32*)(b )) ^ *((u32*)rk[r][0]);
*((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[r][1]);
*((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[r][2]);
*((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[r][3]);
*((u32*)(b )) = (*((u32*)T1[u.temp[0][0]])
^ *((u32*)T2[u.temp[1][1]])
^ *((u32*)T3[u.temp[2][2]])
^ *((u32*)T4[u.temp[3][3]]));
*((u32*)(b + 4)) = (*((u32*)T1[u.temp[1][0]])
^ *((u32*)T2[u.temp[2][1]])
^ *((u32*)T3[u.temp[3][2]])
^ *((u32*)T4[u.temp[0][3]]));
*((u32*)(b + 8)) = (*((u32*)T1[u.temp[2][0]])
^ *((u32*)T2[u.temp[3][1]])
^ *((u32*)T3[u.temp[0][2]])
^ *((u32*)T4[u.temp[1][3]]));
*((u32*)(b +12)) = (*((u32*)T1[u.temp[3][0]])
^ *((u32*)T2[u.temp[0][1]])
^ *((u32*)T3[u.temp[1][2]])
^ *((u32*)T4[u.temp[2][3]]));
}
/* last round is special */
*((u32*)temp[0]) = *((u32*)(b )) ^ *((u32*)rk[ROUNDS-1][0]);
*((u32*)temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[ROUNDS-1][1]);
*((u32*)temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[ROUNDS-1][2]);
*((u32*)temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[ROUNDS-1][3]);
b[ 0] = T1[temp[0][0]][1];
b[ 1] = T1[temp[1][1]][1];
b[ 2] = T1[temp[2][2]][1];
b[ 3] = T1[temp[3][3]][1];
b[ 4] = T1[temp[1][0]][1];
b[ 5] = T1[temp[2][1]][1];
b[ 6] = T1[temp[3][2]][1];
b[ 7] = T1[temp[0][3]][1];
b[ 8] = T1[temp[2][0]][1];
b[ 9] = T1[temp[3][1]][1];
b[10] = T1[temp[0][2]][1];
b[11] = T1[temp[1][3]][1];
b[12] = T1[temp[3][0]][1];
b[13] = T1[temp[0][1]][1];
b[14] = T1[temp[1][2]][1];
b[15] = T1[temp[2][3]][1];
*((u32*)(b )) ^= *((u32*)rk[ROUNDS][0]);
*((u32*)(b+ 4)) ^= *((u32*)rk[ROUNDS][1]);
*((u32*)(b+ 8)) ^= *((u32*)rk[ROUNDS][2]);
*((u32*)(b+12)) ^= *((u32*)rk[ROUNDS][3]);
/* Last round is special. */
*((u32*)u.temp[0]) = *((u32*)(b )) ^ *((u32*)rk[ROUNDS-1][0]);
*((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[ROUNDS-1][1]);
*((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[ROUNDS-1][2]);
*((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[ROUNDS-1][3]);
b[ 0] = T1[u.temp[0][0]][1];
b[ 1] = T1[u.temp[1][1]][1];
b[ 2] = T1[u.temp[2][2]][1];
b[ 3] = T1[u.temp[3][3]][1];
b[ 4] = T1[u.temp[1][0]][1];
b[ 5] = T1[u.temp[2][1]][1];
b[ 6] = T1[u.temp[3][2]][1];
b[ 7] = T1[u.temp[0][3]][1];
b[ 8] = T1[u.temp[2][0]][1];
b[ 9] = T1[u.temp[3][1]][1];
b[10] = T1[u.temp[0][2]][1];
b[11] = T1[u.temp[1][3]][1];
b[12] = T1[u.temp[3][0]][1];
b[13] = T1[u.temp[0][1]][1];
b[14] = T1[u.temp[1][2]][1];
b[15] = T1[u.temp[2][3]][1];
*((u32*)(b )) ^= *((u32*)rk[ROUNDS][0]);
*((u32*)(b+ 4)) ^= *((u32*)rk[ROUNDS][1]);
*((u32*)(b+ 8)) ^= *((u32*)rk[ROUNDS][2]);
*((u32*)(b+12)) ^= *((u32*)rk[ROUNDS][3]);
#undef rk
}
static void
do_encrypt (const RIJNDAEL_context *ctx,
unsigned char *bx, const unsigned char *ax)
{
/* BX and AX are not necessarily correctly aligned. Thus we need to
copy them here. */
union
{
u32 dummy[4];
byte a[16];
} a;
union
{
u32 dummy[4];
byte b[16];
} b;
memcpy (a.a, ax, 16);
do_encrypt_aligned (ctx, b.b, a.a);
memcpy (bx, b.b, 16);
}
static void
rijndael_encrypt (void *ctx, byte *b, const byte *a)
{
@ -1953,56 +1986,6 @@ rijndael_encrypt (void *ctx, byte *b, const byte *a)
burn_stack (16 + 2*sizeof(int));
}
#if 0
/* Experimental code. Needs to be generalized and we might want to
have variants for all possible sizes of the largest scalar type.
Also need to make sure that INBUF and OUTBUF are properly
aligned. */
void
rijndael_cfb_encrypt (void *ctx, byte *iv,
byte *outbuf, const byte *inbuf, size_t nbytes)
{
/* if ( ((unsigned long)inbuf & 3) || ((unsigned long)outbuf & 3) ) */
/* { */
/* Not properly aligned, use the slow version. Actually the
compiler might even optimize this pretty well if the
target CPU has relaxed alignment requirements. Thus it is
questionable whether we should go to the hassle of doing
alignment-wise optimizations ourselves. A quick test
with gcc 4.0 on ia32 did not show any advantages. */
byte *ivp;
int i;
while (nbytes >= 16)
{
do_encrypt (ctx, iv, iv);
for (i=0, ivp = iv; i < 16; i++)
*outbuf++ = (*ivp++ ^= *inbuf++);
nbytes -= 16;
}
/* } */
/* else */
/* { */
/* u32 *ivp; */
/* u32 *ob = (u32*)outbuf; */
/* const u32 *ib = (const u32*)inbuf; */
/* while (nbytes >= 16) */
/* { */
/* do_encrypt (ctx, iv, iv); */
/* ivp = iv; */
/* *ob++ = (*ivp++ ^= *ib++); */
/* *ob++ = (*ivp++ ^= *ib++); */
/* *ob++ = (*ivp++ ^= *ib++); */
/* *ob++ = (*ivp ^= *ib++); */
/* nbytes -= 16; */
/* } */
/* } */
burn_stack (16 + 2*sizeof(int));
}
#endif
/* Decrypt one block. a and b may be the same. */
@ -2097,6 +2080,67 @@ rijndael_decrypt (void *ctx, byte *b, const byte *a)
do_decrypt (ctx, b, a);
burn_stack (16+2*sizeof(int));
}
/* Bulk encryption of complete blocks in CFB mode. Caller needs to
make sure that IV is aligned on an unsigned long boundary. This
function is only intended for the bulk encryption feature of
cipher.c. */
void
rijndael_cfb_enc (void *context, unsigned char *iv,
void *outbuf_arg, const void *inbuf_arg,
unsigned int nblocks)
{
RIJNDAEL_context *ctx = context;
unsigned char *outbuf = outbuf_arg;
const unsigned char *inbuf = inbuf_arg;
unsigned char *ivp;
int i;
for ( ;nblocks; nblocks-- )
{
/* Encrypt the IV. */
do_encrypt_aligned (ctx, iv, iv);
/* XOR the input with the IV and store input into IV. */
for (ivp=iv,i=0; i < 16; i++ )
*outbuf++ = (*ivp++ ^= *inbuf++);
}
burn_stack (16 + 2*sizeof(int));
}
/* Bulk decryption of complete blocks in CFB mode. Caller needs to
make sure that IV is aligned on an unsigned long boundary. This
function is only intended for the bulk decryption feature of
cipher.c. */
void
rijndael_cfb_dec (void *context, unsigned char *iv,
void *outbuf_arg, const void *inbuf_arg,
unsigned int nblocks)
{
RIJNDAEL_context *ctx = context;
unsigned char *outbuf = outbuf_arg;
const unsigned char *inbuf = inbuf_arg;
unsigned char *ivp;
unsigned char temp;
int i;
for ( ;nblocks; nblocks-- )
{
do_encrypt_aligned (ctx, iv, iv);
for (ivp=iv,i=0; i < 16; i++ )
{
temp = *inbuf++;
*outbuf++ = *ivp ^ temp;
*ivp++ = temp;
}
}
burn_stack (16 + 2*sizeof(int));
}
/* Test a single encryption and decryption with each key size. */
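
One contributor to the speedup announced in NEWS is that the bulk path above avoids a function-pointer call and IV bookkeeping for every 16-byte block. A rough, self-contained harness for measuring that kind of difference follows; toy_encrypt stands in for AES (which exaggerates the relative call overhead), all names are invented, and real numbers depend on machine and compiler:

#include <stdio.h>
#include <time.h>

#define BLOCKSIZE 16
#define NBLOCKS   (1024 * 1024)

static unsigned char iv[BLOCKSIZE];

static void
toy_encrypt (void *ctx, unsigned char *out, const unsigned char *in)
{
  int i;
  (void)ctx;
  for (i = 0; i < BLOCKSIZE; i++)
    out[i] = in[i] ^ 0x5a;
}

/* CFB via one indirect call per block, as the old do_cfb_encrypt
   did it.  The compiler cannot inline through the pointer.  */
static void
cfb_per_block (void (*enc)(void *, unsigned char *, const unsigned char *),
               unsigned char *buf, unsigned int nblocks)
{
  unsigned int n;
  int i;
  for (n = 0; n < nblocks; n++, buf += BLOCKSIZE)
    {
      enc (NULL, iv, iv);
      for (i = 0; i < BLOCKSIZE; i++)
        buf[i] = (iv[i] ^= buf[i]);
    }
}

/* The same work with the loop pushed into one direct bulk call, the
   way rijndael_cfb_enc now does it for cipher.c.  */
static void
cfb_bulk (unsigned char *buf, unsigned int nblocks)
{
  unsigned int n;
  int i;
  for (n = 0; n < nblocks; n++, buf += BLOCKSIZE)
    {
      toy_encrypt (NULL, iv, iv);
      for (i = 0; i < BLOCKSIZE; i++)
        buf[i] = (iv[i] ^= buf[i]);
    }
}

int
main (void)
{
  static unsigned char buf[NBLOCKS * BLOCKSIZE];   /* 16 MiB of zeros. */
  clock_t t;

  t = clock ();
  cfb_per_block (toy_encrypt, buf, NBLOCKS);
  printf ("per-block: %.2fs\n", (double)(clock () - t) / CLOCKS_PER_SEC);

  t = clock ();
  cfb_bulk (buf, NBLOCKS);
  printf ("bulk:      %.2fs\n", (double)(clock () - t) / CLOCKS_PER_SEC);
  return 0;
}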

View File

@ -25,7 +25,7 @@ min_automake_version="1.9.3"
# Remember to change the version number immediately *after* a release.
# Set my_issvn to "yes" for non-released code. Remember to run an
# "svn up" and "autogen.sh --force" right before creating a distribution.
m4_define([my_version], [1.4.9rc1])
m4_define([my_version], [1.4.9])
m4_define([my_issvn], [yes])
m4_define([svn_revision], m4_esyscmd([echo $((svn info 2>/dev/null \

View File

@ -1,3 +1,9 @@
2007-12-21 Werner Koch <wk@g10code.com>
* README.W32: Note that Vista is supported and that at least NT-4
is required. It might still work on older systems, but I don't
know for sure.
2007-12-12 Werner Koch <wk@g10code.com>
* gpg.texi, specify-user-id.texi: Update from gnupg-2.

View File

@ -1,7 +1,8 @@
README.W32 -*- text -*-
This is a binary package with GnuPG for MS-Windows 95, 98, WNT, W2000
and XP. See the file README for generic instructions and usage hints.
This is a binary package with GnuPG for MS-Windows NT-4, W2000, XP and
Vista. A native version for 64 bit is not available. See the file
README for generic instructions and usage hints.
A FAQ comes with this package and a probably more recent one can be
found online at http://www.gnupg.org/faq.html. See

View File

@ -1,3 +1,7 @@
2008-01-30 Werner Koch <wk@g10code.com>
* w32installer.nsi: Set the OutPath back.
2007-12-12 Werner Koch <wk@g10code.com>
* config.sub, config.guess: Update to version 2007-11-19.

View File

@ -351,6 +351,8 @@ Section "-Finish"
WriteRegStr HKCU "Software\GNU\GnuPG" "Lang" $R3
;;
# Set the OutPath back so that the README file can be displayed.
SetOutPath "$INSTDIR"
SectionEnd ; "-Finish"