From 537cada38ede5174f8500d0b81459d87b3da16ac Mon Sep 17 00:00:00 2001
From: Werner Koch
Date: Sat, 22 Mar 2008 17:01:37 +0000
Subject: [PATCH] Improved AES performance.

---
 NEWS                     |    8 ++
 cipher/ChangeLog         |   10 ++
 cipher/algorithms.h      |    8 +-
 cipher/cipher.c          |  301 ++++++++++++++++++++++++---------------
 cipher/rijndael.c        |  288 +++++++++++++++++++++----------------
 configure.ac             |    2 +-
 doc/ChangeLog            |    6 +
 doc/README.W32           |    5 +-
 scripts/ChangeLog        |    4 +
 scripts/w32installer.nsi |    2 +
 10 files changed, 392 insertions(+), 242 deletions(-)

diff --git a/NEWS b/NEWS
index 8ab4e0896..03e9ae6dc 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,11 @@
+Noteworthy changes in version 1.4.9 (unreleased)
+------------------------------------------------
+
+ * Improved AES encryption performance by more than 20% (on ia32).
+   Decryption is also a bit faster.
+
+
+
 Noteworthy changes in version 1.4.8 (2007-12-20)
 ------------------------------------------------
 
diff --git a/cipher/ChangeLog b/cipher/ChangeLog
index b01d0a6ef..ce3f04349 100644
--- a/cipher/ChangeLog
+++ b/cipher/ChangeLog
@@ -1,3 +1,13 @@
+2008-03-22  Werner Koch
+
+	* cipher.c (struct cipher_handle_s): Make sure IV is u32
+	aligned.  Change all users of IV.
+	(do_cfb_encrypt): Optimize and use bulk code for AES.
+	(do_cfb_decrypt): Ditto.
+	* rijndael.c (do_encrypt): Remove.
+	(do_encrypt_aligned, do_encrypt): New.  Taken from libgcrypt.
+	(rijndael_cfb_enc, rijndael_cfb_dec): New.
+
 2007-12-12  Werner Koch
 
 	* pubkey.c (pubkey_encrypt, pubkey_decrypt): Allow type 20 keys.
diff --git a/cipher/algorithms.h b/cipher/algorithms.h
index 5c972f291..9deefb4d8 100644
--- a/cipher/algorithms.h
+++ b/cipher/algorithms.h
@@ -118,8 +118,6 @@ twofish_get_info( int algo, size_t *keylen,
 		   void (**decryptf)( void *c, byte *outbuf, const byte *inbuf )
 		 );
 
-/* this is just a kludge for the time we have not yet changed the cipher
- * stuff to the scheme we use for random and digests */
 const char *
 rijndael_get_info( int algo, size_t *keylen,
 		   size_t *blocksize, size_t *contextsize,
@@ -127,6 +125,12 @@ rijndael_get_info( int algo, size_t *keylen,
 		   void (**encryptf)(void *c, byte *outbuf, const byte *inbuf),
 		   void (**decryptf)(void *c, byte *outbuf, const byte *inbuf)
 		 );
+void rijndael_cfb_enc (void *context, unsigned char *iv,
+                       void *outbuf_arg, const void *inbuf_arg,
+                       unsigned int nblocks);
+void rijndael_cfb_dec (void *context, unsigned char *iv,
+                       void *outbuf_arg, const void *inbuf_arg,
+                       unsigned int nblocks);
 
 const char *
 idea_get_info( int algo, size_t *keylen,
diff --git a/cipher/cipher.c b/cipher/cipher.c
index a6489d9f5..9d9c82293 100644
--- a/cipher/cipher.c
+++ b/cipher/cipher.c
@@ -1,6 +1,6 @@
 /* cipher.c - cipher dispatcher
  * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
- *               2007 Free Software Foundation, Inc.
+ *               2007, 2008 Free Software Foundation, Inc.
  *
  * This file is part of GnuPG.
  *
@@ -52,17 +52,26 @@
 static struct cipher_table_s cipher_table[TABLE_SIZE];
 static int disabled_algos[TABLE_SIZE];
 
-struct cipher_handle_s {
-    int algo;
-    int mode;
-    size_t blocksize;
-    byte iv[MAX_BLOCKSIZE];  /* (this should be ulong aligned) */
-    byte lastiv[MAX_BLOCKSIZE];
-    int unused;  /* in IV */
-    int (*setkey)( void *c, const byte *key, unsigned keylen );
-    void (*encrypt)( void *c, byte *outbuf, const byte *inbuf );
-    void (*decrypt)( void *c, byte *outbuf, const byte *inbuf );
-    PROPERLY_ALIGNED_TYPE context;
+struct cipher_handle_s
+{
+  int algo;
+  int mode;
+  size_t blocksize;
+
+  /* The initialization vector.  To help code optimization we make
+     sure that it is aligned on an unsigned long and u32 boundary.  */
+  union {
+    unsigned long dummy_ul_iv;
+    u32 dummy_u32_iv;
+    unsigned char iv[MAX_BLOCKSIZE];
+  } u_iv;
+
+  byte lastiv[MAX_BLOCKSIZE];
+  int unused;  /* in IV */
+  int (*setkey)( void *c, const byte *key, unsigned keylen );
+  void (*encrypt)( void *c, byte *outbuf, const byte *inbuf );
+  void (*decrypt)( void *c, byte *outbuf, const byte *inbuf );
+  PROPERLY_ALIGNED_TYPE context;
 };
 
 
@@ -459,14 +468,14 @@ cipher_setkey( CIPHER_HANDLE c, byte *key, unsigned keylen )
 void
 cipher_setiv( CIPHER_HANDLE c, const byte *iv, unsigned ivlen )
 {
-    memset( c->iv, 0, c->blocksize );
+    memset( c->u_iv.iv, 0, c->blocksize );
     if( iv ) {
 	if( ivlen != c->blocksize )
 	    log_info("WARNING: cipher_setiv: ivlen=%u blklen=%u\n",
 		     ivlen, (unsigned)c->blocksize );
 	if( ivlen > c->blocksize )
 	    ivlen = c->blocksize;
-	memcpy( c->iv, iv, ivlen );
+	memcpy( c->u_iv.iv, iv, ivlen );
     }
     c->unused = 0;
 }
@@ -507,10 +516,10 @@ do_cbc_encrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nblocks )
 	/* fixme: the xor should work on words and not on
 	 * bytes.  Maybe it is a good idea to enhance the cipher backend
 	 * API to allow for CBC handling in the backend. */
-	for(ivp=c->iv,i=0; i < blocksize; i++ )
+	for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
 	    outbuf[i] = inbuf[i] ^ *ivp++;
 	(*c->encrypt)( &c->context.c, outbuf, outbuf );
-	memcpy(c->iv, outbuf, blocksize );
+	memcpy(c->u_iv.iv, outbuf, blocksize );
 	inbuf  += c->blocksize;
 	outbuf += c->blocksize;
     }
@@ -530,9 +539,9 @@ do_cbc_decrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nblocks )
 	 * for this here because it is not used otherwise */
 	memcpy(c->lastiv, inbuf, blocksize );
 	(*c->decrypt)( &c->context.c, outbuf, inbuf );
-	for(ivp=c->iv,i=0; i < blocksize; i++ )
+	for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
 	    outbuf[i] ^= *ivp++;
-	memcpy(c->iv, c->lastiv, blocksize );
+	memcpy(c->u_iv.iv, c->lastiv, blocksize );
 	inbuf  += c->blocksize;
 	outbuf += c->blocksize;
     }
@@ -542,119 +551,181 @@
 static void
 do_cfb_encrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nbytes )
 {
-    byte *ivp;
-    size_t blocksize = c->blocksize;
+  byte *ivp;
+  size_t blocksize = c->blocksize;
+  size_t blocksize_x_2 = blocksize + blocksize;
 
-    if( nbytes <= c->unused ) {
-	/* short enough to be encoded by the remaining XOR mask */
-	/* XOR the input with the IV and store input into IV */
-	for(ivp=c->iv+c->blocksize - c->unused; nbytes; nbytes--, c->unused-- )
+  if ( nbytes <= c->unused )
+    {
+      /* Short enough to be encoded by the remaining XOR mask.  XOR
+         the input with the IV and store input into IV. */
+      for (ivp=c->u_iv.iv+c->blocksize - c->unused; nbytes;
+           nbytes--, c->unused-- )
 	*outbuf++ = (*ivp++ ^= *inbuf++);
 	return;
     }
-
-    if( c->unused ) {
-	/* XOR the input with the IV and store input into IV */
-	nbytes -= c->unused;
-	for(ivp=c->iv+blocksize - c->unused; c->unused; c->unused-- )
-	    *outbuf++ = (*ivp++ ^= *inbuf++);
+
+  if ( c->unused )
+    {
+      /* XOR the input with the IV and store input into IV. */
+      nbytes -= c->unused;
+      for (ivp=c->u_iv.iv+blocksize - c->unused; c->unused; c->unused-- )
+        *outbuf++ = (*ivp++ ^= *inbuf++);
     }
 
-    /* Now we can process complete blocks. */
-#if 0
-    /* Experimental code.  We may only use this for standard CFB
-       because for Phil's mode we need to save the IV from before the
-       last encryption - we don't want to do this in the fast CFB
-       encryption routine.  */
-    if (c->algo == CIPHER_ALGO_AES
-        && nbytes >= blocksize
-        && c->mode != CIPHER_MODE_PHILS_CFB) {
-        size_t n;
 
+  /* Now we can process complete blocks.  We use a loop as long as we
+     have at least 2 blocks and use conditions for the rest.  This
+     also allows us to use a bulk encryption function if available. */
+#ifdef USE_AES
+  if (nbytes >= blocksize_x_2
+      && (c->algo == CIPHER_ALGO_AES
+          || c->algo == CIPHER_ALGO_AES256
+          || c->algo == CIPHER_ALGO_AES192))
+    {
+      unsigned int nblocks = nbytes / blocksize;
+      rijndael_cfb_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
+      outbuf += nblocks * blocksize;
+      inbuf  += nblocks * blocksize;
+      nbytes -= nblocks * blocksize;
+    }
+  else
+#endif /*USE_AES*/
+    {
+      while ( nbytes >= blocksize_x_2 )
+        {
+          int i;
+          /* Encrypt the IV. */
+          c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+          /* XOR the input with the IV and store input into IV. */
+          for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
+            *outbuf++ = (*ivp++ ^= *inbuf++);
+          nbytes -= blocksize;
+        }
+    }
 
-        memcpy( c->lastiv, c->iv, blocksize );
-        n = (nbytes / blocksize) * blocksize;
-        rijndael_cfb_encrypt (&c->context.c, c->iv, outbuf, inbuf, n);
-        inbuf += n;
-        outbuf += n;
-        nbytes -= n;
+  if ( nbytes >= blocksize )
+    {
+      int i;
+      /* Save the current IV and then encrypt the IV. */
+      memcpy( c->lastiv, c->u_iv.iv, blocksize );
+      c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+      /* XOR the input with the IV and store input into IV. */
+      for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
+        *outbuf++ = (*ivp++ ^= *inbuf++);
+      nbytes -= blocksize;
     }
-#endif
 
-    while( nbytes >= blocksize ) {
-	int i;
-	/* encrypt the IV (and save the current one) */
-	memcpy( c->lastiv, c->iv, blocksize );
-	(*c->encrypt)( &c->context.c, c->iv, c->iv );
-	/* XOR the input with the IV and store input into IV */
-	for(ivp=c->iv,i=0; i < blocksize; i++ )
-	    *outbuf++ = (*ivp++ ^= *inbuf++);
-	nbytes -= blocksize;
-    }
-    if( nbytes ) { /* process the remaining bytes */
-	/* encrypt the IV (and save the current one) */
-	memcpy( c->lastiv, c->iv, blocksize );
-	(*c->encrypt)( &c->context.c, c->iv, c->iv );
-	c->unused = blocksize;
-	/* and apply the xor */
-	c->unused -= nbytes;
-	for(ivp=c->iv; nbytes; nbytes-- )
-	    *outbuf++ = (*ivp++ ^= *inbuf++);
+  if ( nbytes )
+    {
+      /* Save the current IV and then encrypt the IV. */
+      memcpy (c->lastiv, c->u_iv.iv, blocksize );
+      c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+      c->unused = blocksize;
+      /* Apply the XOR. */
+      c->unused -= nbytes;
+      for(ivp=c->u_iv.iv; nbytes; nbytes-- )
+        *outbuf++ = (*ivp++ ^= *inbuf++);
     }
 }
 
+
 static void
 do_cfb_decrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nbytes )
 {
-    byte *ivp;
-    ulong temp;
-    size_t blocksize = c->blocksize;
-
-    if( nbytes <= c->unused ) {
-	/* short enough to be encoded by the remaining XOR mask */
-	/* XOR the input with the IV and store input into IV */
-	for(ivp=c->iv+blocksize - c->unused; nbytes; nbytes--,c->unused--){
-	    temp = *inbuf++;
-	    *outbuf++ = *ivp ^ temp;
-	    *ivp++ = temp;
-	}
-	return;
+  unsigned char *ivp;
+  unsigned long temp;
+  int i;
+  size_t blocksize = c->blocksize;
+  size_t blocksize_x_2 = blocksize + blocksize;
+
+  if (nbytes <= c->unused)
+    {
+      /* Short enough to be encoded by the remaining XOR mask. */
+      /* XOR the input with the IV and store input into IV. */
+      for (ivp=c->u_iv.iv+blocksize - c->unused;
+           nbytes;
+           nbytes--, c->unused--)
+        {
+          temp = *inbuf++;
+          *outbuf++ = *ivp ^ temp;
+          *ivp++ = temp;
+        }
+      return;
+    }
+
+  if (c->unused)
+    {
+      /* XOR the input with the IV and store input into IV. */
+      nbytes -= c->unused;
+      for (ivp=c->u_iv.iv+blocksize - c->unused; c->unused; c->unused-- )
+        {
+          temp = *inbuf++;
+          *outbuf++ = *ivp ^ temp;
+          *ivp++ = temp;
+        }
+    }
+
+  /* Now we can process complete blocks.  We use a loop as long as we
+     have at least 2 blocks and use conditions for the rest.  This
+     also allows us to use a bulk decryption function if available. */
+#ifdef USE_AES
+  if (nbytes >= blocksize_x_2
+      && (c->algo == CIPHER_ALGO_AES
+          || c->algo == CIPHER_ALGO_AES256
+          || c->algo == CIPHER_ALGO_AES192))
+    {
+      unsigned int nblocks = nbytes / blocksize;
+      rijndael_cfb_dec (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
+      outbuf += nblocks * blocksize;
+      inbuf  += nblocks * blocksize;
+      nbytes -= nblocks * blocksize;
+    }
+  else
+#endif /*USE_AES*/
+    {
+      while (nbytes >= blocksize_x_2 )
+        {
+          /* Encrypt the IV. */
+          c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+          /* XOR the input with the IV and store input into IV. */
+          for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
+            {
+              temp = *inbuf++;
+              *outbuf++ = *ivp ^ temp;
+              *ivp++ = temp;
+            }
+          nbytes -= blocksize;
+        }
     }
 
-    if( c->unused ) {
-	/* XOR the input with the IV and store input into IV */
-	nbytes -= c->unused;
-	for(ivp=c->iv+blocksize - c->unused; c->unused; c->unused-- ) {
-	    temp = *inbuf++;
-	    *outbuf++ = *ivp ^ temp;
-	    *ivp++ = temp;
-	}
+  if (nbytes >= blocksize )
+    {
+      /* Save the current IV and then encrypt the IV. */
+      memcpy ( c->lastiv, c->u_iv.iv, blocksize);
+      c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+      /* XOR the input with the IV and store input into IV. */
+      for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
+        {
+          temp = *inbuf++;
+          *outbuf++ = *ivp ^ temp;
+          *ivp++ = temp;
+        }
+      nbytes -= blocksize;
     }
 
-    /* now we can process complete blocks */
-    while( nbytes >= blocksize ) {
-	int i;
-	/* encrypt the IV (and save the current one) */
-	memcpy( c->lastiv, c->iv, blocksize );
-	(*c->encrypt)( &c->context.c, c->iv, c->iv );
-	/* XOR the input with the IV and store input into IV */
-	for(ivp=c->iv,i=0; i < blocksize; i++ ) {
-	    temp = *inbuf++;
-	    *outbuf++ = *ivp ^ temp;
-	    *ivp++ = temp;
-	}
-	nbytes -= blocksize;
-    }
-    if( nbytes ) { /* process the remaining bytes */
-	/* encrypt the IV (and save the current one) */
-	memcpy( c->lastiv, c->iv, blocksize );
-	(*c->encrypt)( &c->context.c, c->iv, c->iv );
-	c->unused = blocksize;
-	/* and apply the xor */
-	c->unused -= nbytes;
-	for(ivp=c->iv; nbytes; nbytes-- ) {
-	    temp = *inbuf++;
-	    *outbuf++ = *ivp ^ temp;
-	    *ivp++ = temp;
-	}
+  if (nbytes)
+    {
+      /* Save the current IV and then encrypt the IV. */
+      memcpy ( c->lastiv, c->u_iv.iv, blocksize );
+      c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+      c->unused = blocksize;
+      /* Apply the XOR. */
+      c->unused -= nbytes;
+      for (ivp=c->u_iv.iv; nbytes; nbytes-- )
+        {
+          temp = *inbuf++;
+          *outbuf++ = *ivp ^ temp;
+          *ivp++ = temp;
+        }
     }
 }
 
@@ -732,8 +803,8 @@
 void
 cipher_sync( CIPHER_HANDLE c )
 {
     if( c->mode == CIPHER_MODE_PHILS_CFB && c->unused ) {
-	memmove(c->iv + c->unused, c->iv, c->blocksize - c->unused );
-	memcpy(c->iv, c->lastiv + c->blocksize - c->unused, c->unused);
+	memmove(c->u_iv.iv + c->unused, c->u_iv.iv, c->blocksize - c->unused );
+	memcpy(c->u_iv.iv, c->lastiv + c->blocksize - c->unused, c->unused);
 	c->unused = 0;
     }
 }
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 644b75c42..a67cbc56f 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -1,5 +1,5 @@
 /* Rijndael (AES) for GnuPG
- * Copyright (C) 2000, 2001 Free Software Foundation, Inc.
+ * Copyright (C) 2000, 2001, 2008 Free Software Foundation, Inc.
  *
  * This file is part of GnuPG.
  *
@@ -1866,86 +1866,119 @@ prepare_decryption( RIJNDAEL_context *ctx )
 
 
 
-/* Encrypt one block.  A and B may be the same. */
+/* Encrypt one block.  A and B need to be aligned on a 4 byte
+   boundary.  A and B may be the same. */
 static void
-do_encrypt (const RIJNDAEL_context *ctx, byte *b, const byte *a)
+do_encrypt_aligned (const RIJNDAEL_context *ctx,
+                    unsigned char *b, const unsigned char *a)
 {
-  int r;
-  byte temp[4][4];
-  int ROUNDS = ctx->ROUNDS;
 #define rk (ctx->keySched)
+  int ROUNDS = ctx->ROUNDS;
+  int r;
+  union
+  {
+    u32  tempu32[4];  /* Force correct alignment. */
+    byte temp[4][4];
+  } u;
 
-  *((u32*)temp[0]) = *((u32*)(a   )) ^ *((u32*)rk[0][0]);
-  *((u32*)temp[1]) = *((u32*)(a+ 4)) ^ *((u32*)rk[0][1]);
-  *((u32*)temp[2]) = *((u32*)(a+ 8)) ^ *((u32*)rk[0][2]);
-  *((u32*)temp[3]) = *((u32*)(a+12)) ^ *((u32*)rk[0][3]);
-  *((u32*)(b    )) = *((u32*)T1[temp[0][0]])
-	^ *((u32*)T2[temp[1][1]])
-	^ *((u32*)T3[temp[2][2]])
-	^ *((u32*)T4[temp[3][3]]);
-  *((u32*)(b + 4)) = *((u32*)T1[temp[1][0]])
-	^ *((u32*)T2[temp[2][1]])
-	^ *((u32*)T3[temp[3][2]])
-	^ *((u32*)T4[temp[0][3]]);
-  *((u32*)(b + 8)) = *((u32*)T1[temp[2][0]])
-	^ *((u32*)T2[temp[3][1]])
-	^ *((u32*)T3[temp[0][2]])
-	^ *((u32*)T4[temp[1][3]]);
-  *((u32*)(b +12)) = *((u32*)T1[temp[3][0]])
-	^ *((u32*)T2[temp[0][1]])
-	^ *((u32*)T3[temp[1][2]])
-	^ *((u32*)T4[temp[2][3]]);
-  for (r = 1; r < ROUNDS-1; r++) {
-    *((u32*)temp[0]) = *((u32*)(b   )) ^ *((u32*)rk[r][0]);
-    *((u32*)temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[r][1]);
-    *((u32*)temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[r][2]);
-    *((u32*)temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[r][3]);
+  *((u32*)u.temp[0]) = *((u32*)(a   )) ^ *((u32*)rk[0][0]);
+  *((u32*)u.temp[1]) = *((u32*)(a+ 4)) ^ *((u32*)rk[0][1]);
+  *((u32*)u.temp[2]) = *((u32*)(a+ 8)) ^ *((u32*)rk[0][2]);
+  *((u32*)u.temp[3]) = *((u32*)(a+12)) ^ *((u32*)rk[0][3]);
+  *((u32*)(b    ))   = (*((u32*)T1[u.temp[0][0]])
+                        ^ *((u32*)T2[u.temp[1][1]])
+                        ^ *((u32*)T3[u.temp[2][2]])
+                        ^ *((u32*)T4[u.temp[3][3]]));
+  *((u32*)(b + 4))   = (*((u32*)T1[u.temp[1][0]])
+                        ^ *((u32*)T2[u.temp[2][1]])
+                        ^ *((u32*)T3[u.temp[3][2]])
+                        ^ *((u32*)T4[u.temp[0][3]]));
+  *((u32*)(b + 8))   = (*((u32*)T1[u.temp[2][0]])
+                        ^ *((u32*)T2[u.temp[3][1]])
+                        ^ *((u32*)T3[u.temp[0][2]])
+                        ^ *((u32*)T4[u.temp[1][3]]));
+  *((u32*)(b +12))   = (*((u32*)T1[u.temp[3][0]])
+                        ^ *((u32*)T2[u.temp[0][1]])
+                        ^ *((u32*)T3[u.temp[1][2]])
+                        ^ *((u32*)T4[u.temp[2][3]]));
 
-    *((u32*)(b   )) = *((u32*)T1[temp[0][0]])
-	^ *((u32*)T2[temp[1][1]])
-	^ *((u32*)T3[temp[2][2]])
-	^ *((u32*)T4[temp[3][3]]);
-    *((u32*)(b + 4)) = *((u32*)T1[temp[1][0]])
-	^ *((u32*)T2[temp[2][1]])
-	^ *((u32*)T3[temp[3][2]])
-	^ *((u32*)T4[temp[0][3]]);
-    *((u32*)(b + 8)) = *((u32*)T1[temp[2][0]])
-	^ *((u32*)T2[temp[3][1]])
-	^ *((u32*)T3[temp[0][2]])
-	^ *((u32*)T4[temp[1][3]]);
-    *((u32*)(b +12)) = *((u32*)T1[temp[3][0]])
-	^ *((u32*)T2[temp[0][1]])
-	^ *((u32*)T3[temp[1][2]])
-	^ *((u32*)T4[temp[2][3]]);
+  for (r = 1; r < ROUNDS-1; r++)
+    {
+      *((u32*)u.temp[0]) = *((u32*)(b   )) ^ *((u32*)rk[r][0]);
+      *((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[r][1]);
+      *((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[r][2]);
+      *((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[r][3]);
+
+      *((u32*)(b    ))   = (*((u32*)T1[u.temp[0][0]])
+                            ^ *((u32*)T2[u.temp[1][1]])
+                            ^ *((u32*)T3[u.temp[2][2]])
+                            ^ *((u32*)T4[u.temp[3][3]]));
+      *((u32*)(b + 4))   = (*((u32*)T1[u.temp[1][0]])
+                            ^ *((u32*)T2[u.temp[2][1]])
+                            ^ *((u32*)T3[u.temp[3][2]])
+                            ^ *((u32*)T4[u.temp[0][3]]));
+      *((u32*)(b + 8))   = (*((u32*)T1[u.temp[2][0]])
+                            ^ *((u32*)T2[u.temp[3][1]])
+                            ^ *((u32*)T3[u.temp[0][2]])
+                            ^ *((u32*)T4[u.temp[1][3]]));
+      *((u32*)(b +12))   = (*((u32*)T1[u.temp[3][0]])
+                            ^ *((u32*)T2[u.temp[0][1]])
+                            ^ *((u32*)T3[u.temp[1][2]])
+                            ^ *((u32*)T4[u.temp[2][3]]));
     }
-  /* last round is special */
-  *((u32*)temp[0]) = *((u32*)(b   )) ^ *((u32*)rk[ROUNDS-1][0]);
-  *((u32*)temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[ROUNDS-1][1]);
-  *((u32*)temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[ROUNDS-1][2]);
-  *((u32*)temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[ROUNDS-1][3]);
-  b[ 0] = T1[temp[0][0]][1];
-  b[ 1] = T1[temp[1][1]][1];
-  b[ 2] = T1[temp[2][2]][1];
-  b[ 3] = T1[temp[3][3]][1];
-  b[ 4] = T1[temp[1][0]][1];
-  b[ 5] = T1[temp[2][1]][1];
-  b[ 6] = T1[temp[3][2]][1];
-  b[ 7] = T1[temp[0][3]][1];
-  b[ 8] = T1[temp[2][0]][1];
-  b[ 9] = T1[temp[3][1]][1];
-  b[10] = T1[temp[0][2]][1];
-  b[11] = T1[temp[1][3]][1];
-  b[12] = T1[temp[3][0]][1];
-  b[13] = T1[temp[0][1]][1];
-  b[14] = T1[temp[1][2]][1];
-  b[15] = T1[temp[2][3]][1];
-  *((u32*)(b   )) ^= *((u32*)rk[ROUNDS][0]);
-  *((u32*)(b+ 4)) ^= *((u32*)rk[ROUNDS][1]);
-  *((u32*)(b+ 8)) ^= *((u32*)rk[ROUNDS][2]);
-  *((u32*)(b+12)) ^= *((u32*)rk[ROUNDS][3]);
+
+  /* Last round is special. */
+  *((u32*)u.temp[0]) = *((u32*)(b   )) ^ *((u32*)rk[ROUNDS-1][0]);
+  *((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[ROUNDS-1][1]);
+  *((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[ROUNDS-1][2]);
+  *((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[ROUNDS-1][3]);
+  b[ 0] = T1[u.temp[0][0]][1];
+  b[ 1] = T1[u.temp[1][1]][1];
+  b[ 2] = T1[u.temp[2][2]][1];
+  b[ 3] = T1[u.temp[3][3]][1];
+  b[ 4] = T1[u.temp[1][0]][1];
+  b[ 5] = T1[u.temp[2][1]][1];
+  b[ 6] = T1[u.temp[3][2]][1];
+  b[ 7] = T1[u.temp[0][3]][1];
+  b[ 8] = T1[u.temp[2][0]][1];
+  b[ 9] = T1[u.temp[3][1]][1];
+  b[10] = T1[u.temp[0][2]][1];
+  b[11] = T1[u.temp[1][3]][1];
+  b[12] = T1[u.temp[3][0]][1];
+  b[13] = T1[u.temp[0][1]][1];
+  b[14] = T1[u.temp[1][2]][1];
+  b[15] = T1[u.temp[2][3]][1];
+  *((u32*)(b   )) ^= *((u32*)rk[ROUNDS][0]);
+  *((u32*)(b+ 4)) ^= *((u32*)rk[ROUNDS][1]);
+  *((u32*)(b+ 8)) ^= *((u32*)rk[ROUNDS][2]);
+  *((u32*)(b+12)) ^= *((u32*)rk[ROUNDS][3]);
 #undef rk
 }
 
+
+static void
+do_encrypt (const RIJNDAEL_context *ctx,
+            unsigned char *bx, const unsigned char *ax)
+{
+  /* BX and AX are not necessarily correctly aligned.  Thus we need
+     to copy them here. */
+  union
+  {
+    u32  dummy[4];
+    byte a[16];
+  } a;
+  union
+  {
+    u32  dummy[4];
+    byte b[16];
+  } b;
+
+  memcpy (a.a, ax, 16);
+  do_encrypt_aligned (ctx, b.b, a.a);
+  memcpy (bx, b.b, 16);
+}
+
+
 static void
 rijndael_encrypt (void *ctx, byte *b, const byte *a)
 {
@@ -1953,56 +1986,6 @@ rijndael_encrypt (void *ctx, byte *b, const byte *a)
   do_encrypt (ctx, b, a);
   burn_stack (16 + 2*sizeof(int));
 }
 
-#if 0
-/* Experimental code.  Needs to be generalized and we might want to
-   have variants for all possible sizes of the largest scalar type.
-   Also need to make sure that INBUF and OUTBUF are properly
-   aligned. */
-void
-rijndael_cfb_encrypt (void *ctx, byte *iv,
-                      byte *outbuf, const byte *inbuf, size_t nbytes)
-{
-/*   if ( ((unsigned long)inbuf & 3) || ((unsigned long)outbuf & 3) ) */
-/*     { */
-      /* Not properly aligned, use the slow version.  Actually the
-         compiler might even optimize it pretty well if the
-         target CPU has relaxed alignment requirements.  Thus it is
-         questionable whether we should at all go into the hassles of
-         doing alignment-wise optimizations by ourselves.  A quick test
-         with gcc 4.0 on ia32 did not show any advantages. */
-      byte *ivp;
-      int i;
-
-      while (nbytes >= 16)
-        {
-          do_encrypt (ctx, iv, iv);
-          for (i=0, ivp = iv; i < 16; i++)
-            *outbuf++ = (*ivp++ ^= *inbuf++);
-          nbytes -= 16;
-        }
-/*     } */
-/*   else */
-/*     { */
-/*       u32 *ivp; */
-/*       u32 *ob = (u32*)outbuf; */
-/*       const u32 *ib = (const u32*)inbuf; */
-
-/*       while (nbytes >= 16) */
-/*         { */
-/*           do_encrypt (ctx, iv, iv); */
-/*           ivp = iv; */
-/*           *ob++ = (*ivp++ ^= *ib++); */
-/*           *ob++ = (*ivp++ ^= *ib++); */
-/*           *ob++ = (*ivp++ ^= *ib++); */
-/*           *ob++ = (*ivp ^= *ib++); */
-/*           nbytes -= 16; */
-/*         } */
-/*     } */
-  burn_stack (16 + 2*sizeof(int));
-}
-#endif
-
-
 
 /* Decrypt one block.  a and b may be the same. */
@@ -2097,6 +2080,67 @@ rijndael_decrypt (void *ctx, byte *b, const byte *a)
   do_decrypt (ctx, b, a);
   burn_stack (16+2*sizeof(int));
 }
+
+
+
+/* Bulk encryption of complete blocks in CFB mode.  Caller needs to
+   make sure that IV is aligned on an unsigned long boundary.  This
+   function is only intended for the bulk encryption feature of
+   cipher.c. */
+void
+rijndael_cfb_enc (void *context, unsigned char *iv,
+                  void *outbuf_arg, const void *inbuf_arg,
+                  unsigned int nblocks)
+{
+  RIJNDAEL_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char *ivp;
+  int i;
+
+  for ( ;nblocks; nblocks-- )
+    {
+      /* Encrypt the IV. */
+      do_encrypt_aligned (ctx, iv, iv);
+      /* XOR the input with the IV and store input into IV. */
+      for (ivp=iv,i=0; i < 16; i++ )
+        *outbuf++ = (*ivp++ ^= *inbuf++);
+    }
+
+  burn_stack (16 + 2*sizeof(int));
+}
+
+/* Bulk decryption of complete blocks in CFB mode.  Caller needs to
+   make sure that IV is aligned on an unsigned long boundary.  This
+   function is only intended for the bulk decryption feature of
+   cipher.c. */
+void
+rijndael_cfb_dec (void *context, unsigned char *iv,
+                  void *outbuf_arg, const void *inbuf_arg,
+                  unsigned int nblocks)
+{
+  RIJNDAEL_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char *ivp;
+  unsigned char temp;
+  int i;
+
+  for ( ;nblocks; nblocks-- )
+    {
+      do_encrypt_aligned (ctx, iv, iv);
+      for (ivp=iv,i=0; i < 16; i++ )
+        {
+          temp = *inbuf++;
+          *outbuf++ = *ivp ^ temp;
+          *ivp++ = temp;
+        }
+    }
+
+  burn_stack (16 + 2*sizeof(int));
+}
+
+
 /* Test a single encryption and decryption with each key size. */
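
[Illustration -- not part of the patch.]  The bulk routines above gain
their speed by keeping the CFB feedback loop next to the aligned AES
core instead of dispatching through a function pointer for every
block.  The chaining itself is simple: encrypt the IV, XOR the
plaintext into it, and the resulting ciphertext becomes the next IV.
The following self-contained sketch shows that structure; toy_encrypt
is a hypothetical stand-in for the real block cipher (it is NOT secure
and NOT AES), used only to make the feedback visible.

#include <stdio.h>

#define BLOCKSIZE 16

/* Hypothetical single-block "cipher" stand-in; NOT secure. */
static void
toy_encrypt (unsigned char *out, const unsigned char *in)
{
  int i;
  for (i = 0; i < BLOCKSIZE; i++)
    out[i] = (unsigned char)((in[i] ^ 0xA5) + i);
}

/* CFB encryption of NBLOCKS complete blocks, mirroring the loop in
   rijndael_cfb_enc: C[n] = E(IV) ^ P[n]; IV = C[n]. */
static void
cfb_enc_blocks (unsigned char *iv, unsigned char *out,
                const unsigned char *in, unsigned int nblocks)
{
  unsigned int i;

  for (; nblocks; nblocks--)
    {
      toy_encrypt (iv, iv);            /* Encrypt the IV in place. */
      for (i = 0; i < BLOCKSIZE; i++)
        *out++ = (iv[i] ^= *in++);     /* XOR in the plaintext; keep the
                                          ciphertext as the next IV. */
    }
}

int
main (void)
{
  unsigned char iv[BLOCKSIZE] = { 0 };
  unsigned char pt[2 * BLOCKSIZE] = "two blocks of sample plaintext.";
  unsigned char ct[2 * BLOCKSIZE];
  unsigned int i;

  cfb_enc_blocks (iv, ct, pt, 2);
  for (i = 0; i < 2 * BLOCKSIZE; i++)
    printf ("%02x", ct[i]);
  putchar ('\n');
  return 0;
}

Decryption differs only in the feedback: the received ciphertext byte,
not the XOR result, is stored back into the IV, which is why
rijndael_cfb_dec above needs its temp variable.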
diff --git a/configure.ac b/configure.ac
index 9974ca1e4..340f9eb33 100644
--- a/configure.ac
+++ b/configure.ac
@@ -25,7 +25,7 @@ min_automake_version="1.9.3"
 # Remember to change the version number immediately *after* a release.
 # Set my_issvn to "yes" for non-released code.  Remember to run an
 # "svn up" and "autogen.sh --force" right before creating a distribution.
-m4_define([my_version], [1.4.9rc1])
+m4_define([my_version], [1.4.9])
 m4_define([my_issvn], [yes])
 
 m4_define([svn_revision], m4_esyscmd([echo $((svn info 2>/dev/null \
diff --git a/doc/ChangeLog b/doc/ChangeLog
index d7bf91b2b..80dd2bc8e 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,9 @@
+2007-12-21  Werner Koch
+
+	* README.W32: Note that Vista is supported and that at least NT-4
+	is required.  It might still work on older systems, but I don't
+	know for sure.
+
 2007-12-12  Werner Koch
 
 	* gpg.texi, specify-user-id.texi: Update from gnupg-2.
diff --git a/doc/README.W32 b/doc/README.W32
index b734cb32b..8abfc24a7 100644
--- a/doc/README.W32
+++ b/doc/README.W32
@@ -1,7 +1,8 @@
 README.W32                                                 -*- text -*-
 
-This is a binary package with GnuPG for MS-Windows 95, 98, WNT, W2000
-and XP.  See the file README for generic instructions and usage hints.
+This is a binary package with GnuPG for MS-Windows NT-4, W2000, XP and
+Vista.  A native version for 64 bit is not available.  See the file
+README for generic instructions and usage hints.
 
 A FAQ comes with this package and a probably more recent one can be
 found online at http://www.gnupg.org/faq.html.  See
diff --git a/scripts/ChangeLog b/scripts/ChangeLog
index 92d5999ec..a6b536f12 100644
--- a/scripts/ChangeLog
+++ b/scripts/ChangeLog
@@ -1,3 +1,7 @@
+2008-01-30  Werner Koch
+
+	* w32installer.nsi: Set the OutPath back.
+
 2007-12-12  Werner Koch
 
 	* config.sub, config.guess: Update to version 2007-11-19.
diff --git a/scripts/w32installer.nsi b/scripts/w32installer.nsi
index 327182d5b..841850c20 100644
--- a/scripts/w32installer.nsi
+++ b/scripts/w32installer.nsi
@@ -351,6 +351,8 @@ Section "-Finish"
     WriteRegStr HKCU "Software\GNU\GnuPG" "Lang" $R3
   ;;
 
+  # Set the OutPath back so that the README file can be displayed.
+  SetOutPath "$INSTDIR"
 SectionEnd ; "-Finish"
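
[Illustration -- not part of the patch.]  Both touched files rely on
the same portability idiom: a union with a u32 member forces the byte
array sharing it onto a word boundary (struct cipher_handle_s does
this for the IV), and callers whose buffers might be unaligned are
routed through a copy into such a union (as do_encrypt does before
calling do_encrypt_aligned).  Below is a minimal sketch of the idiom
with hypothetical names, under the assumption that the core only ever
reads the block through u32 pointers.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

union aligned_block
{
  uint32_t dummy[4];           /* Forces at least 4-byte alignment. */
  unsigned char b[16];
};

/* Hypothetical core that requires 4-byte alignment; the XOR is a
   stand-in for real per-word processing. */
static void
process_aligned (unsigned char *blk)
{
  uint32_t *w = (uint32_t *) blk;
  int i;

  for (i = 0; i < 4; i++)
    w[i] ^= 0xdeadbeefu;
}

/* Wrapper for possibly unaligned buffers, analogous to do_encrypt:
   copy into aligned storage, process, copy back. */
static void
process_any (unsigned char *out, const unsigned char *in)
{
  union aligned_block tmp;

  memcpy (tmp.b, in, 16);
  process_aligned (tmp.b);
  memcpy (out, tmp.b, 16);
}

int
main (void)
{
  unsigned char buf[17] = { 0 };

  process_any (buf + 1, buf + 1);   /* Deliberately misaligned. */
  printf ("%02x %02x\n", buf[1], buf[2]);
  return 0;
}

The cost of the two memcpy calls is the reason cipher.c keeps the IV
inside the union permanently: the hot path (rijndael_cfb_enc) can then
call the aligned core directly, which helps explain the measured
speedup on ia32.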