Experimental code to improve AES performance. Got about a 25% speedup on ia32.

Werner Koch 2005-08-11 16:57:29 +00:00
parent 2e38eab93c
commit 0a3eda24ee
3 changed files with 76 additions and 1 deletion

cipher/ChangeLog

@@ -1,3 +1,9 @@
2005-08-11  Werner Koch  <wk@g10code.com>

	* rijndael.c (rijndael_cfb_encrypt): Experimental code to improve
	AES performance.  Got about a 25% speedup on ia32.
	* cipher.c (do_cfb_encrypt): Ditto.

2005-06-07  David Shaw  <dshaw@jabberwocky.com>

	* random.c: Fix prototype of the fast random gatherer.  Noted by

cipher/cipher.c

@@ -536,7 +536,25 @@ do_cfb_encrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nbytes )
*outbuf++ = (*ivp++ ^= *inbuf++);
}
/* Now we can process complete blocks. */
#if 0
/* Experimental code.  We may only use this for standard CFB
   because for Phil's mode we need to save the IV from before
   the last encryption - we don't want to do that in the fast
   CFB encryption routine. */
if (c->algo == CIPHER_ALGO_AES
&& nbytes >= blocksize
&& c->mode != CIPHER_MODE_PHILS_CFB) {
size_t n;
memcpy( c->lastiv, c->iv, blocksize );
n = (nbytes / blocksize) * blocksize;
rijndael_cfb_encrypt (&c->context.c, c->iv, outbuf, inbuf, n);
inbuf += n;
outbuf += n;
nbytes -= n;
}
#endif
while( nbytes >= blocksize ) {
int i;
/* encrypt the IV (and save the current one) */
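The comment in the #if 0 block above hints at why Phil's CFB mode is excluded from the fast path: that mode's resync step needs the IV as it stood before the most recent block encryption, and the generic loop preserves it block by block. The following is a minimal sketch of that invariant, not GnuPG's actual code; struct cfb_state, encrypt_block, and cfb_encrypt_generic are hypothetical names standing in for the handle fields and calls used above.

#include <string.h>

#define BLOCKSIZE 16

struct cfb_state {
  unsigned char iv[BLOCKSIZE];      /* current chaining value        */
  unsigned char lastiv[BLOCKSIZE];  /* IV from before the last encrypt */
};

/* Assumed helper: ECB-encrypt one block in place. */
extern void encrypt_block (void *ctx, unsigned char *buf);

static void
cfb_encrypt_generic (void *ctx, struct cfb_state *s,
                     unsigned char *out, const unsigned char *in,
                     size_t nblocks)
{
  size_t k;
  int i;

  for (k = 0; k < nblocks; k++)
    {
      /* Save the IV before encrypting it, so a later CFB resync
         (Phil's mode) can rewind to it.  */
      memcpy (s->lastiv, s->iv, BLOCKSIZE);
      encrypt_block (ctx, s->iv);
      /* XOR the keystream into the input; the ciphertext also
         becomes the next chaining value.  */
      for (i = 0; i < BLOCKSIZE; i++)
        *out++ = (s->iv[i] ^= *in++);
    }
}

A bulk routine like rijndael_cfb_encrypt processes many blocks per call and can only save the IV once up front (as the memcpy into c->lastiv above does), which is fine for standard CFB but loses the per-block lastiv that Phil's mode relies on - hence the c->mode != CIPHER_MODE_PHILS_CFB guard.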

cipher/rijndael.c

@@ -1955,6 +1955,57 @@ rijndael_encrypt (void *ctx, byte *b, const byte *a)
burn_stack (16 + 2*sizeof(int));
}
#if 0
/* Experimental code.  Needs to be generalized and we might want
   to have variants for all possible sizes of the largest scalar
   type.  Also need to make sure that INBUF and OUTBUF are
   properly aligned. */
void
rijndael_cfb_encrypt (void *ctx, byte *iv,
byte *outbuf, const byte *inbuf, size_t nbytes)
{
/* if ( ((unsigned long)inbuf & 3) || ((unsigned long)outbuf & 3) ) */
/*   { */
      /* Not properly aligned, use the slow version.  Actually the
         compiler might even optimize this pretty well if the
         target CPU has relaxed alignment requirements.  Thus it is
         questionable whether we should go to the hassle of doing
         alignment-wise optimizations ourselves at all.  A quick
         test with gcc 4.0 on ia32 did not show any advantages. */
byte *ivp;
int i;
while (nbytes >= 16)
{
do_encrypt (ctx, iv, iv);
for (i=0, ivp = iv; i < 16; i++)
*outbuf++ = (*ivp++ ^= *inbuf++);
nbytes -= 16;
}
/* } */
/* else */
/* { */
/* u32 *ivp; */
/* u32 *ob = (u32*)outbuf; */
/* const u32 *ib = (const u32*)inbuf; */
/* while (nbytes >= 16) */
/* { */
/* do_encrypt (ctx, iv, iv); */
/* ivp = iv; */
/* *ob++ = (*ivp++ ^= *ib++); */
/* *ob++ = (*ivp++ ^= *ib++); */
/* *ob++ = (*ivp++ ^= *ib++); */
/* *ob++ = (*ivp ^= *ib++); */
/* nbytes -= 16; */
/* } */
/* } */
burn_stack (16 + 2*sizeof(int));
}
#endif
/* Decrypt one block. a and b may be the same. */
static void
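The commented-out u32 path above casts byte pointers to word pointers, which is exactly the alignment hazard the function's opening comment worries about. One portable middle ground, sketched below, is to route word accesses through memcpy; the helper cfb_xor_words is not part of this commit, and C99 <stdint.h> is assumed where the file would use its own u32.

#include <stdint.h>
#include <string.h>

static void
cfb_xor_words (unsigned char *iv, unsigned char *outbuf,
               const unsigned char *inbuf)
{
  int i;
  uint32_t w, x;

  for (i = 0; i < 16; i += 4)
    {
      memcpy (&w, iv + i, 4);      /* load IV word, alignment-safe  */
      memcpy (&x, inbuf + i, 4);   /* load input word               */
      w ^= x;                      /* ciphertext word = IV ^ input  */
      memcpy (iv + i, &w, 4);      /* chain: feeds the next block   */
      memcpy (outbuf + i, &w, 4);  /* emit ciphertext               */
    }
}

On targets with relaxed alignment such as ia32, gcc lowers each fixed four-byte memcpy to a single 32-bit move, giving much the same code as the pointer casts without their undefined behaviour on stricter CPUs.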