1
0
mirror of git://git.gnupg.org/gnupg.git synced 2025-01-03 12:11:33 +01:00
gnupg/mpi/mpi-pow.c
Werner Koch cad8216f9a mpi: mpi-pow improvements
* mpi/mpi-pow.c (USE_ALGORITHM_SIMPLE_EXPONENTIATION): New.
(mul_mod) [!USE_ALGORITHM_SIMPLE_EXPONENTIATION]: New.
(mpi_powm) [!USE_ALGORITHM_SIMPLE_EXPONENTIATION]: New implementation
of left-to-right k-ary exponentiation.
--

This is a backport from Libgcrypt commit
45aa6131e93fac89d46733b3436d960f35fb99b2

    Signed-off-by: NIIBE Yutaka <gniibe@fsij.org>

    For the Yarom/Falkner flush+reload cache side-channel attack, we
    changed the code so that it always calls the multiplication
    routine (even if we can skip it to get result).  This results in
    some performance regression.

    This change is for recovering performance with a more efficient
    algorithm.

Signed-off-by: Werner Koch <wk@gnupg.org>
2013-10-18 10:54:55 +02:00

746 lines
20 KiB
C

/* mpi-pow.c - MPI functions
* Copyright (C) 1994, 1996, 1998, 2000 Free Software Foundation, Inc.
* Copyright (C) 2013 Werner Koch
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mpi-internal.h"
#include "longlong.h"
#include <assert.h>
/*
* If you need the old implementation, add the compilation option
* -DUSE_ALGORITHM_SIMPLE_EXPONENTIATION
* or uncomment this line:
#define USE_ALGORITHM_SIMPLE_EXPONENTIATION 1
*/
#if defined(USE_ALGORITHM_SIMPLE_EXPONENTIATION)
/****************
 * RES = BASE ^ EXP mod MOD
 *
 * Plain left-to-right binary (square-and-multiply) exponentiation.
 * This variant is only compiled when
 * USE_ALGORITHM_SIMPLE_EXPONENTIATION is defined.
 *
 * RES may be the same object as BASE, EXPONENT or MOD: inputs whose
 * limbs alias those of RES are copied to temporary limb space before
 * RES is written.  A MOD with zero limbs deliberately provokes a
 * division-by-zero signal.
 *
 * The result is stored with sign 0, except when BASE is negative,
 * EXPONENT is odd and the residue is non-zero; then MOD - residue is
 * stored with the sign of MOD.
 */
void
mpi_powm( MPI res, MPI base, MPI exponent, MPI mod)
{
    mpi_ptr_t rp, ep, mp, bp;
    mpi_size_t esize, msize, bsize, rsize;
    int msign, bsign, rsign;
    int esec, msec, bsec;
    mpi_size_t size;
    int mod_shift_cnt;
    int negative_result;
    mpi_ptr_t mp_marker=NULL, bp_marker=NULL, ep_marker=NULL;
    mpi_ptr_t xp_marker=NULL;
    mpi_ptr_t tspace = NULL;
    mpi_size_t tsize=0;   /* to avoid compiler warning */

    esize = exponent->nlimbs;
    msize = mod->nlimbs;
    size = 2 * msize;     /* RES must be able to hold a full product. */
    msign = mod->sign;

    esec = mpi_is_secure(exponent);
    msec = mpi_is_secure(mod);
    bsec = mpi_is_secure(base);

    rp = res->d;
    ep = exponent->d;

    if( !msize )
        msize = 1 / msize;  /* provoke a signal */

    if( !esize ) {
        /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
         * depending on if MOD equals 1.  MOD is inspected before RES
         * is written because RES may alias MOD, and RES is grown
         * first because it may not own any limb yet. */
        res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
        if( res->nlimbs ) {
            RESIZE_IF_NEEDED( res, 1 );
            rp = res->d;
            rp[0] = 1;
        }
        res->sign = 0;
        goto leave;
    }

    /* Normalize MOD (i.e. make its most significant bit set) as required by
     * mpn_divrem.  This will make the intermediate values in the calculation
     * slightly larger, but the correct result is obtained after a final
     * reduction using the original MOD value.  */
    mp = mp_marker = mpi_alloc_limb_space(msize, msec);
    count_leading_zeros( mod_shift_cnt, mod->d[msize-1] );
    if( mod_shift_cnt )
        mpihelp_lshift( mp, mod->d, msize, mod_shift_cnt );
    else
        MPN_COPY( mp, mod->d, msize );

    bsize = base->nlimbs;
    bsign = base->sign;
    if( bsize > msize ) { /* The base is larger than the module. Reduce it. */
        /* Allocate (BSIZE + 1) with space for remainder and quotient.
         * (The quotient is (bsize - msize + 1) limbs.)  */
        bp = bp_marker = mpi_alloc_limb_space( bsize + 1, bsec );
        MPN_COPY( bp, base->d, bsize );
        /* We don't care about the quotient, store it above the remainder,
         * at BP + MSIZE.  */
        mpihelp_divrem( bp + msize, 0, bp, bsize, mp, msize );
        bsize = msize;
        /* Canonicalize the base, since we are going to multiply with it
         * quite a few times.  */
        MPN_NORMALIZE( bp, bsize );
    }
    else
        bp = base->d;

    if( !bsize ) {
        res->nlimbs = 0;
        res->sign = 0;
        goto leave;
    }

    /* Make BASE and EXPONENT not overlap with RES.  MOD needs no check:
     * MP always points to the private, normalized copy made above.  */
    if( rp == bp ) {
        /* RES and BASE are identical.  Allocate temp. space for BASE.  */
        assert( !bp_marker );
        bp = bp_marker = mpi_alloc_limb_space( bsize, bsec );
        MPN_COPY( bp, rp, bsize );
    }
    if( rp == ep ) {
        /* RES and EXPONENT are identical.
         * Allocate temp. space for EXPONENT.  */
        ep = ep_marker = mpi_alloc_limb_space( esize, esec );
        MPN_COPY( ep, rp, esize );
    }

    /* Now that no input limb is read through RES->d anymore, RES can
     * be grown in place.  Doing it here guarantees that RES->d, which
     * receives the final result below, really has SIZE limbs.  */
    if( res->alloced < size ) {
        mpi_resize( res, size );
        rp = res->d;
    }

    MPN_COPY( rp, bp, bsize );
    rsize = bsize;
    rsign = 0;  /* Only the negative-result fixup below sets a sign. */

    {
        mpi_size_t i;
        mpi_ptr_t xp = xp_marker = mpi_alloc_limb_space( 2 * (msize + 1), msec );
        int c;
        mpi_limb_t e;
        mpi_limb_t carry_limb;
        struct karatsuba_ctx karactx;

        memset( &karactx, 0, sizeof karactx );
        negative_result = (ep[0] & 1) && bsign;

        i = esize - 1;
        e = ep[i];
        count_leading_zeros (c, e);
        e = (e << c) << 1;     /* shift the exp bits to the left, lose msb */
        c = BITS_PER_MPI_LIMB - 1 - c;

        /* Main loop.
         *
         * Make the result be pointed to alternately by XP and RP.  This
         * helps us avoid block copying, which would otherwise be necessary
         * with the overlap restrictions of mpihelp_divmod.  With 50%
         * probability the result after this loop will be in the area
         * originally pointed by RP (==RES->d), and with 50% probability
         * in the area originally pointed to by XP.
         */
        for(;;) {
            while( c ) {
                mpi_ptr_t tp;
                mpi_size_t xsize;

                /* XP := RP^2 */
                if( rsize < KARATSUBA_THRESHOLD )
                    mpih_sqr_n_basecase( xp, rp, rsize );
                else {
                    if( !tspace ) {
                        tsize = 2 * rsize;
                        tspace = mpi_alloc_limb_space( tsize, 0 );
                    }
                    else if( tsize < (2*rsize) ) {
                        mpi_free_limb_space( tspace );
                        tsize = 2 * rsize;
                        tspace = mpi_alloc_limb_space( tsize, 0 );
                    }
                    mpih_sqr_n( xp, rp, rsize, tspace );
                }

                xsize = 2 * rsize;
                if( xsize > msize ) {
                    mpihelp_divrem(xp + msize, 0, xp, xsize, mp, msize);
                    xsize = msize;
                }

                tp = rp; rp = xp; xp = tp;
                rsize = xsize;

                /* To mitigate the Yarom/Falkner flush+reload cache
                 * side-channel attack on the RSA secret exponent, we
                 * do the multiplication regardless of the value of
                 * the high-bit of E.  But to avoid this performance
                 * penalty we do it only if the exponent has been
                 * stored in secure memory and we can thus assume it
                 * is a secret exponent.  */
                if (esec || (mpi_limb_signed_t)e < 0) {
                    /* XP := RP * BP */
                    if( bsize < KARATSUBA_THRESHOLD ) {
                        mpihelp_mul( xp, rp, rsize, bp, bsize );
                    }
                    else {
                        mpihelp_mul_karatsuba_case(
                                     xp, rp, rsize, bp, bsize, &karactx );
                    }

                    xsize = rsize + bsize;
                    if( xsize > msize ) {
                        mpihelp_divrem(xp + msize, 0, xp, xsize, mp, msize);
                        xsize = msize;
                    }
                }
                /* Keep the product only if the exponent bit is set. */
                if ((mpi_limb_signed_t)e < 0) {
                    tp = rp; rp = xp; xp = tp;
                    rsize = xsize;
                }
                e <<= 1;
                c--;
            }

            i--;
            if( i < 0 )
                break;
            e = ep[i];
            c = BITS_PER_MPI_LIMB;
        }

        /* We shifted MOD, the modulo reduction argument, left MOD_SHIFT_CNT
         * steps.  Adjust the result by reducing it with the original MOD.
         *
         * Also make sure the result is put in RES->d (where it already
         * might be, see above).
         */
        if( mod_shift_cnt ) {
            carry_limb = mpihelp_lshift( res->d, rp, rsize, mod_shift_cnt);
            rp = res->d;
            if( carry_limb ) {
                rp[rsize] = carry_limb;
                rsize++;
            }
        }
        else if( res->d != rp ) {
            MPN_COPY( res->d, rp, rsize);
            rp = res->d;
        }

        if( rsize >= msize ) {
            mpihelp_divrem(rp + msize, 0, rp, rsize, mp, msize);
            rsize = msize;
        }

        /* Remove any leading zero words from the result.  */
        if( mod_shift_cnt )
            mpihelp_rshift( rp, rp, rsize, mod_shift_cnt);
        MPN_NORMALIZE (rp, rsize);

        mpihelp_release_karatsuba_ctx( &karactx );
    }

    /* Fixup for negative results: store MOD - residue instead.  */
    if( negative_result && rsize ) {
        if( mod_shift_cnt )
            mpihelp_rshift( mp, mp, msize, mod_shift_cnt);
        mpihelp_sub( rp, mp, msize, rp, rsize);
        rsize = msize;
        rsign = msign;
        MPN_NORMALIZE(rp, rsize);
    }
    res->nlimbs = rsize;
    res->sign = rsign;

  leave:
    if( mp_marker )  mpi_free_limb_space( mp_marker );
    if( bp_marker )  mpi_free_limb_space( bp_marker );
    if( ep_marker )  mpi_free_limb_space( ep_marker );
    if( xp_marker )  mpi_free_limb_space( xp_marker );
    if( tspace )     mpi_free_limb_space( tspace );
}
#else /*!USE_ALGORITHM_SIMPLE_EXPONENTIATION */
/**
 * Internal helper: multiply two limb vectors and reduce the product.
 *
 *     X = R * S mod M
 *
 * XP        receives the result; the caller must have allocated
 *           enough limbs for the full product (RSIZE + SSIZE).
 * XSIZE_P   receives the number of limbs of the result.
 * RP/RSIZE  first factor.
 * SP/SSIZE  second factor; the caller must ensure RSIZE >= SSIZE.
 * MP/MSIZE  modulus as expected by mpihelp_divrem.
 * KARACTX_P Karatsuba context used for large operands.
 *
 * The product is only divided by M when it is longer than MSIZE
 * limbs; a shorter product is returned as is, with size
 * RSIZE + SSIZE.
 *
 * Generic code can and should use mpi_mulm instead; this function
 * only serves the specific needs of mpi_powm.
 */
static void
mul_mod (mpi_ptr_t xp, mpi_size_t *xsize_p,
         mpi_ptr_t rp, mpi_size_t rsize,
         mpi_ptr_t sp, mpi_size_t ssize,
         mpi_ptr_t mp, mpi_size_t msize,
         struct karatsuba_ctx *karactx_p)
{
  mpi_size_t prod_size = rsize + ssize;

  /* Pick the multiplication routine by the size of the smaller factor. */
  if (ssize >= KARATSUBA_THRESHOLD)
    mpihelp_mul_karatsuba_case (xp, rp, rsize, sp, ssize, karactx_p);
  else
    mpihelp_mul (xp, rp, rsize, sp, ssize);

  /* A product that fits into MSIZE limbs is passed on unreduced. */
  if (prod_size <= msize)
    {
      *xsize_p = prod_size;
      return;
    }

  /* The quotient is not needed; it is stored above the remainder. */
  mpihelp_divrem (xp + msize, 0, xp, prod_size, mp, msize);
  *xsize_p = msize;
}
/* Number of slots in the table of pre-computed odd powers of BASE
   (BASE^3, BASE^5, ...) kept by mpi_powm.  The value 2^(5-1) - 1 = 15
   matches the largest window width (5 bits) that mpi_powm selects. */
#define SIZE_B_2I3 ((1 << (5 - 1)) - 1)
/****************
* RES = BASE ^ EXPO mod MOD
*
* To mitigate the Yarom/Falkner flush+reload cache side-channel
* attack on the RSA secret exponent, we don't use the square
* routine but multiplication.
*
* The exponent is scanned from its most significant bit in windows
* of up to W bits (W is 1..5, chosen from the limb count of EXPO).
* Each window is handled with a run of squarings (done through
* mul_mod) and one multiplication by BASE or by one of the
* pre-computed odd powers BASE^3, BASE^5, ...
*
* RES may be the same object as BASE, EXPO or MOD; limbs of BASE and
* EXPO that alias RES are copied to temporary space first.  A MOD
* with zero limbs deliberately provokes a division-by-zero signal.
* The result gets sign 0 unless BASE is negative, EXPO is odd and the
* residue is non-zero; then MOD - residue is stored with the sign of
* MOD.
*
* Reference:
* Handbook of Applied Cryptography
* Algorithm 14.83: Modified left-to-right k-ary exponentiation
*/
void
mpi_powm (MPI res, MPI base, MPI expo, MPI mod)
{
/* Pointer to the limbs of the arguments, their size and signs. */
mpi_ptr_t rp, ep, mp, bp;
mpi_size_t esize, msize, bsize, rsize;
int msign, bsign, rsign;
/* Flags telling the secure allocation status of the arguments. */
int esec, msec, bsec;
/* Size of the result including space for temporary values. */
mpi_size_t size;
/* Helper. */
int mod_shift_cnt;
int negative_result;
/* Temporary limb buffers owned by this function; each one that is
   non-NULL is released at the "leave" label. */
mpi_ptr_t mp_marker = NULL;
mpi_ptr_t bp_marker = NULL;
mpi_ptr_t ep_marker = NULL;
mpi_ptr_t xp_marker = NULL;
mpi_ptr_t b_2i3[SIZE_B_2I3]; /* Pre-computed array: BASE^3, ^5, ^7, ... */
mpi_size_t b_2i3size[SIZE_B_2I3];
/* Window width in bits. */
mpi_size_t W;
/* Factor (and its size) picked for the current multiplication. */
mpi_ptr_t base_u;
mpi_size_t base_u_size;
esize = expo->nlimbs;
msize = mod->nlimbs;
size = 2 * msize;
msign = mod->sign;
/* Choose the window width from the size of the exponent in bits
   (counted in whole limbs). */
if (esize * BITS_PER_MPI_LIMB > 512)
W = 5;
else if (esize * BITS_PER_MPI_LIMB > 256)
W = 4;
else if (esize * BITS_PER_MPI_LIMB > 128)
W = 3;
else if (esize * BITS_PER_MPI_LIMB > 64)
W = 2;
else
W = 1;
esec = mpi_is_secure(expo);
msec = mpi_is_secure(mod);
bsec = mpi_is_secure(base);
rp = res->d;
ep = expo->d;
if (!msize)
msize = 1 / msize; /* provoke a signal */
if (!esize)
{
/* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 depending
on if MOD equals 1.  MOD is inspected before RES is written, and
RES is grown to one limb before that limb is stored. */
res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
if (res->nlimbs)
{
RESIZE_IF_NEEDED (res, 1);
rp = res->d;
rp[0] = 1;
}
res->sign = 0;
goto leave;
}
/* Normalize MOD (i.e. make its most significant bit set) as
required by mpn_divrem. This will make the intermediate values
in the calculation slightly larger, but the correct result is
obtained after a final reduction using the original MOD value. */
mp = mp_marker = mpi_alloc_limb_space(msize, msec);
count_leading_zeros (mod_shift_cnt, mod->d[msize-1]);
if (mod_shift_cnt)
mpihelp_lshift (mp, mod->d, msize, mod_shift_cnt);
else
MPN_COPY( mp, mod->d, msize );
bsize = base->nlimbs;
bsign = base->sign;
if (bsize > msize)
{
/* The base is larger than the module. Reduce it.
Allocate (BSIZE + 1) with space for remainder and quotient.
(The quotient is (bsize - msize + 1) limbs.) */
bp = bp_marker = mpi_alloc_limb_space( bsize + 1, bsec );
MPN_COPY ( bp, base->d, bsize );
/* We don't care about the quotient, store it above the
* remainder, at BP + MSIZE. */
mpihelp_divrem( bp + msize, 0, bp, bsize, mp, msize );
bsize = msize;
/* Canonicalize the base, since we are going to multiply with it
quite a few times. */
MPN_NORMALIZE( bp, bsize );
}
else
bp = base->d;
if (!bsize)
{
/* The (reduced) base is zero and the exponent is not: result is 0. */
res->nlimbs = 0;
res->sign = 0;
goto leave;
}
/* Make BASE, EXPO and MOD not overlap with RES. */
if ( rp == bp )
{
/* RES and BASE are identical. Allocate temp. space for BASE. */
assert (!bp_marker);
bp = bp_marker = mpi_alloc_limb_space( bsize, bsec );
MPN_COPY(bp, rp, bsize);
}
if ( rp == ep )
{
/* RES and EXPO are identical. Allocate temp. space for EXPO. */
ep = ep_marker = mpi_alloc_limb_space( esize, esec );
MPN_COPY(ep, rp, esize);
}
/* NOTE(review): MP always points to the copy allocated above (and
   MP_MARKER is set along with it), so this branch looks unreachable;
   if it were ever taken the assert would fire. */
if ( rp == mp )
{
/* RES and MOD are identical. Allocate temporary space for MOD.*/
assert (!mp_marker);
mp = mp_marker = mpi_alloc_limb_space( msize, msec );
MPN_COPY(mp, rp, msize);
}
/* Make sure RES can hold SIZE (= 2 * MSIZE) limbs.  The base itself
   is copied into RES further below. */
if (res->alloced < size)
{
mpi_resize (res, size);
rp = res->d;
}
/* Main processing. */
{
/* I indexes exponent limbs (and table slots during the
   pre-computation); J counts squarings that are still pending. */
mpi_size_t i, j;
/* Second work buffer; XP and RP swap roles after each step. */
mpi_ptr_t xp;
mpi_size_t xsize;
/* C is the number of not yet consumed exponent bits held in E. */
int c;
mpi_limb_t e;
mpi_limb_t carry_limb;
struct karatsuba_ctx karactx;
mpi_ptr_t tp;
xp = xp_marker = mpi_alloc_limb_space( 2 * (msize + 1), msec );
memset( &karactx, 0, sizeof karactx );
/* A negative base with an odd exponent gives a negative power. */
negative_result = (ep[0] & 1) && bsign;
/* Precompute B_2I3[], BASE^(2 * i + 3), BASE^3, ^5, ^7, ... */
if (W > 1) /* X := BASE^2 */
mul_mod (xp, &xsize, bp, bsize, bp, bsize, mp, msize, &karactx);
/* Each table entry is the previous odd power times BASE^2 (kept in
   XP); RP serves as scratch space here.  For W == 1 the loop body
   never runs, so XSIZE is not read. */
for (i = 0; i < (1 << (W - 1)) - 1; i++)
{ /* B_2I3[i] = BASE^(2 * i + 3) */
if (i == 0)
{
base_u = bp;
base_u_size = bsize;
}
else
{
base_u = b_2i3[i-1];
base_u_size = b_2i3size[i-1];
}
/* mul_mod wants the longer operand first. */
if (xsize >= base_u_size)
mul_mod (rp, &rsize, xp, xsize, base_u, base_u_size,
mp, msize, &karactx);
else
mul_mod (rp, &rsize, base_u, base_u_size, xp, xsize,
mp, msize, &karactx);
b_2i3[i] = mpi_alloc_limb_space (rsize, esec);
b_2i3size[i] = rsize;
MPN_COPY (b_2i3[i], rp, rsize);
}
i = esize - 1;
/* Main loop.
Make the result be pointed to alternately by XP and RP. This
helps us avoid block copying, which would otherwise be
necessary with the overlap restrictions of mpihelp_divmod. With
50% probability the result after this loop will be in the area
originally pointed by RP (==RES->d), and with 50% probability
in the area originally pointed to by XP. */
rsign = 0;
/* Start with R := BASE.  For W > 1 it is zero-padded to MSIZE limbs;
   presumably so that R is never shorter than a table entry, as
   mul_mod requires RSIZE >= SSIZE. */
if (W == 1)
{
rsize = bsize;
}
else
{
rsize = msize;
MPN_ZERO (rp, rsize);
}
MPN_COPY ( rp, bp, bsize );
/* Load the top exponent limb and drop its most significant set bit
   (it is accounted for by R := BASE).  E keeps the remaining C bits
   left-aligned. */
e = ep[i];
count_leading_zeros (c, e);
e = (e << c) << 1;
c = BITS_PER_MPI_LIMB - 1 - c;
j = 0;
for (;;)
if (e == 0)
{
/* Only zero bits are left in this limb: each costs one squaring,
   which is deferred by adding it to J.  Then fetch the next limb. */
j += c;
i--;
if ( i < 0 )
{
c = 0;
break;
}
e = ep[i];
c = BITS_PER_MPI_LIMB;
}
else
{
int c0;
mpi_limb_t e0;
/* Skip zero bits up to the next set bit; they too become pending
   squarings. */
count_leading_zeros (c0, e);
e = (e << c0);
c -= c0;
j += c0;
if (c >= W)
{
/* A full W-bit window is available in the current limb. */
e0 = (e >> (BITS_PER_MPI_LIMB - W));
e = (e << W);
c -= W;
}
else
{
i--;
if ( i < 0 )
{
/* Fewer than W bits remain in the whole exponent: right-align
   them in E and let the code after the loop handle them. */
e = (e >> (BITS_PER_MPI_LIMB - c));
break;
}
/* The window straddles two limbs: take C bits from the current
   limb and W - C bits from the top of the next one. */
c0 = c;
e0 = (e >> (BITS_PER_MPI_LIMB - W))
| (ep[i] >> (BITS_PER_MPI_LIMB - W + c0));
e = (ep[i] << (W - c0));
c = BITS_PER_MPI_LIMB - W + c0;
}
/* Split the window into an odd part and C0 trailing zero bits.
   Afterwards E0 is the table selector: 0 means BASE, K > 0 means
   B_2I3[K-1] = BASE^(2*K+1). */
count_trailing_zeros (c0, e0);
e0 = (e0 >> c0) >> 1;
/* Do the pending squarings plus those for the odd part. */
for (j += W - c0; j; j--)
{
mul_mod (xp, &xsize, rp, rsize, rp, rsize, mp, msize, &karactx);
tp = rp; rp = xp; xp = tp;
rsize = xsize;
}
if (e0 == 0)
{
base_u = bp;
base_u_size = bsize;
}
else
{
base_u = b_2i3[e0 - 1];
base_u_size = b_2i3size[e0 -1];
}
mul_mod (xp, &xsize, rp, rsize, base_u, base_u_size,
mp, msize, &karactx);
tp = rp; rp = xp; xp = tp;
rsize = xsize;
/* The trailing zero bits of the window are squared later. */
j = c0;
}
/* Tail: C != 0 means the loop ended with a short final window
   (fewer than W bits) right-aligned in E.  Split it into its odd
   part and C trailing zero bits, as done for full windows. */
if (c != 0)
{
j += c;
count_trailing_zeros (c, e);
e = (e >> c);
j -= c;
}
/* Flush the pending squarings. */
while (j--)
{
mul_mod (xp, &xsize, rp, rsize, rp, rsize, mp, msize, &karactx);
tp = rp; rp = xp; xp = tp;
rsize = xsize;
}
if (e != 0)
{
/* Multiply by the odd part of the final window ... */
if ((e>>1) == 0)
{
base_u = bp;
base_u_size = bsize;
}
else
{
base_u = b_2i3[(e>>1) - 1];
base_u_size = b_2i3size[(e>>1) -1];
}
mul_mod (xp, &xsize, rp, rsize, base_u, base_u_size,
mp, msize, &karactx);
tp = rp; rp = xp; xp = tp;
rsize = xsize;
/* ... and square once for each of its trailing zero bits. */
for (; c; c--)
{
mul_mod (xp, &xsize, rp, rsize, rp, rsize, mp, msize, &karactx);
tp = rp; rp = xp; xp = tp;
rsize = xsize;
}
}
/* We shifted MOD, the modulo reduction argument, left
MOD_SHIFT_CNT steps. Adjust the result by reducing it with the
original MOD.
Also make sure the result is put in RES->d (where it already
might be, see above). */
if ( mod_shift_cnt )
{
carry_limb = mpihelp_lshift( res->d, rp, rsize, mod_shift_cnt);
rp = res->d;
if ( carry_limb )
{
rp[rsize] = carry_limb;
rsize++;
}
}
else if (res->d != rp)
{
MPN_COPY (res->d, rp, rsize);
rp = res->d;
}
if ( rsize >= msize )
{
mpihelp_divrem(rp + msize, 0, rp, rsize, mp, msize);
rsize = msize;
}
/* Undo the normalization shift, then remove any leading zero words
from the result. */
if ( mod_shift_cnt )
mpihelp_rshift (rp, rp, rsize, mod_shift_cnt);
MPN_NORMALIZE (rp, rsize);
mpihelp_release_karatsuba_ctx (&karactx );
/* Release the table of pre-computed odd powers. */
for (i = 0; i < (1 << (W - 1)) - 1; i++)
mpi_free_limb_space (b_2i3[i]);
}
/* Fixup for negative results: shift MP back to the original MOD
   value and store MOD - residue, with the sign of MOD. */
if ( negative_result && rsize )
{
if ( mod_shift_cnt )
mpihelp_rshift (mp, mp, msize, mod_shift_cnt);
mpihelp_sub (rp, mp, msize, rp, rsize);
rsize = msize;
rsign = msign;
MPN_NORMALIZE(rp, rsize);
}
assert (res->d == rp);
res->nlimbs = rsize;
res->sign = rsign;
leave:
/* Free whatever temporary limb space was allocated. */
if (mp_marker)
mpi_free_limb_space (mp_marker);
if (bp_marker)
mpi_free_limb_space (bp_marker);
if (ep_marker)
mpi_free_limb_space (ep_marker);
if (xp_marker)
mpi_free_limb_space (xp_marker);
}
#endif /*!USE_ALGORITHM_SIMPLE_EXPONENTIATION */