1
0
mirror of git://git.gnupg.org/gnupg.git synced 2024-12-22 10:19:57 +01:00
gnupg/mpi/mpi-pow.c
NIIBE Yutaka 6cbc75e712 mpi: Avoid data-dependent timing variations in mpi_powm.
* include/mpi.h, mpi/mpiutils.c (mpi_set_cond): New.
* mpi/mpi-pow.c (SIZE_PRECOMP): Rename from SIZE_B_2I3.
(mpi_powm): Access all data in the table and use mpi_set_cond.

--

Access to the precomputed table was indexed by a portion of EXPO,
which could be mounted by a side channel attack.  This change fixes
this particular data-dependent access pattern.
2015-02-26 21:00:05 +09:00

759 lines
21 KiB
C

/* mpi-pow.c - MPI functions
* Copyright (C) 1994, 1996, 1998, 2000 Free Software Foundation, Inc.
* Copyright (C) 2013 Werner Koch
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mpi-internal.h"
#include "longlong.h"
#include <assert.h>
/*
* When you need old implementation, please add compilation option
* -DUSE_ALGORITHM_SIMPLE_EXPONENTIATION
* or expose this line:
#define USE_ALGORITHM_SIMPLE_EXPONENTIATION 1
*/
#if defined(USE_ALGORITHM_SIMPLE_EXPONENTIATION)
/****************
* RES = BASE ^ EXP mod MOD
*/
void
mpi_powm( MPI res, MPI base, MPI exponent, MPI mod)
{
mpi_ptr_t rp, ep, mp, bp;
mpi_size_t esize, msize, bsize, rsize;
int msign, bsign, rsign;
int esec, msec, bsec, rsec;
mpi_size_t size;
int mod_shift_cnt;
int negative_result;
mpi_ptr_t mp_marker=NULL, bp_marker=NULL, ep_marker=NULL;
mpi_ptr_t xp_marker=NULL;
int assign_rp=0;
mpi_ptr_t tspace = NULL;
mpi_size_t tsize=0; /* to avoid compiler warning */
/* fixme: we should check that the warning is void*/
esize = exponent->nlimbs;
msize = mod->nlimbs;
size = 2 * msize;
msign = mod->sign;
esec = mpi_is_secure(exponent);
msec = mpi_is_secure(mod);
bsec = mpi_is_secure(base);
rsec = mpi_is_secure(res);
rp = res->d;
ep = exponent->d;
if( !msize )
msize = 1 / msize; /* provoke a signal */
if( !esize ) {
/* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
* depending on if MOD equals 1. */
rp[0] = 1;
res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
res->sign = 0;
goto leave;
}
/* Normalize MOD (i.e. make its most significant bit set) as required by
* mpn_divrem. This will make the intermediate values in the calculation
* slightly larger, but the correct result is obtained after a final
* reduction using the original MOD value. */
mp = mp_marker = mpi_alloc_limb_space(msize, msec);
count_leading_zeros( mod_shift_cnt, mod->d[msize-1] );
if( mod_shift_cnt )
mpihelp_lshift( mp, mod->d, msize, mod_shift_cnt );
else
MPN_COPY( mp, mod->d, msize );
bsize = base->nlimbs;
bsign = base->sign;
if( bsize > msize ) { /* The base is larger than the module. Reduce it. */
/* Allocate (BSIZE + 1) with space for remainder and quotient.
* (The quotient is (bsize - msize + 1) limbs.) */
bp = bp_marker = mpi_alloc_limb_space( bsize + 1, bsec );
MPN_COPY( bp, base->d, bsize );
/* We don't care about the quotient, store it above the remainder,
* at BP + MSIZE. */
mpihelp_divrem( bp + msize, 0, bp, bsize, mp, msize );
bsize = msize;
/* Canonicalize the base, since we are going to multiply with it
* quite a few times. */
MPN_NORMALIZE( bp, bsize );
}
else
bp = base->d;
if( !bsize ) {
res->nlimbs = 0;
res->sign = 0;
goto leave;
}
if( res->alloced < size ) {
/* We have to allocate more space for RES. If any of the input
* parameters are identical to RES, defer deallocation of the old
* space. */
if( rp == ep || rp == mp || rp == bp ) {
rp = mpi_alloc_limb_space( size, rsec );
assign_rp = 1;
}
else {
mpi_resize( res, size );
rp = res->d;
}
}
else { /* Make BASE, EXPONENT and MOD not overlap with RES. */
if( rp == bp ) {
/* RES and BASE are identical. Allocate temp. space for BASE. */
assert( !bp_marker );
bp = bp_marker = mpi_alloc_limb_space( bsize, bsec );
MPN_COPY(bp, rp, bsize);
}
if( rp == ep ) {
/* RES and EXPONENT are identical.
Allocate temp. space for EXPONENT. */
ep = ep_marker = mpi_alloc_limb_space( esize, esec );
MPN_COPY(ep, rp, esize);
}
if( rp == mp ) {
/* RES and MOD are identical. Allocate temporary space for MOD.*/
assert( !mp_marker );
mp = mp_marker = mpi_alloc_limb_space( msize, msec );
MPN_COPY(mp, rp, msize);
}
}
MPN_COPY( rp, bp, bsize );
rsize = bsize;
rsign = bsign;
{
mpi_size_t i;
mpi_ptr_t xp = xp_marker = mpi_alloc_limb_space( 2 * (msize + 1), msec );
int c;
mpi_limb_t e;
mpi_limb_t carry_limb;
struct karatsuba_ctx karactx;
memset( &karactx, 0, sizeof karactx );
negative_result = (ep[0] & 1) && base->sign;
i = esize - 1;
e = ep[i];
count_leading_zeros (c, e);
e = (e << c) << 1; /* shift the exp bits to the left, lose msb */
c = BITS_PER_MPI_LIMB - 1 - c;
/* Main loop.
*
* Make the result be pointed to alternately by XP and RP. This
* helps us avoid block copying, which would otherwise be necessary
* with the overlap restrictions of mpihelp_divmod. With 50% probability
* the result after this loop will be in the area originally pointed
* by RP (==RES->d), and with 50% probability in the area originally
* pointed to by XP.
*/
for(;;) {
while( c ) {
mpi_ptr_t tp;
mpi_size_t xsize;
/*mpihelp_mul_n(xp, rp, rp, rsize);*/
if( rsize < KARATSUBA_THRESHOLD )
mpih_sqr_n_basecase( xp, rp, rsize );
else {
if( !tspace ) {
tsize = 2 * rsize;
tspace = mpi_alloc_limb_space( tsize, 0 );
}
else if( tsize < (2*rsize) ) {
mpi_free_limb_space( tspace );
tsize = 2 * rsize;
tspace = mpi_alloc_limb_space( tsize, 0 );
}
mpih_sqr_n( xp, rp, rsize, tspace );
}
xsize = 2 * rsize;
if( xsize > msize ) {
mpihelp_divrem(xp + msize, 0, xp, xsize, mp, msize);
xsize = msize;
}
tp = rp; rp = xp; xp = tp;
rsize = xsize;
/* To mitigate the Yarom/Falkner flush+reload cache
* side-channel attack on the RSA secret exponent, we
* do the multiplication regardless of the value of
* the high-bit of E. But to avoid this performance
* penalty we do it only if the exponent has been
* stored in secure memory and we can thus assume it
* is a secret exponent. */
if (esec || (mpi_limb_signed_t)e < 0) {
/*mpihelp_mul( xp, rp, rsize, bp, bsize );*/
if( bsize < KARATSUBA_THRESHOLD ) {
mpihelp_mul( xp, rp, rsize, bp, bsize );
}
else {
mpihelp_mul_karatsuba_case(
xp, rp, rsize, bp, bsize, &karactx );
}
xsize = rsize + bsize;
if( xsize > msize ) {
mpihelp_divrem(xp + msize, 0, xp, xsize, mp, msize);
xsize = msize;
}
}
if ((mpi_limb_signed_t)e < 0) {
tp = rp; rp = xp; xp = tp;
rsize = xsize;
}
e <<= 1;
c--;
}
i--;
if( i < 0 )
break;
e = ep[i];
c = BITS_PER_MPI_LIMB;
}
/* We shifted MOD, the modulo reduction argument, left MOD_SHIFT_CNT
* steps. Adjust the result by reducing it with the original MOD.
*
* Also make sure the result is put in RES->d (where it already
* might be, see above).
*/
if( mod_shift_cnt ) {
carry_limb = mpihelp_lshift( res->d, rp, rsize, mod_shift_cnt);
rp = res->d;
if( carry_limb ) {
rp[rsize] = carry_limb;
rsize++;
}
}
else {
MPN_COPY( res->d, rp, rsize);
rp = res->d;
}
if( rsize >= msize ) {
mpihelp_divrem(rp + msize, 0, rp, rsize, mp, msize);
rsize = msize;
}
/* Remove any leading zero words from the result. */
if( mod_shift_cnt )
mpihelp_rshift( rp, rp, rsize, mod_shift_cnt);
MPN_NORMALIZE (rp, rsize);
mpihelp_release_karatsuba_ctx( &karactx );
}
if( negative_result && rsize ) {
if( mod_shift_cnt )
mpihelp_rshift( mp, mp, msize, mod_shift_cnt);
mpihelp_sub( rp, mp, msize, rp, rsize);
rsize = msize;
rsign = msign;
MPN_NORMALIZE(rp, rsize);
}
res->nlimbs = rsize;
res->sign = rsign;
leave:
if( assign_rp ) mpi_assign_limb_space( res, rp, size );
if( mp_marker ) mpi_free_limb_space( mp_marker );
if( bp_marker ) mpi_free_limb_space( bp_marker );
if( ep_marker ) mpi_free_limb_space( ep_marker );
if( xp_marker ) mpi_free_limb_space( xp_marker );
if( tspace ) mpi_free_limb_space( tspace );
}
#else /*!USE_ALGORITHM_SIMPLE_EXPONENTIATION */
/**
* Internal function to compute
*
* X = R * S mod M
*
* and set the size of X at the pointer XSIZE_P.
* Use karatsuba structure at KARACTX_P.
*
* Condition:
* RSIZE >= SSIZE
* Enough space for X is allocated beforehand.
*
* For generic cases, we can/should use mpi_mulm.
* This function is use for specific internal case.
*/
static void
mul_mod (mpi_ptr_t xp, mpi_size_t *xsize_p,
mpi_ptr_t rp, mpi_size_t rsize,
mpi_ptr_t sp, mpi_size_t ssize,
mpi_ptr_t mp, mpi_size_t msize,
struct karatsuba_ctx *karactx_p)
{
if( ssize < KARATSUBA_THRESHOLD )
mpihelp_mul ( xp, rp, rsize, sp, ssize );
else
mpihelp_mul_karatsuba_case (xp, rp, rsize, sp, ssize, karactx_p);
if (rsize + ssize > msize)
{
mpihelp_divrem (xp + msize, 0, xp, rsize + ssize, mp, msize);
*xsize_p = msize;
}
else
*xsize_p = rsize + ssize;
}
#define SIZE_PRECOMP ((1 << (5 - 1)))
/****************
* RES = BASE ^ EXPO mod MOD
*
* To mitigate the Yarom/Falkner flush+reload cache side-channel
* attack on the RSA secret exponent, we don't use the square
* routine but multiplication.
*
* Reference:
* Handbook of Applied Cryptography
* Algorithm 14.83: Modified left-to-right k-ary exponentiation
*/
void
mpi_powm (MPI res, MPI base, MPI expo, MPI mod)
{
/* Pointer to the limbs of the arguments, their size and signs. */
mpi_ptr_t rp, ep, mp, bp;
mpi_size_t esize, msize, bsize, rsize;
int msign, bsign, rsign;
/* Flags telling the secure allocation status of the arguments. */
int esec, msec, bsec;
/* Size of the result including space for temporary values. */
mpi_size_t size;
/* Helper. */
int mod_shift_cnt;
int negative_result;
mpi_ptr_t mp_marker = NULL;
mpi_ptr_t bp_marker = NULL;
mpi_ptr_t ep_marker = NULL;
mpi_ptr_t xp_marker = NULL;
mpi_ptr_t precomp[SIZE_PRECOMP]; /* Pre-computed array: BASE^1, ^3, ^5, ... */
mpi_size_t precomp_size[SIZE_PRECOMP];
mpi_size_t W;
mpi_ptr_t base_u;
mpi_size_t base_u_size;
mpi_size_t max_u_size;
esize = expo->nlimbs;
msize = mod->nlimbs;
size = 2 * msize;
msign = mod->sign;
if (esize * BITS_PER_MPI_LIMB > 512)
W = 5;
else if (esize * BITS_PER_MPI_LIMB > 256)
W = 4;
else if (esize * BITS_PER_MPI_LIMB > 128)
W = 3;
else if (esize * BITS_PER_MPI_LIMB > 64)
W = 2;
else
W = 1;
esec = mpi_is_secure(expo);
msec = mpi_is_secure(mod);
bsec = mpi_is_secure(base);
rp = res->d;
ep = expo->d;
if (!msize)
msize = 1 / msize; /* provoke a signal */
if (!esize)
{
/* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 depending
on if MOD equals 1. */
res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
if (res->nlimbs)
{
RESIZE_IF_NEEDED (res, 1);
rp = res->d;
rp[0] = 1;
}
res->sign = 0;
goto leave;
}
/* Normalize MOD (i.e. make its most significant bit set) as
required by mpn_divrem. This will make the intermediate values
in the calculation slightly larger, but the correct result is
obtained after a final reduction using the original MOD value. */
mp = mp_marker = mpi_alloc_limb_space(msize, msec);
count_leading_zeros (mod_shift_cnt, mod->d[msize-1]);
if (mod_shift_cnt)
mpihelp_lshift (mp, mod->d, msize, mod_shift_cnt);
else
MPN_COPY( mp, mod->d, msize );
bsize = base->nlimbs;
bsign = base->sign;
if (bsize > msize)
{
/* The base is larger than the module. Reduce it.
Allocate (BSIZE + 1) with space for remainder and quotient.
(The quotient is (bsize - msize + 1) limbs.) */
bp = bp_marker = mpi_alloc_limb_space( bsize + 1, bsec );
MPN_COPY ( bp, base->d, bsize );
/* We don't care about the quotient, store it above the
* remainder, at BP + MSIZE. */
mpihelp_divrem( bp + msize, 0, bp, bsize, mp, msize );
bsize = msize;
/* Canonicalize the base, since we are going to multiply with it
quite a few times. */
MPN_NORMALIZE( bp, bsize );
}
else
bp = base->d;
if (!bsize)
{
res->nlimbs = 0;
res->sign = 0;
goto leave;
}
/* Make BASE, EXPO and MOD not overlap with RES. */
if ( rp == bp )
{
/* RES and BASE are identical. Allocate temp. space for BASE. */
assert (!bp_marker);
bp = bp_marker = mpi_alloc_limb_space( bsize, bsec );
MPN_COPY(bp, rp, bsize);
}
if ( rp == ep )
{
/* RES and EXPO are identical. Allocate temp. space for EXPO. */
ep = ep_marker = mpi_alloc_limb_space( esize, esec );
MPN_COPY(ep, rp, esize);
}
if ( rp == mp )
{
/* RES and MOD are identical. Allocate temporary space for MOD.*/
assert (!mp_marker);
mp = mp_marker = mpi_alloc_limb_space( msize, msec );
MPN_COPY(mp, rp, msize);
}
/* Copy base to the result. */
if (res->alloced < size)
{
mpi_resize (res, size);
rp = res->d;
}
/* Main processing. */
{
mpi_size_t i, j, k;
mpi_ptr_t xp;
mpi_size_t xsize;
int c;
mpi_limb_t e;
mpi_limb_t carry_limb;
struct karatsuba_ctx karactx;
mpi_ptr_t tp;
xp = xp_marker = mpi_alloc_limb_space( 2 * (msize + 1), msec );
memset( &karactx, 0, sizeof karactx );
negative_result = (ep[0] & 1) && bsign;
/* Precompute PRECOMP[], BASE^(2 * i + 1), BASE^1, ^3, ^5, ... */
if (W > 1) /* X := BASE^2 */
mul_mod (xp, &xsize, bp, bsize, bp, bsize, mp, msize, &karactx);
base_u = precomp[0] = mpi_alloc_limb_space (bsize, esec);
base_u_size = max_u_size = precomp_size[0] = bsize;
MPN_COPY (precomp[0], bp, bsize);
for (i = 1; i < (1 << (W - 1)); i++)
{ /* PRECOMP[i] = BASE^(2 * i + 1) */
if (xsize >= base_u_size)
mul_mod (rp, &rsize, xp, xsize, base_u, base_u_size,
mp, msize, &karactx);
else
mul_mod (rp, &rsize, base_u, base_u_size, xp, xsize,
mp, msize, &karactx);
base_u = precomp[i] = mpi_alloc_limb_space (rsize, esec);
base_u_size = precomp_size[i] = rsize;
if (max_u_size < base_u_size)
max_u_size = base_u_size;
MPN_COPY (precomp[i], rp, rsize);
}
base_u = mpi_alloc_limb_space (max_u_size, esec);
i = esize - 1;
/* Main loop.
Make the result be pointed to alternately by XP and RP. This
helps us avoid block copying, which would otherwise be
necessary with the overlap restrictions of mpihelp_divmod. With
50% probability the result after this loop will be in the area
originally pointed by RP (==RES->d), and with 50% probability
in the area originally pointed to by XP. */
rsign = 0;
if (W == 1)
{
rsize = bsize;
}
else
{
rsize = msize;
MPN_ZERO (rp, rsize);
}
MPN_COPY ( rp, bp, bsize );
e = ep[i];
count_leading_zeros (c, e);
e = (e << c) << 1;
c = BITS_PER_MPI_LIMB - 1 - c;
j = 0;
for (;;)
if (e == 0)
{
j += c;
i--;
if ( i < 0 )
{
c = 0;
break;
}
e = ep[i];
c = BITS_PER_MPI_LIMB;
}
else
{
int c0;
mpi_limb_t e0;
count_leading_zeros (c0, e);
e = (e << c0);
c -= c0;
j += c0;
if (c >= W)
{
e0 = (e >> (BITS_PER_MPI_LIMB - W));
e = (e << W);
c -= W;
}
else
{
i--;
if ( i < 0 )
{
e = (e >> (BITS_PER_MPI_LIMB - c));
break;
}
c0 = c;
e0 = (e >> (BITS_PER_MPI_LIMB - W))
| (ep[i] >> (BITS_PER_MPI_LIMB - W + c0));
e = (ep[i] << (W - c0));
c = BITS_PER_MPI_LIMB - W + c0;
}
count_trailing_zeros (c0, e0);
e0 = (e0 >> c0) >> 1;
for (j += W - c0; j; j--)
{
mul_mod (xp, &xsize, rp, rsize, rp, rsize, mp, msize, &karactx);
tp = rp; rp = xp; xp = tp;
rsize = xsize;
}
/*
* base_u <= precomp[e0]
* base_u_size <= precomp_size[e0];
*/
base_u_size = 0;
for (k = 0; k < (1<< (W - 1)); k++)
{
struct gcry_mpi w, u;
w.alloced = w.nlimbs = precomp_size[k];
u.alloced = u.nlimbs = precomp_size[k];
w.nbits = w.nlimbs * BITS_PER_MPI_LIMB;
u.nbits = u.nlimbs * BITS_PER_MPI_LIMB;
w.sign = u.sign = 0;
w.flags = u.flags = 0;
w.d = base_u;
u.d = precomp[k];
mpi_set_cond (&w, &u, k == e0);
base_u_size |= (precomp_size[k] & ((mpi_size_t)0 - (k == e0)) );
}
mul_mod (xp, &xsize, rp, rsize, base_u, base_u_size,
mp, msize, &karactx);
tp = rp; rp = xp; xp = tp;
rsize = xsize;
j = c0;
}
if (c != 0)
{
j += c;
count_trailing_zeros (c, e);
e = (e >> c);
j -= c;
}
while (j--)
{
mul_mod (xp, &xsize, rp, rsize, rp, rsize, mp, msize, &karactx);
tp = rp; rp = xp; xp = tp;
rsize = xsize;
}
if (e != 0)
{
base_u_size = 0;
for (k = 0; k < (1<< (W - 1)); k++)
{
struct gcry_mpi w, u;
w.alloced = w.nlimbs = precomp_size[k];
u.alloced = u.nlimbs = precomp_size[k];
w.nbits = w.nlimbs * BITS_PER_MPI_LIMB;
u.nbits = u.nlimbs * BITS_PER_MPI_LIMB;
w.sign = u.sign = 0;
w.flags = u.flags = 0;
w.d = base_u;
u.d = precomp[k];
mpi_set_cond (&w, &u, k == (e>>1));
base_u_size |= (precomp_size[k] & ((mpi_size_t)0 - (k == (e>>1))) );
}
mul_mod (xp, &xsize, rp, rsize, base_u, base_u_size,
mp, msize, &karactx);
tp = rp; rp = xp; xp = tp;
rsize = xsize;
for (; c; c--)
{
mul_mod (xp, &xsize, rp, rsize, rp, rsize, mp, msize, &karactx);
tp = rp; rp = xp; xp = tp;
rsize = xsize;
}
}
/* We shifted MOD, the modulo reduction argument, left
MOD_SHIFT_CNT steps. Adjust the result by reducing it with the
original MOD.
Also make sure the result is put in RES->d (where it already
might be, see above). */
if ( mod_shift_cnt )
{
carry_limb = mpihelp_lshift( res->d, rp, rsize, mod_shift_cnt);
rp = res->d;
if ( carry_limb )
{
rp[rsize] = carry_limb;
rsize++;
}
}
else if (res->d != rp)
{
MPN_COPY (res->d, rp, rsize);
rp = res->d;
}
if ( rsize >= msize )
{
mpihelp_divrem(rp + msize, 0, rp, rsize, mp, msize);
rsize = msize;
}
/* Remove any leading zero words from the result. */
if ( mod_shift_cnt )
mpihelp_rshift (rp, rp, rsize, mod_shift_cnt);
MPN_NORMALIZE (rp, rsize);
mpihelp_release_karatsuba_ctx (&karactx );
for (i = 0; i < (1 << (W - 1)); i++)
mpi_free_limb_space (precomp[i]);
mpi_free_limb_space (base_u);
}
/* Fixup for negative results. */
if ( negative_result && rsize )
{
if ( mod_shift_cnt )
mpihelp_rshift (mp, mp, msize, mod_shift_cnt);
mpihelp_sub (rp, mp, msize, rp, rsize);
rsize = msize;
rsign = msign;
MPN_NORMALIZE(rp, rsize);
}
assert (res->d == rp);
res->nlimbs = rsize;
res->sign = rsign;
leave:
if (mp_marker)
mpi_free_limb_space (mp_marker);
if (bp_marker)
mpi_free_limb_space (bp_marker);
if (ep_marker)
mpi_free_limb_space (ep_marker);
if (xp_marker)
mpi_free_limb_space (xp_marker);
}
#endif /*!USE_ALGORITHM_SIMPLE_EXPONENTIATION */