mirror of
git://git.gnupg.org/gnupg.git
synced 2025-07-02 22:46:30 +02:00
See ChangeLog: Mon Jul 17 16:35:47 CEST 2000 Werner Koch
This commit is contained in:
parent
92cd255508
commit
0bf44b072c
45 changed files with 31954 additions and 7194 deletions
|
@ -1,3 +1,30 @@
|
|||
Mon Jul 17 16:35:47 CEST 2000 Werner Koch <wk@>
|
||||
|
||||
* power/: Add all files from GMP for this CPU. Converted comments to
|
||||
CPP comments because some ASes complain about ' in comments.
|
||||
|
||||
* config.links: Support for BSDI 4.x; by Wayne Chapeskie. Add support
|
||||
for FreeBSD 5 and made the case statement look nicer; by Jun Kuriyama.
|
||||
Add support for NetBSD.
|
||||
(sparc8): Made the search path the same as sparc9
|
||||
(sparc64-unknown-linux-gnu): use udiv module; by Adam Mitchell.
|
||||
|
||||
* Makefile.am: s/SFLAGS/ASFLAGS/. This has only been used by the
|
||||
powerpc and actually never passed the -Wa,foo to the cc.
|
||||
|
||||
* mpih-div.c (mpihelp_divrem): The MPN_COPY_DECR copied one element
|
||||
too many. This is a gmp2.0.2p9.txt patch.
|
||||
|
||||
* longlong.h (umul_ppmm): Fixes for ARM-4. By Sean MacLennan.
|
||||
|
||||
* mpi-internal.h (karatsuba_ctx): New.
|
||||
* mpih-mul.c (mpihelp_release_karatsuba_ctx): New.
|
||||
(mpihelp_mul_karatsuba_case): New.
|
||||
(mpihelp_mul): Split to make use of the new functions.
|
||||
* mpi-pow.c (mpi_powm): Make use of the new split function to avoid
|
||||
multiple allocations of temporary memory during the Karatsuba operations.
|
||||
* mpi_mpow.c: Removed the unused Barrett code.
|
||||
|
||||
2000-03-21 16:17:30 Werner Koch (wk@habibti.openit.de)
|
||||
|
||||
* config.links: Add support for FreeBSD 5.
|
||||
|
|
|
@ -3,12 +3,13 @@
|
|||
|
||||
INCLUDES = -I$(top_srcdir)/gcrypt
|
||||
CFLAGS = @CFLAGS@ @MPI_OPT_FLAGS@
|
||||
SFLAGS = @MPI_SFLAGS@
|
||||
ASFLAGS = @MPI_SFLAGS@
|
||||
|
||||
EXTRA_DIST = config.links
|
||||
DISTCLEANFILES = mpih-add1.S mpih-mul1.S mpih-mul2.S mpih-mul3.S \
|
||||
mpih-lshift.S mpih-rshift.S mpih-sub1.S asm-syntax.h sysdep.h
|
||||
# Note: we only use .S files so we should delete all left over .s
|
||||
# CLEANFILES = _*.s
|
||||
CLEANFILES = *.s
|
||||
|
||||
noinst_LTLIBRARIES = libmpi.la
|
||||
|
@ -56,4 +57,9 @@ libmpi_la_LIBADD = $(common_asm_objects) @MPI_EXTRA_ASM_OBJS@
|
|||
.S.s:
|
||||
$(CPP) $(INCLUDES) $(DEFS) $< | grep -v '^#' >$*.s
|
||||
|
||||
# Hmmm, we should use this, so that OSes which do not distinguish
|
||||
# filename case still work. We have to see how libtool can handle this
|
||||
# $(CPP) $(INCLUDES) $(DEFS) $< | grep -v '^#' > _$*.s
|
||||
# $(COMPILE) -c _$*.s
|
||||
# mv -f _$*.o $*.o
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# sourced my ../configure to get the list of files to link
|
||||
# sourced by ../configure to get the list of files to link
|
||||
# this should set $mpi_ln_src and mpi_ln_dst.
|
||||
# Note: this is called from the above directory.
|
||||
|
||||
|
@ -12,23 +12,40 @@ echo '/* created by config.links - do not edit */' >./mpi/asm-syntax.h
|
|||
|
||||
if test "$try_asm_modules" = "yes" ; then
|
||||
case "${target}" in
|
||||
i[34]86*-*-freebsd*-elf | i[34]86*-*-freebsd[3-9]* | i[34]86*-*-freebsdelf*)
|
||||
i[34]86*-*-freebsd*-elf | \
|
||||
i[34]86*-*-freebsd[3-9]* | \
|
||||
i[34]86*-*-freebsdelf* | \
|
||||
i[34]86*-*-netbsd* )
|
||||
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
|
||||
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
|
||||
path="i386"
|
||||
;;
|
||||
i[56]86*-*-freebsd*-elf | i[56]86*-*-freebsd[3-9]* | i[56]86*-*-freebsdelf*)
|
||||
i[56]86*-*-freebsd*-elf | \
|
||||
i[56]86*-*-freebsd[3-9]* | \
|
||||
i[56]86*-*-freebsdelf* | \
|
||||
i[56]86*-*-netbsd* | \
|
||||
pentium-*-netbsd* | \
|
||||
pentiumpro-*-netbsd*)
|
||||
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
|
||||
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
|
||||
path="i586 i386"
|
||||
;;
|
||||
i[34]86*-*-linuxaout* | i[34]86*-*-linuxoldld* | i[34]86*-*-*bsd*)
|
||||
i[34]86*-*-bsdi4*)
|
||||
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
|
||||
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
|
||||
path="i386"
|
||||
;;
|
||||
i[34]86*-*-linuxaout* | \
|
||||
i[34]86*-*-linuxoldld* | \
|
||||
i[34]86*-*-*bsd*)
|
||||
echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h
|
||||
echo '#define X86_BROKEN_ALIGN' >>./mpi/asm-syntax.h
|
||||
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
|
||||
path="i386"
|
||||
;;
|
||||
i[56]86*-*-linuxaout* | i[56]86*-*-linuxoldld* | i[56]86*-*-*bsd*)
|
||||
i[56]86*-*-linuxaout* | \
|
||||
i[56]86*-*-linuxoldld* | \
|
||||
i[56]86*-*-*bsd*)
|
||||
echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h
|
||||
echo '#define X86_BROKEN_ALIGN' >>./mpi/asm-syntax.h
|
||||
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
|
||||
|
@ -49,7 +66,9 @@ case "${target}" in
|
|||
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
|
||||
path="i386"
|
||||
;;
|
||||
i[56]86*-*-* | pentium-*-* | pentiumpro-*-*)
|
||||
i[56]86*-*-* | \
|
||||
pentium-*-* | \
|
||||
pentiumpro-*-*)
|
||||
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
|
||||
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
|
||||
path="i586 i386"
|
||||
|
@ -74,13 +93,23 @@ case "${target}" in
|
|||
path="pa7100 hppa1.1 hppa"
|
||||
mpi_extra_modules="udiv-qrnnd"
|
||||
;;
|
||||
sparc9*-*-* | sparc64*-*-* | ultrasparc*-*-*)
|
||||
sparc64-*-linux-gnu)
|
||||
# An extra rule because we have a report for this one only.
|
||||
# Should be compared against the next GMP version
|
||||
echo '/* configured for sparc64-*-linux-gnu */' >>./mpi/asm-syntax.h
|
||||
path="sparc32v8 sparc32"
|
||||
mpi_extra_modules="udiv"
|
||||
;;
|
||||
sparc9*-*-* | \
|
||||
sparc64*-*-* | \
|
||||
ultrasparc*-*-* )
|
||||
echo '/* configured for sparc9 or higher */' >>./mpi/asm-syntax.h
|
||||
path="sparc32v8 sparc32"
|
||||
;;
|
||||
sparc8*-*-* | microsparc*-*-*)
|
||||
sparc8*-*-* | \
|
||||
microsparc*-*-*)
|
||||
echo '/* configured for sparc8 */' >>./mpi/asm-syntax.h
|
||||
path="sparc32v8"
|
||||
path="sparc32v8 sparc32"
|
||||
;;
|
||||
supersparc*-*-*)
|
||||
echo '/* configured for supersparc */' >>./mpi/asm-syntax.h
|
||||
|
@ -92,7 +121,8 @@ case "${target}" in
|
|||
path="sparc32"
|
||||
mpi_extra_modules="udiv"
|
||||
;;
|
||||
mips[34]*-*-* | mips*-*-irix6*)
|
||||
mips[34]*-*-* | \
|
||||
mips*-*-irix6*)
|
||||
echo '/* configured for MIPS3 */' >>./mpi/asm-syntax.h
|
||||
path="mips3"
|
||||
;;
|
||||
|
@ -103,7 +133,8 @@ case "${target}" in
|
|||
|
||||
# Motorola 68k configurations. Let m68k mean 68020-68040.
|
||||
# mc68000 or mc68060 configurations need to be specified explicitly
|
||||
m680[234]0*-*-linuxaout* | m68k*-*-linuxaout*)
|
||||
m680[234]0*-*-linuxaout* | \
|
||||
m68k*-*-linuxaout*)
|
||||
echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h
|
||||
cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h
|
||||
path="m68k/mc68020 m68k"
|
||||
|
@ -113,7 +144,8 @@ case "${target}" in
|
|||
cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h
|
||||
path="m68k"
|
||||
;;
|
||||
m680[234]0*-*-linux* | m68k*-*-linux*)
|
||||
m680[234]0*-*-linux* | \
|
||||
m68k*-*-linux*)
|
||||
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
|
||||
cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h
|
||||
;;
|
||||
|
@ -127,12 +159,14 @@ case "${target}" in
|
|||
cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h
|
||||
path="m68k/mc68020 m68k"
|
||||
;;
|
||||
m68000*-*-* | m68060*-*-*)
|
||||
m68000*-*-* | \
|
||||
m68060*-*-*)
|
||||
echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h
|
||||
cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h
|
||||
path="m68k/mc68000"
|
||||
;;
|
||||
m680[234]0*-*-* | m68k*-*-*)
|
||||
m680[234]0*-*-* | \
|
||||
m68k*-*-*)
|
||||
echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h
|
||||
cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h
|
||||
path="m68k/mc68020 m68k"
|
||||
|
@ -144,25 +178,37 @@ case "${target}" in
|
|||
cat $srcdir/mpi/powerpc32/syntax.h >>./mpi/asm-syntax.h
|
||||
path="powerpc32"
|
||||
;;
|
||||
rs6000-*-aix[456789]* | rs6000-*-aix3.2.[456789])
|
||||
rs6000-*-aix[456789]* | \
|
||||
rs6000-*-aix3.2.[456789])
|
||||
mpi_sflags="-Wa,-mpwr"
|
||||
path="power"
|
||||
mpi_extra_modules="udiv-w-sdiv"
|
||||
;;
|
||||
rs6000-*-* | power-*-* | power2-*-*)
|
||||
rs6000-*-* | \
|
||||
power-*-* | \
|
||||
power2-*-*)
|
||||
mpi_sflags="-Wa,-mppc"
|
||||
path="power"
|
||||
mpi_extra_modules="udiv-w-sdiv"
|
||||
;;
|
||||
powerpc-ibm-aix4.2.* )
|
||||
# I am not sure about this one but a machine identified by
|
||||
# powerpc-ibm-aix4.2.1.0 cannot use the powerpc32 code.
|
||||
mpi_sflags="-Wa,-mpwr"
|
||||
path="power"
|
||||
mpi_extra_modules="udiv-w-sdiv"
|
||||
;;
|
||||
ppc601-*-*)
|
||||
mpi_sflags="-Wa,-mppc"
|
||||
path="power powerpc32"
|
||||
;;
|
||||
ppc60[234]*-*-* | powerpc*-*-*)
|
||||
ppc60[234]*-*-* | \
|
||||
powerpc*-*-*)
|
||||
mpi_sflags="-Wa,-mppc"
|
||||
path="powerpc32"
|
||||
;;
|
||||
ppc620-*-* | powerpc64*-*-*)
|
||||
ppc620-*-* | \
|
||||
powerpc64*-*-*)
|
||||
mpi_sflags="-Wa,-mppc"
|
||||
path="powerpc64"
|
||||
;;
|
||||
|
|
|
@ -199,6 +199,8 @@ extern UDItype __udiv_qrnnd ();
|
|||
"rI" ((USItype)(bh)), \
|
||||
"r" ((USItype)(al)), \
|
||||
"rI" ((USItype)(bl)))
|
||||
#ifdef __ARM_ARCH_3__
|
||||
/* SAM This does not work on arm4 */
|
||||
#define umul_ppmm(xh, xl, a, b) \
|
||||
__asm__ ("%@ Inlined umul_ppmm
|
||||
mov %|r0, %2, lsr #16
|
||||
|
@ -218,6 +220,18 @@ extern UDItype __udiv_qrnnd ();
|
|||
: "r" ((USItype)(a)), \
|
||||
"r" ((USItype)(b)) \
|
||||
: "r0", "r1", "r2")
|
||||
#elif __ARM_ARCH_4__
|
||||
#define umul_ppmm(xh, xl, a, b) \
|
||||
__asm__ ("%@ Inlined umul_ppmm
|
||||
umull %r1, %r0, %r2, %r3" \
|
||||
: "=&r" ((USItype)(xh)), \
|
||||
"=r" ((USItype)(xl)) \
|
||||
: "r" ((USItype)(a)), \
|
||||
"r" ((USItype)(b)) \
|
||||
: "r0", "r1")
|
||||
#else
|
||||
#error Untested architecture
|
||||
#endif
|
||||
#define UMUL_TIME 20
|
||||
#define UDIV_TIME 100
|
||||
#endif /* __arm__ */
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* mpi-internal.h - Internal to the Multi Precision Integers
|
||||
* Copyright (C) 1998 Free Software Foundation, Inc.
|
||||
* Copyright (C) 1994, 1996 Free Software Foundation, Inc.
|
||||
* Copyright (C) 1994, 1996, 2000 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GnuPG.
|
||||
*
|
||||
|
@ -186,6 +186,17 @@ mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
|
|||
int mpihelp_cmp( mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size );
|
||||
|
||||
/*-- mpihelp-mul.c --*/
|
||||
|
||||
struct karatsuba_ctx {
|
||||
struct karatsuba_ctx *next;
|
||||
mpi_ptr_t tspace;
|
||||
mpi_size_t tspace_size;
|
||||
mpi_ptr_t tp;
|
||||
mpi_size_t tp_size;
|
||||
};
|
||||
|
||||
void mpihelp_release_karatsuba_ctx( struct karatsuba_ctx *ctx );
|
||||
|
||||
mpi_limb_t mpihelp_addmul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
|
||||
mpi_size_t s1_size, mpi_limb_t s2_limb);
|
||||
mpi_limb_t mpihelp_submul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
|
||||
|
@ -198,6 +209,12 @@ void mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size );
|
|||
void mpih_sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size,
|
||||
mpi_ptr_t tspace);
|
||||
|
||||
void mpihelp_mul_karatsuba_case( mpi_ptr_t prodp,
|
||||
mpi_ptr_t up, mpi_size_t usize,
|
||||
mpi_ptr_t vp, mpi_size_t vsize,
|
||||
struct karatsuba_ctx *ctx );
|
||||
|
||||
|
||||
/*-- mpihelp-mul_1.c (or xxx/cpu/ *.S) --*/
|
||||
mpi_limb_t mpihelp_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
|
||||
mpi_size_t s1_size, mpi_limb_t s2_limb);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* mpi-pow.c - MPI functions
|
||||
* Copyright (C) 1998 Free Software Foundation, Inc.
|
||||
* Copyright (C) 1994, 1996 Free Software Foundation, Inc.
|
||||
* Copyright (C) 1994, 1996, 2000 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GnuPG.
|
||||
*
|
||||
|
@ -30,9 +30,10 @@
|
|||
#include <config.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include "mpi-internal.h"
|
||||
#include "longlong.h"
|
||||
#include <assert.h>
|
||||
|
||||
|
||||
/****************
|
||||
|
@ -159,7 +160,9 @@ gcry_mpi_powm( MPI res, MPI base, MPI exp, MPI mod)
|
|||
int c;
|
||||
mpi_limb_t e;
|
||||
mpi_limb_t carry_limb;
|
||||
struct karatsuba_ctx karactx;
|
||||
|
||||
memset( &karactx, 0, sizeof karactx );
|
||||
negative_result = (ep[0] & 1) && base->sign;
|
||||
|
||||
i = esize - 1;
|
||||
|
@ -177,6 +180,7 @@ gcry_mpi_powm( MPI res, MPI base, MPI exp, MPI mod)
|
|||
* by RP (==RES->d), and with 50% probability in the area originally
|
||||
* pointed to by XP.
|
||||
*/
|
||||
|
||||
for(;;) {
|
||||
while( c ) {
|
||||
mpi_ptr_t tp;
|
||||
|
@ -194,7 +198,6 @@ gcry_mpi_powm( MPI res, MPI base, MPI exp, MPI mod)
|
|||
mpi_free_limb_space( tspace );
|
||||
tsize = 2 * rsize;
|
||||
tspace = mpi_alloc_limb_space( tsize, 0 );
|
||||
|
||||
}
|
||||
mpih_sqr_n( xp, rp, rsize, tspace );
|
||||
}
|
||||
|
@ -209,7 +212,15 @@ gcry_mpi_powm( MPI res, MPI base, MPI exp, MPI mod)
|
|||
rsize = xsize;
|
||||
|
||||
if( (mpi_limb_signed_t)e < 0 ) {
|
||||
mpihelp_mul( xp, rp, rsize, bp, bsize );
|
||||
/*mpihelp_mul( xp, rp, rsize, bp, bsize );*/
|
||||
if( bsize < KARATSUBA_THRESHOLD ) {
|
||||
mpihelp_mul( xp, rp, rsize, bp, bsize );
|
||||
}
|
||||
else {
|
||||
mpihelp_mul_karatsuba_case(
|
||||
xp, rp, rsize, bp, bsize, &karactx );
|
||||
}
|
||||
|
||||
xsize = rsize + bsize;
|
||||
if( xsize > msize ) {
|
||||
mpihelp_divrem(xp + msize, 0, xp, xsize, mp, msize);
|
||||
|
@ -258,6 +269,8 @@ gcry_mpi_powm( MPI res, MPI base, MPI exp, MPI mod)
|
|||
if( mod_shift_cnt )
|
||||
mpihelp_rshift( rp, rp, rsize, mod_shift_cnt);
|
||||
MPN_NORMALIZE (rp, rsize);
|
||||
|
||||
mpihelp_release_karatsuba_ctx( &karactx );
|
||||
}
|
||||
|
||||
if( negative_result && rsize ) {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* mpihelp-div.c - MPI helper functions
|
||||
* Copyright (C) 1998 Free Software Foundation, Inc.
|
||||
* Copyright (C) 1994, 1996 Free Software Foundation, Inc.
|
||||
* Copyright (C) 1994, 1996, 2000 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GnuPG.
|
||||
*
|
||||
|
@ -338,7 +338,7 @@ mpihelp_divrem( mpi_ptr_t qp, mpi_size_t qextra_limbs,
|
|||
}
|
||||
else {
|
||||
n2 = np[dsize - 1];
|
||||
MPN_COPY_DECR (np + 1, np, dsize);
|
||||
MPN_COPY_DECR (np + 1, np, dsize - 1);
|
||||
np[0] = 0;
|
||||
}
|
||||
|
||||
|
|
119
mpi/mpih-mul.c
119
mpi/mpih-mul.c
|
@ -1,5 +1,5 @@
|
|||
/* mpihelp-mul.c - MPI helper functions
|
||||
* Copyright (C) 1994, 1996, 1998, 1999 Free Software Foundation, Inc.
|
||||
* Copyright (C) 1994, 1996, 1998, 1999, 2000 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GnuPG.
|
||||
*
|
||||
|
@ -29,10 +29,10 @@
|
|||
#include <config.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "mpi-internal.h"
|
||||
#include "longlong.h"
|
||||
#include "g10lib.h" /* for g10_is_secure() */
|
||||
|
||||
#include "g10lib.h" /* g10_is_secure() */
|
||||
|
||||
|
||||
#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
|
||||
|
@ -373,6 +373,86 @@ mpihelp_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
|
|||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
mpihelp_mul_karatsuba_case( mpi_ptr_t prodp,
|
||||
mpi_ptr_t up, mpi_size_t usize,
|
||||
mpi_ptr_t vp, mpi_size_t vsize,
|
||||
struct karatsuba_ctx *ctx )
|
||||
{
|
||||
mpi_limb_t cy;
|
||||
|
||||
if( !ctx->tspace || ctx->tspace_size < vsize ) {
|
||||
if( ctx->tspace )
|
||||
mpi_free_limb_space( ctx->tspace );
|
||||
ctx->tspace = mpi_alloc_limb_space( 2 * vsize,
|
||||
g10_is_secure( up ) || g10_is_secure( vp ) );
|
||||
ctx->tspace_size = vsize;
|
||||
}
|
||||
|
||||
MPN_MUL_N_RECURSE( prodp, up, vp, vsize, ctx->tspace );
|
||||
|
||||
prodp += vsize;
|
||||
up += vsize;
|
||||
usize -= vsize;
|
||||
if( usize >= vsize ) {
|
||||
if( !ctx->tp || ctx->tp_size < vsize ) {
|
||||
if( ctx->tp )
|
||||
mpi_free_limb_space( ctx->tp );
|
||||
ctx->tp = mpi_alloc_limb_space( 2 * vsize, g10_is_secure( up )
|
||||
|| g10_is_secure( vp ) );
|
||||
ctx->tp_size = vsize;
|
||||
}
|
||||
|
||||
do {
|
||||
MPN_MUL_N_RECURSE( ctx->tp, up, vp, vsize, ctx->tspace );
|
||||
cy = mpihelp_add_n( prodp, prodp, ctx->tp, vsize );
|
||||
mpihelp_add_1( prodp + vsize, ctx->tp + vsize, vsize, cy );
|
||||
prodp += vsize;
|
||||
up += vsize;
|
||||
usize -= vsize;
|
||||
} while( usize >= vsize );
|
||||
}
|
||||
|
||||
if( usize ) {
|
||||
if( usize < KARATSUBA_THRESHOLD ) {
|
||||
mpihelp_mul( ctx->tspace, vp, vsize, up, usize );
|
||||
}
|
||||
else {
|
||||
if( !ctx->next ) {
|
||||
ctx->next = g10_xcalloc( 1, sizeof *ctx );
|
||||
}
|
||||
mpihelp_mul_karatsuba_case( ctx->tspace,
|
||||
vp, vsize,
|
||||
up, usize,
|
||||
ctx->next );
|
||||
}
|
||||
|
||||
cy = mpihelp_add_n( prodp, prodp, ctx->tspace, vsize);
|
||||
mpihelp_add_1( prodp + vsize, ctx->tspace + vsize, usize, cy );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
mpihelp_release_karatsuba_ctx( struct karatsuba_ctx *ctx )
|
||||
{
|
||||
struct karatsuba_ctx *ctx2;
|
||||
|
||||
if( ctx->tp )
|
||||
mpi_free_limb_space( ctx->tp );
|
||||
if( ctx->tspace )
|
||||
mpi_free_limb_space( ctx->tspace );
|
||||
for( ctx=ctx->next; ctx; ctx = ctx2 ) {
|
||||
ctx2 = ctx->next;
|
||||
if( ctx->tp )
|
||||
mpi_free_limb_space( ctx->tp );
|
||||
if( ctx->tspace )
|
||||
mpi_free_limb_space( ctx->tspace );
|
||||
g10_free( ctx );
|
||||
}
|
||||
}
|
||||
|
||||
/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
|
||||
* and v (pointed to by VP, with VSIZE limbs), and store the result at
|
||||
* PRODP. USIZE + VSIZE limbs are always stored, but if the input
|
||||
|
@ -394,7 +474,7 @@ mpihelp_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
|
|||
{
|
||||
mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
|
||||
mpi_limb_t cy;
|
||||
mpi_ptr_t tspace;
|
||||
struct karatsuba_ctx ctx;
|
||||
|
||||
if( vsize < KARATSUBA_THRESHOLD ) {
|
||||
mpi_size_t i;
|
||||
|
@ -438,34 +518,9 @@ mpihelp_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
|
|||
return cy;
|
||||
}
|
||||
|
||||
tspace = mpi_alloc_limb_space( 2 * vsize,
|
||||
g10_is_secure( up ) || g10_is_secure( vp ) );
|
||||
MPN_MUL_N_RECURSE( prodp, up, vp, vsize, tspace );
|
||||
|
||||
prodp += vsize;
|
||||
up += vsize;
|
||||
usize -= vsize;
|
||||
if( usize >= vsize ) {
|
||||
mpi_ptr_t tp = mpi_alloc_limb_space( 2 * vsize, g10_is_secure( up )
|
||||
|| g10_is_secure( vp ) );
|
||||
do {
|
||||
MPN_MUL_N_RECURSE( tp, up, vp, vsize, tspace );
|
||||
cy = mpihelp_add_n( prodp, prodp, tp, vsize );
|
||||
mpihelp_add_1( prodp + vsize, tp + vsize, vsize, cy );
|
||||
prodp += vsize;
|
||||
up += vsize;
|
||||
usize -= vsize;
|
||||
} while( usize >= vsize );
|
||||
mpi_free_limb_space( tp );
|
||||
}
|
||||
|
||||
if( usize ) {
|
||||
mpihelp_mul( tspace, vp, vsize, up, usize );
|
||||
cy = mpihelp_add_n( prodp, prodp, tspace, vsize);
|
||||
mpihelp_add_1( prodp + vsize, tspace + vsize, usize, cy );
|
||||
}
|
||||
|
||||
mpi_free_limb_space( tspace );
|
||||
memset( &ctx, 0, sizeof ctx );
|
||||
mpihelp_mul_karatsuba_case( prodp, up, usize, vp, vsize, &ctx );
|
||||
mpihelp_release_karatsuba_ctx( &ctx );
|
||||
return *prod_endp;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
mpih-add1.S
|
||||
mpih-lshift.S
|
||||
mpih-mul1.S
|
||||
mpih-mul2.S
|
||||
mpih-mul3.S
|
||||
mpih-rshift.S
|
||||
mpih-sub1.S
|
86
mpi/power/mpih-add1.S
Normal file
86
mpi/power/mpih-add1.S
Normal file
|
@ -0,0 +1,86 @@
|
|||
/* IBM POWER add_n -- Add two limb vectors of equal, non-zero length.
|
||||
*
|
||||
* Copyright (C) 1992, 1994, 1995, 1996, 1999 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GnuPG.
|
||||
*
|
||||
* GnuPG is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GnuPG is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
|
||||
*/
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
/*
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r3
|
||||
# s1_ptr r4
|
||||
# s2_ptr r5
|
||||
# size r6
|
||||
*/
|
||||
|
||||
.toc
|
||||
.extern mpihelp_add_n[DS]
|
||||
.extern .mpihelp_add_n
|
||||
.csect [PR]
|
||||
.align 2
|
||||
.globl mpihelp_add_n
|
||||
.globl .mpihelp_add_n
|
||||
.csect mpihelp_add_n[DS]
|
||||
mpihelp_add_n:
|
||||
.long .mpihelp_add_n, TOC[tc0], 0
|
||||
.csect [PR]
|
||||
.mpihelp_add_n:
|
||||
andil. 10,6,1 # odd or even number of limbs?
|
||||
l 8,0(4) # load least significant s1 limb
|
||||
l 0,0(5) # load least significant s2 limb
|
||||
cal 3,-4(3) # offset res_ptr, it's updated before it's used
|
||||
sri 10,6,1 # count for unrolled loop
|
||||
a 7,0,8 # add least significant limbs, set cy
|
||||
mtctr 10 # copy count into CTR
|
||||
beq 0,Leven # branch if even # of limbs (# of limbs >= 2)
|
||||
|
||||
# We have an odd # of limbs. Add the first limbs separately.
|
||||
cmpi 1,10,0 # is count for unrolled loop zero?
|
||||
bne 1,L1 # branch if not
|
||||
st 7,4(3)
|
||||
aze 3,10 # use the fact that r10 is zero...
|
||||
br # return
|
||||
|
||||
# We added least significant limbs. Now reload the next limbs to enter loop.
|
||||
L1: lu 8,4(4) # load s1 limb and update s1_ptr
|
||||
lu 0,4(5) # load s2 limb and update s2_ptr
|
||||
stu 7,4(3)
|
||||
ae 7,0,8 # add limbs, set cy
|
||||
Leven: lu 9,4(4) # load s1 limb and update s1_ptr
|
||||
lu 10,4(5) # load s2 limb and update s2_ptr
|
||||
bdz Lend # If done, skip loop
|
||||
|
||||
Loop: lu 8,4(4) # load s1 limb and update s1_ptr
|
||||
lu 0,4(5) # load s2 limb and update s2_ptr
|
||||
ae 11,9,10 # add previous limbs with cy, set cy
|
||||
stu 7,4(3) #
|
||||
lu 9,4(4) # load s1 limb and update s1_ptr
|
||||
lu 10,4(5) # load s2 limb and update s2_ptr
|
||||
ae 7,0,8 # add previous limbs with cy, set cy
|
||||
stu 11,4(3) #
|
||||
bdn Loop # decrement CTR and loop back
|
||||
|
||||
Lend: ae 11,9,10 # add limbs with cy, set cy
|
||||
st 7,4(3) #
|
||||
st 11,8(3) #
|
||||
lil 3,0 # load cy into ...
|
||||
aze 3,3 # ... return value register
|
||||
br
|
||||
|
64
mpi/power/mpih-lshift.S
Normal file
64
mpi/power/mpih-lshift.S
Normal file
|
@ -0,0 +1,64 @@
|
|||
/* IBM POWER lshift
|
||||
*
|
||||
* Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GnuPG.
|
||||
*
|
||||
* GnuPG is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GnuPG is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
|
||||
*/
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
/*
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r3
|
||||
# s_ptr r4
|
||||
# size r5
|
||||
# cnt r6
|
||||
*/
|
||||
|
||||
.toc
|
||||
.extern mpihelp_lshift[DS]
|
||||
.extern .mpihelp_lshift
|
||||
.csect [PR]
|
||||
.align 2
|
||||
.globl mpihelp_lshift
|
||||
.globl .mpihelp_lshift
|
||||
.csect mpihelp_lshift[DS]
|
||||
mpihelp_lshift:
|
||||
.long .mpihelp_lshift, TOC[tc0], 0
|
||||
.csect [PR]
|
||||
.mpihelp_lshift:
|
||||
sli 0,5,2
|
||||
cax 9,3,0
|
||||
cax 4,4,0
|
||||
sfi 8,6,32
|
||||
mtctr 5 # put limb count in CTR loop register
|
||||
lu 0,-4(4) # read most significant limb
|
||||
sre 3,0,8 # compute carry out limb, and init MQ register
|
||||
bdz Lend2 # if just one limb, skip loop
|
||||
lu 0,-4(4) # read 2:nd most significant limb
|
||||
sreq 7,0,8 # compute most significant limb of result
|
||||
bdz Lend # if just two limb, skip loop
|
||||
Loop: lu 0,-4(4) # load next lower limb
|
||||
stu 7,-4(9) # store previous result during read latency
|
||||
sreq 7,0,8 # compute result limb
|
||||
bdn Loop # loop back until CTR is zero
|
||||
Lend: stu 7,-4(9) # store 2:nd least significant limb
|
||||
Lend2: sle 7,0,6 # compute least significant limb
|
||||
st 7,-4(9) # store it
|
||||
br
|
||||
|
115
mpi/power/mpih-mul1.S
Normal file
115
mpi/power/mpih-mul1.S
Normal file
|
@ -0,0 +1,115 @@
|
|||
/* IBM POWER mul_1 -- Multiply a limb vector with a limb and store
|
||||
* the result in a second limb vector.
|
||||
*
|
||||
* Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GnuPG.
|
||||
*
|
||||
* GnuPG is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GnuPG is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
|
||||
*/
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
/*
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r3
|
||||
# s1_ptr r4
|
||||
# size r5
|
||||
# s2_limb r6
|
||||
|
||||
# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
|
||||
# obtain that operation, we have to use the 32x32->64 signed multiplication
|
||||
# instruction, and add the appropriate compensation to the high limb of the
|
||||
# result. We add the multiplicand if the multiplier has its most significant
|
||||
# bit set, and we add the multiplier if the multiplicand has its most
|
||||
# significant bit set. We need to preserve the carry flag between each
|
||||
# iteration, so we have to compute the compensation carefully (the natural,
|
||||
# srai+and doesn't work). Since the POWER architecture has a branch unit
|
||||
# we can branch in zero cycles, so that's how we perform the additions.
|
||||
*/
|
||||
|
||||
.toc
|
||||
.csect .mpihelp_mul_1[PR]
|
||||
.align 2
|
||||
.globl mpihelp_mul_1
|
||||
.globl .mpihelp_mul_1
|
||||
.csect mpihelp_mul_1[DS]
|
||||
mpihelp_mul_1:
|
||||
.long .mpihelp_mul_1[PR], TOC[tc0], 0
|
||||
.csect .mpihelp_mul_1[PR]
|
||||
.mpihelp_mul_1:
|
||||
|
||||
cal 3,-4(3)
|
||||
l 0,0(4)
|
||||
cmpi 0,6,0
|
||||
mtctr 5
|
||||
mul 9,0,6
|
||||
srai 7,0,31
|
||||
and 7,7,6
|
||||
mfmq 8
|
||||
ai 0,0,0 # reset carry
|
||||
cax 9,9,7
|
||||
blt Lneg
|
||||
Lpos: bdz Lend
|
||||
Lploop: lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 10,0,6
|
||||
mfmq 0
|
||||
ae 8,0,9
|
||||
bge Lp0
|
||||
cax 10,10,6 # adjust high limb for negative limb from s1
|
||||
Lp0: bdz Lend0
|
||||
lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 9,0,6
|
||||
mfmq 0
|
||||
ae 8,0,10
|
||||
bge Lp1
|
||||
cax 9,9,6 # adjust high limb for negative limb from s1
|
||||
Lp1: bdn Lploop
|
||||
b Lend
|
||||
|
||||
Lneg: cax 9,9,0
|
||||
bdz Lend
|
||||
Lnloop: lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 10,0,6
|
||||
cax 10,10,0 # adjust high limb for negative s2_limb
|
||||
mfmq 0
|
||||
ae 8,0,9
|
||||
bge Ln0
|
||||
cax 10,10,6 # adjust high limb for negative limb from s1
|
||||
Ln0: bdz Lend0
|
||||
lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 9,0,6
|
||||
cax 9,9,0 # adjust high limb for negative s2_limb
|
||||
mfmq 0
|
||||
ae 8,0,10
|
||||
bge Ln1
|
||||
cax 9,9,6 # adjust high limb for negative limb from s1
|
||||
Ln1: bdn Lnloop
|
||||
b Lend
|
||||
|
||||
Lend0: cal 9,0(10)
|
||||
Lend: st 8,4(3)
|
||||
aze 3,9
|
||||
br
|
||||
|
130
mpi/power/mpih-mul2.S
Normal file
130
mpi/power/mpih-mul2.S
Normal file
|
@ -0,0 +1,130 @@
|
|||
/* IBM POWER addmul_1 -- Multiply a limb vector with a limb and add
|
||||
* the result to a second limb vector.
|
||||
*
|
||||
* Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GnuPG.
|
||||
*
|
||||
* GnuPG is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GnuPG is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
|
||||
*/
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
|
||||
|
||||
/*
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r3
|
||||
# s1_ptr r4
|
||||
# size r5
|
||||
# s2_limb r6
|
||||
|
||||
# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
|
||||
# obtain that operation, we have to use the 32x32->64 signed multiplication
|
||||
# instruction, and add the appropriate compensation to the high limb of the
|
||||
# result. We add the multiplicand if the multiplier has its most significant
|
||||
# bit set, and we add the multiplier if the multiplicand has its most
|
||||
# significant bit set. We need to preserve the carry flag between each
|
||||
# iteration, so we have to compute the compensation carefully (the natural,
|
||||
# srai+and doesn't work). Since the POWER architecture has a branch unit
|
||||
# we can branch in zero cycles, so that's how we perform the additions.
|
||||
*/
|
||||
|
||||
.toc
|
||||
.csect .mpihelp_addmul_1[PR]
|
||||
.align 2
|
||||
.globl mpihelp_addmul_1
|
||||
.globl .mpihelp_addmul_1
|
||||
.csect mpihelp_addmul_1[DS]
|
||||
mpihelp_addmul_1:
|
||||
.long .mpihelp_addmul_1[PR], TOC[tc0], 0
|
||||
.csect .mpihelp_addmul_1[PR]
|
||||
.mpihelp_addmul_1:
|
||||
|
||||
cal 3,-4(3)
|
||||
l 0,0(4)
|
||||
cmpi 0,6,0
|
||||
mtctr 5
|
||||
mul 9,0,6
|
||||
srai 7,0,31
|
||||
and 7,7,6
|
||||
mfmq 8
|
||||
cax 9,9,7
|
||||
l 7,4(3)
|
||||
a 8,8,7 # add res_limb
|
||||
blt Lneg
|
||||
Lpos: bdz Lend
|
||||
|
||||
Lploop: lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 10,0,6
|
||||
mfmq 0
|
||||
ae 8,0,9 # low limb + old_cy_limb + old cy
|
||||
l 7,4(3)
|
||||
aze 10,10 # propagate cy to new cy_limb
|
||||
a 8,8,7 # add res_limb
|
||||
bge Lp0
|
||||
cax 10,10,6 # adjust high limb for negative limb from s1
|
||||
Lp0: bdz Lend0
|
||||
lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 9,0,6
|
||||
mfmq 0
|
||||
ae 8,0,10
|
||||
l 7,4(3)
|
||||
aze 9,9
|
||||
a 8,8,7
|
||||
bge Lp1
|
||||
cax 9,9,6 # adjust high limb for negative limb from s1
|
||||
Lp1: bdn Lploop
|
||||
|
||||
b Lend
|
||||
|
||||
Lneg: cax 9,9,0
|
||||
bdz Lend
|
||||
Lnloop: lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 10,0,6
|
||||
mfmq 7
|
||||
ae 8,7,9
|
||||
l 7,4(3)
|
||||
ae 10,10,0 # propagate cy to new cy_limb
|
||||
a 8,8,7 # add res_limb
|
||||
bge Ln0
|
||||
cax 10,10,6 # adjust high limb for negative limb from s1
|
||||
Ln0: bdz Lend0
|
||||
lu 0,4(4)
|
||||
stu 8,4(3)
|
||||
cmpi 0,0,0
|
||||
mul 9,0,6
|
||||
mfmq 7
|
||||
ae 8,7,10
|
||||
l 7,4(3)
|
||||
ae 9,9,0 # propagate cy to new cy_limb
|
||||
a 8,8,7 # add res_limb
|
||||
bge Ln1
|
||||
cax 9,9,6 # adjust high limb for negative limb from s1
|
||||
Ln1: bdn Lnloop
|
||||
b Lend
|
||||
|
||||
Lend0: cal 9,0(10)
|
||||
Lend: st 8,4(3)
|
||||
aze 3,9
|
||||
br
|
||||
|
135
mpi/power/mpih-mul3.S
Normal file
135
mpi/power/mpih-mul3.S
Normal file
|
@ -0,0 +1,135 @@
|
|||
/* IBM POWER submul_1 -- Multiply a limb vector with a limb and subtract
|
||||
* the result from a second limb vector.
|
||||
*
|
||||
* Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GnuPG.
|
||||
*
|
||||
* GnuPG is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GnuPG is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
|
||||
*/
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
|
||||
/*
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r3
|
||||
# s1_ptr r4
|
||||
# size r5
|
||||
# s2_limb r6
|
||||
|
||||
# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
|
||||
# obtain that operation, we have to use the 32x32->64 signed multiplication
|
||||
# instruction, and add the appropriate compensation to the high limb of the
|
||||
# result. We add the multiplicand if the multiplier has its most significant
|
||||
# bit set, and we add the multiplier if the multiplicand has its most
|
||||
# significant bit set. We need to preserve the carry flag between each
|
||||
# iteration, so we have to compute the compensation carefully (the natural,
|
||||
# srai+and doesn't work). Since the POWER architecture has a branch unit
|
||||
# we can branch in zero cycles, so that's how we perform the additions.
|
||||
*/
|
||||
|
||||
/*
 * mpihelp_submul_1 -- multiply the limb vector at r4 (s1_ptr) by the limb
 * in r6 (s2_limb) and subtract the products from the limb vector at r3
 * (res_ptr).  The limb count is taken from r5 (size).  On return r3 holds
 * the final carry limb.
 *
 * Register use, as read from the code below:
 *   r3       result pointer, biased by -4 so that 4(3) is the current limb
 *   r4       source pointer, advanced by every lu
 *   r6       multiplier limb (s2_limb)
 *   CTR      remaining limb count
 *   r9, r10  carry limb, alternating between the two unrolled loop halves
 *   r11      product limb including the incoming carry; junk after each
 *            "invert cy" add
 *   r0, r7   scratch (source limb, low product word, result limb)
 *   r8       result limb waiting to be stored
 *   MQ       low word of each product
 *
 * Each sf leaves the carry flag in the opposite sense to the one the next
 * ae needs, so it is followed by an add whose only purpose is to
 * regenerate the flag inverted.  The sign of s2_limb is tested once;
 * Lploop handles a non-negative s2_limb and Lnloop a negative one.
 */
.toc
|
||||
.csect .mpihelp_submul_1[PR]
|
||||
.align 2
|
||||
.globl mpihelp_submul_1
|
||||
.globl .mpihelp_submul_1
|
||||
.csect mpihelp_submul_1[DS] # function descriptor csect
|
||||
mpihelp_submul_1:
|
||||
.long .mpihelp_submul_1[PR], TOC[tc0], 0 # descriptor: code address, TOC anchor, 0
|
||||
.csect .mpihelp_submul_1[PR] # back to the code csect
|
||||
.mpihelp_submul_1:
|
||||
|
||||
cal 3,-4(3) # r3 = res_ptr - 4, so 4(3) is the current res limb
|
||||
l 0,0(4) # r0 = first s1 limb
|
||||
cmpi 0,6,0 # cr0 = sign of s2_limb, used by the blt below
|
||||
mtctr 5 # CTR = size
|
||||
mul 9,0,6 # signed multiply: r9 = high word, MQ = low word
|
||||
srai 7,0,31 # r7 = all ones if the s1 limb is negative, else 0
|
||||
and 7,7,6 # r7 = s2_limb if the s1 limb is negative, else 0
|
||||
mfmq 11 # r11 = low word of the product
|
||||
cax 9,9,7 # adjust high limb for negative limb from s1
|
||||
l 7,4(3) # r7 = res limb
|
||||
sf 8,11,7 # subtract product limb from res_limb
|
||||
a 11,8,11 # invert cy (r11 is junk)
|
||||
blt Lneg # s2_limb is negative: use the loop that also adds the s1 limb
|
||||
Lpos: bdz Lend # just one limb: finish, carry limb is in r9
|
||||
|
||||
Lploop: lu 0,4(4) # r0 = next s1 limb, advance s1_ptr
|
||||
stu 8,4(3) # store previous result limb, advance res_ptr
|
||||
cmpi 0,0,0 # cr0 = sign of the s1 limb, used by the bge below
|
||||
mul 10,0,6 # r10 = high word, MQ = low word
|
||||
mfmq 0 # r0 = low word of the product
|
||||
ae 11,0,9 # low limb + old_cy_limb + old cy
|
||||
l 7,4(3) # r7 = res limb
|
||||
aze 10,10 # propagate cy to new cy_limb
|
||||
sf 8,11,7 # subtract product limb from res_limb
|
||||
a 11,8,11 # invert cy (r11 is junk)
|
||||
bge Lp0 # s1 limb is not negative: no adjustment
|
||||
cax 10,10,6 # adjust high limb for negative limb from s1
|
||||
Lp0: bdz Lend0 # done: carry limb is in r10
|
||||
lu 0,4(4) # second half of the unrolled loop, r9 and r10 swapped
|
||||
stu 8,4(3) # store previous result limb, advance res_ptr
|
||||
cmpi 0,0,0 # cr0 = sign of the s1 limb
|
||||
mul 9,0,6 # r9 = high word, MQ = low word
|
||||
mfmq 0 # r0 = low word of the product
|
||||
ae 11,0,10 # low limb + old_cy_limb + old cy
|
||||
l 7,4(3) # r7 = res limb
|
||||
aze 9,9 # propagate cy to new cy_limb
|
||||
sf 8,11,7 # subtract product limb from res_limb
|
||||
a 11,8,11 # invert cy (r11 is junk)
|
||||
bge Lp1 # s1 limb is not negative: no adjustment
|
||||
cax 9,9,6 # adjust high limb for negative limb from s1
|
||||
Lp1: bdn Lploop # loop while CTR is not zero
|
||||
|
||||
b Lend # done: carry limb is in r9
|
||||
|
||||
Lneg: cax 9,9,0 # s2_limb is negative: add the first s1 limb to the high limb
|
||||
bdz Lend # just one limb: finish, carry limb is in r9
|
||||
Lnloop: lu 0,4(4) # r0 = next s1 limb, advance s1_ptr
|
||||
stu 8,4(3) # store previous result limb, advance res_ptr
|
||||
cmpi 0,0,0 # cr0 = sign of the s1 limb, used by the bge below
|
||||
mul 10,0,6 # r10 = high word, MQ = low word
|
||||
mfmq 7 # r7 = low word; r0 keeps the s1 limb for the second ae
|
||||
ae 11,7,9 # low limb + old_cy_limb + old cy
|
||||
l 7,4(3) # r7 = res limb
|
||||
ae 10,10,0 # add s1 limb (s2_limb is negative) and propagate cy to new cy_limb
|
||||
sf 8,11,7 # subtract product limb from res_limb
|
||||
a 11,8,11 # invert cy (r11 is junk)
|
||||
bge Ln0 # s1 limb is not negative: no further adjustment
|
||||
cax 10,10,6 # adjust high limb for negative limb from s1
|
||||
Ln0: bdz Lend0 # done: carry limb is in r10
|
||||
lu 0,4(4) # second half of the unrolled loop, r9 and r10 swapped
|
||||
stu 8,4(3) # store previous result limb, advance res_ptr
|
||||
cmpi 0,0,0 # cr0 = sign of the s1 limb
|
||||
mul 9,0,6 # r9 = high word, MQ = low word
|
||||
mfmq 7 # r7 = low word; r0 keeps the s1 limb for the second ae
|
||||
ae 11,7,10 # low limb + old_cy_limb + old cy
|
||||
l 7,4(3) # r7 = res limb
|
||||
ae 9,9,0 # add s1 limb (s2_limb is negative) and propagate cy to new cy_limb
|
||||
sf 8,11,7 # subtract product limb from res_limb
|
||||
a 11,8,11 # invert cy (r11 is junk)
|
||||
bge Ln1 # s1 limb is not negative: no further adjustment
|
||||
cax 9,9,6 # adjust high limb for negative limb from s1
|
||||
Ln1: bdn Lnloop # loop while CTR is not zero
|
||||
b Lend # done: carry limb is in r9
|
||||
|
||||
Lend0: cal 9,0(10) # r9 = r10, so both exits find the carry limb in r9
|
||||
Lend: st 8,4(3) # store the last result limb
|
||||
aze 3,9 # return value = carry limb + pending cy
|
||||
br
|
||||
|
64
mpi/power/mpih-rshift.S
Normal file
64
mpi/power/mpih-rshift.S
Normal file
|
@ -0,0 +1,64 @@
|
|||
/* IBM POWER rshift
|
||||
*
|
||||
* Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GnuPG.
|
||||
*
|
||||
* GnuPG is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GnuPG is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
|
||||
*/
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
|
||||
/*
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r3
|
||||
# s_ptr r4
|
||||
# size r5
|
||||
# cnt r6
|
||||
*/
|
||||
|
||||
/*
 * mpihelp_rshift -- shift the limb vector at r4 (s_ptr, r5 limbs) right
 * by r6 (cnt) bits and store the result at r3 (res_ptr).  r3 is
 * overwritten with the carry limb computed from the least significant
 * source limb and is not modified again before the return.
 *
 * Register use, as read from the code below:
 *   r9   result pointer, biased by -4 for the stu stores
 *   r8   32 - cnt
 *   r0   most recently loaded source limb
 *   r7   result limb waiting to be stored
 *   CTR  remaining limb count
 */
.toc
|
||||
.extern mpihelp_rshift[DS]
|
||||
.extern .mpihelp_rshift
|
||||
.csect [PR]
|
||||
.align 2
|
||||
.globl mpihelp_rshift
|
||||
.globl .mpihelp_rshift
|
||||
.csect mpihelp_rshift[DS] # function descriptor csect
|
||||
mpihelp_rshift:
|
||||
.long .mpihelp_rshift, TOC[tc0], 0 # descriptor: code address, TOC anchor, 0
|
||||
.csect [PR] # back to the code csect
|
||||
.mpihelp_rshift:
|
||||
sfi 8,6,32 # r8 = 32 - cnt, the shift count used by sle and sleq
|
||||
mtctr 5 # put limb count in CTR loop register
|
||||
l 0,0(4) # read least significant limb
|
||||
ai 9,3,-4 # r9 = res_ptr - 4, because every stu stores at 4(9)
|
||||
sle 3,0,8 # compute carry limb, and init MQ register
|
||||
bdz Lend2 # if just one limb, skip loop
|
||||
lu 0,4(4) # read 2:nd least significant limb
|
||||
sleq 7,0,8 # compute least significant limb of result
|
||||
bdz Lend # if just two limbs, skip loop
|
||||
Loop: lu 0,4(4) # load next higher limb
|
||||
stu 7,4(9) # store previous result during read latency
|
||||
sleq 7,0,8 # compute result limb
|
||||
bdn Loop # loop back until CTR is zero
|
||||
Lend: stu 7,4(9) # store 2:nd most significant limb
|
||||
Lend2: sre 7,0,6 # compute most significant limb
|
||||
st 7,4(9) # store it
|
||||
br
|
||||
|
||||
|
87
mpi/power/mpih-sub1.S
Normal file
87
mpi/power/mpih-sub1.S
Normal file
|
@ -0,0 +1,87 @@
|
|||
/* IBM POWER sub_n -- Subtract two limb vectors of equal, non-zero length.
|
||||
*
|
||||
* Copyright (C) 1992, 1994, 1995, 1996, 1999 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GnuPG.
|
||||
*
|
||||
* GnuPG is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GnuPG is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
|
||||
*/
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
/*
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r3
|
||||
# s1_ptr r4
|
||||
# s2_ptr r5
|
||||
# size r6
|
||||
*/
|
||||
|
||||
/*
 * mpihelp_sub_n -- subtract the limb vector at r5 (s2_ptr) from the limb
 * vector at r4 (s1_ptr), both r6 (size) limbs long, and store the
 * difference at r3 (res_ptr).  On return r3 holds the inverted final
 * carry flag, i.e. the borrow out of the most significant limb.
 *
 * The main loop handles two limbs per iteration.  An odd limb count is
 * dealt with by processing one limb before entering the loop.
 *
 * Register use, as read from the code below:
 *   r3       result pointer, biased by -4 for the stu stores
 *   r8, r0   s1 and s2 limb of one limb pair
 *   r9, r10  s1 and s2 limb of the other limb pair
 *   r7, r11  difference limbs waiting to be stored
 *   CTR      size / 2
 */
.toc
|
||||
.extern mpihelp_sub_n[DS]
|
||||
.extern .mpihelp_sub_n
|
||||
.csect [PR]
|
||||
.align 2
|
||||
.globl mpihelp_sub_n
|
||||
.globl .mpihelp_sub_n
|
||||
.csect mpihelp_sub_n[DS] # function descriptor csect
|
||||
mpihelp_sub_n:
|
||||
.long .mpihelp_sub_n, TOC[tc0], 0 # descriptor: code address, TOC anchor, 0
|
||||
.csect [PR] # back to the code csect
|
||||
.mpihelp_sub_n:
|
||||
andil. 10,6,1 # odd or even number of limbs?
|
||||
l 8,0(4) # load least significant s1 limb
|
||||
l 0,0(5) # load least significant s2 limb
|
||||
cal 3,-4(3) # offset res_ptr, it is updated before it is used
|
||||
sri 10,6,1 # count for unrolled loop
|
||||
sf 7,0,8 # subtract least significant limbs, set cy
|
||||
mtctr 10 # copy count into CTR
|
||||
beq 0,Leven # branch if even # of limbs (# of limbs >= 2)
|
||||
|
||||
# We have an odd # of limbs. Subtract the first limbs separately.
|
||||
cmpi 1,10,0 # is count for unrolled loop zero?
|
||||
bne 1,L1 # branch if not
|
||||
st 7,4(3) # single limb: store the only difference limb
|
||||
sfe 3,0,0 # load !cy into ...
|
||||
sfi 3,3,0 # ... return value register
|
||||
br # return
|
||||
|
||||
# We subtracted the least significant limbs. Now reload the next limbs to enter loop.
|
||||
L1: lu 8,4(4) # load s1 limb and update s1_ptr
|
||||
lu 0,4(5) # load s2 limb and update s2_ptr
|
||||
stu 7,4(3) # store least significant difference limb, update res_ptr
|
||||
sfe 7,0,8 # subtract limbs, set cy
|
||||
Leven: lu 9,4(4) # load s1 limb and update s1_ptr
|
||||
lu 10,4(5) # load s2 limb and update s2_ptr
|
||||
bdz Lend # If done, skip loop
|
||||
|
||||
Loop: lu 8,4(4) # load s1 limb and update s1_ptr
|
||||
lu 0,4(5) # load s2 limb and update s2_ptr
|
||||
sfe 11,10,9 # subtract previous limbs with cy, set cy
|
||||
stu 7,4(3) # store the older pending difference limb, update res_ptr
|
||||
lu 9,4(4) # load s1 limb and update s1_ptr
|
||||
lu 10,4(5) # load s2 limb and update s2_ptr
|
||||
sfe 7,0,8 # subtract previous limbs with cy, set cy
|
||||
stu 11,4(3) # store the difference limb computed above, update res_ptr
|
||||
bdn Loop # decrement CTR and loop back
|
||||
|
||||
Lend: sfe 11,10,9 # subtract limbs with cy, set cy
|
||||
st 7,4(3) # store 2:nd most significant difference limb
|
||||
st 11,8(3) # store most significant difference limb
|
||||
sfe 3,0,0 # load !cy into ...
|
||||
sfi 3,3,0 # ... return value register
|
||||
br
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue