1
0
Fork 0
mirror of git://git.gnupg.org/gnupg.git synced 2025-07-02 22:46:30 +02:00

See ChangeLog: Mon Jul 17 16:35:47 CEST 2000 Werner Koch

This commit is contained in:
Werner Koch 2000-07-17 14:32:21 +00:00
parent 92cd255508
commit 0bf44b072c
45 changed files with 31954 additions and 7194 deletions

View file

@ -1,3 +1,30 @@
Mon Jul 17 16:35:47 CEST 2000 Werner Koch <wk@>
* power/: Add all files from GMP for this CPU. Converted comments to
CPP comments because some ASes complain about ' in comments.
* config.links: Support for BSDI 4.x; by Wayne Chapeskie. Add support
for FreeBSD 5 and made the case stmt look nicer; by Jun Kuriyama.
Add support for NetBSD.
(sparc8): Made the search path the same as sparc9
(sparc64-unknown-linux-gnu): use udiv module; by Adam Mitchell.
* Makefile.am: c/SFLAGS/ASFLAGS/. This has only been used by the
powerpc and actually never passed the -Wa,foo to the cc.
* mpih-div.c (mpihelp_divrem): The MPN_COPY_DECR copied one element
too many. This is a gmp2.0.2p9.txt patch.
* longlong.h (umul_ppmm): Fixes for ARM-4. By Sean MacLennan.
* mpi-internal.h (karatsuba_ctx): New.
* mpih-mul.c (mpihelp_release_karatsuba_ctx): New.
(mpihelp_mul_karatsuba_case): New.
(mpihelp_mul): Split to make use of the new functions.
* mpi-pow.c (mpi_powm): Make use of the new split function to avoid
multiple allocation of temporary memory during the karatsuba operations.
* mpi_mpow.c: Removed the unused Barrett code.
2000-03-21 16:17:30 Werner Koch (wk@habibti.openit.de)
* config.links: Add support for FreeBSD 5.

View file

@ -3,12 +3,13 @@
INCLUDES = -I$(top_srcdir)/gcrypt
CFLAGS = @CFLAGS@ @MPI_OPT_FLAGS@
SFLAGS = @MPI_SFLAGS@
ASFLAGS = @MPI_SFLAGS@
EXTRA_DIST = config.links
DISTCLEANFILES = mpih-add1.S mpih-mul1.S mpih-mul2.S mpih-mul3.S \
mpih-lshift.S mpih-rshift.S mpih-sub1.S asm-syntax.h sysdep.h
# Note: we only use .S files so we should delete all left over .s
# CLEANFILES = _*.s
CLEANFILES = *.s
noinst_LTLIBRARIES = libmpi.la
@ -56,4 +57,9 @@ libmpi_la_LIBADD = $(common_asm_objects) @MPI_EXTRA_ASM_OBJS@
.S.s:
$(CPP) $(INCLUDES) $(DEFS) $< | grep -v '^#' >$*.s
# Hmmm, we should use this, so that OSes which do not distinguish
# filename case still work. We have to see how libtool can handle this
# $(CPP) $(INCLUDES) $(DEFS) $< | grep -v '^#' > _$*.s
# $(COMPILE) -c _$*.s
# mv -f _$*.o $*.o

View file

@ -1,4 +1,4 @@
# sourced my ../configure to get the list of files to link
# sourced by ../configure to get the list of files to link
# this should set $mpi_ln_src and mpi_ln_dst.
# Note: this is called from the above directory.
@ -12,23 +12,40 @@ echo '/* created by config.links - do not edit */' >./mpi/asm-syntax.h
if test "$try_asm_modules" = "yes" ; then
case "${target}" in
i[34]86*-*-freebsd*-elf | i[34]86*-*-freebsd[3-9]* | i[34]86*-*-freebsdelf*)
i[34]86*-*-freebsd*-elf | \
i[34]86*-*-freebsd[3-9]* | \
i[34]86*-*-freebsdelf* | \
i[34]86*-*-netbsd* )
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
path="i386"
;;
i[56]86*-*-freebsd*-elf | i[56]86*-*-freebsd[3-9]* | i[56]86*-*-freebsdelf*)
i[56]86*-*-freebsd*-elf | \
i[56]86*-*-freebsd[3-9]* | \
i[56]86*-*-freebsdelf* | \
i[56]86*-*-netbsd* | \
pentium-*-netbsd* | \
pentiumpro-*-netbsd*)
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
path="i586 i386"
;;
i[34]86*-*-linuxaout* | i[34]86*-*-linuxoldld* | i[34]86*-*-*bsd*)
i[34]86*-*-bsdi4*)
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
path="i386"
;;
i[34]86*-*-linuxaout* | \
i[34]86*-*-linuxoldld* | \
i[34]86*-*-*bsd*)
echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h
echo '#define X86_BROKEN_ALIGN' >>./mpi/asm-syntax.h
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
path="i386"
;;
i[56]86*-*-linuxaout* | i[56]86*-*-linuxoldld* | i[56]86*-*-*bsd*)
i[56]86*-*-linuxaout* | \
i[56]86*-*-linuxoldld* | \
i[56]86*-*-*bsd*)
echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h
echo '#define X86_BROKEN_ALIGN' >>./mpi/asm-syntax.h
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
@ -49,7 +66,9 @@ case "${target}" in
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
path="i386"
;;
i[56]86*-*-* | pentium-*-* | pentiumpro-*-*)
i[56]86*-*-* | \
pentium-*-* | \
pentiumpro-*-*)
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
path="i586 i386"
@ -74,13 +93,23 @@ case "${target}" in
path="pa7100 hppa1.1 hppa"
mpi_extra_modules="udiv-qrnnd"
;;
sparc9*-*-* | sparc64*-*-* | ultrasparc*-*-*)
sparc64-*-linux-gnu)
# An extra rule because we have a report for this one only.
# Should be compared against the next GMP version
echo '/* configured for sparc64-*-linux-gnu */' >>./mpi/asm-syntax.h
path="sparc32v8 sparc32"
mpi_extra_modules="udiv"
;;
sparc9*-*-* | \
sparc64*-*-* | \
ultrasparc*-*-* )
echo '/* configured for sparc9 or higher */' >>./mpi/asm-syntax.h
path="sparc32v8 sparc32"
;;
sparc8*-*-* | microsparc*-*-*)
sparc8*-*-* | \
microsparc*-*-*)
echo '/* configured for sparc8 */' >>./mpi/asm-syntax.h
path="sparc32v8"
path="sparc32v8 sparc32"
;;
supersparc*-*-*)
echo '/* configured for supersparc */' >>./mpi/asm-syntax.h
@ -92,7 +121,8 @@ case "${target}" in
path="sparc32"
mpi_extra_modules="udiv"
;;
mips[34]*-*-* | mips*-*-irix6*)
mips[34]*-*-* | \
mips*-*-irix6*)
echo '/* configured for MIPS3 */' >>./mpi/asm-syntax.h
path="mips3"
;;
@ -103,7 +133,8 @@ case "${target}" in
# Motorola 68k configurations. Let m68k mean 68020-68040.
# mc68000 or mc68060 configurations need to be specified explicitly
m680[234]0*-*-linuxaout* | m68k*-*-linuxaout*)
m680[234]0*-*-linuxaout* | \
m68k*-*-linuxaout*)
echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h
path="m68k/mc68020 m68k"
@ -113,7 +144,8 @@ case "${target}" in
cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h
path="m68k"
;;
m680[234]0*-*-linux* | m68k*-*-linux*)
m680[234]0*-*-linux* | \
m68k*-*-linux*)
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h
;;
@ -127,12 +159,14 @@ case "${target}" in
cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h
path="m68k/mc68020 m68k"
;;
m68000*-*-* | m68060*-*-*)
m68000*-*-* | \
m68060*-*-*)
echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h
path="m68k/mc68000"
;;
m680[234]0*-*-* | m68k*-*-*)
m680[234]0*-*-* | \
m68k*-*-*)
echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h
path="m68k/mc68020 m68k"
@ -144,25 +178,37 @@ case "${target}" in
cat $srcdir/mpi/powerpc32/syntax.h >>./mpi/asm-syntax.h
path="powerpc32"
;;
rs6000-*-aix[456789]* | rs6000-*-aix3.2.[456789])
rs6000-*-aix[456789]* | \
rs6000-*-aix3.2.[456789])
mpi_sflags="-Wa,-mpwr"
path="power"
mpi_extra_modules="udiv-w-sdiv"
;;
rs6000-*-* | power-*-* | power2-*-*)
rs6000-*-* | \
power-*-* | \
power2-*-*)
mpi_sflags="-Wa,-mppc"
path="power"
mpi_extra_modules="udiv-w-sdiv"
;;
powerpc-ibm-aix4.2.* )
# I am not sure about this one but a machine identified by
# powerpc-ibm-aix4.2.1.0 cannot use the powerpc32 code.
mpi_sflags="-Wa,-mpwr"
path="power"
mpi_extra_modules="udiv-w-sdiv"
;;
ppc601-*-*)
mpi_sflags="-Wa,-mppc"
path="power powerpc32"
;;
ppc60[234]*-*-* | powerpc*-*-*)
ppc60[234]*-*-* | \
powerpc*-*-*)
mpi_sflags="-Wa,-mppc"
path="powerpc32"
;;
ppc620-*-* | powerpc64*-*-*)
ppc620-*-* | \
powerpc64*-*-*)
mpi_sflags="-Wa,-mppc"
path="powerpc64"
;;

View file

@ -199,6 +199,8 @@ extern UDItype __udiv_qrnnd ();
"rI" ((USItype)(bh)), \
"r" ((USItype)(al)), \
"rI" ((USItype)(bl)))
#ifdef __ARM_ARCH_3__
/* SAM This does not work on arm4 */
#define umul_ppmm(xh, xl, a, b) \
__asm__ ("%@ Inlined umul_ppmm
mov %|r0, %2, lsr #16
@ -218,6 +220,18 @@ extern UDItype __udiv_qrnnd ();
: "r" ((USItype)(a)), \
"r" ((USItype)(b)) \
: "r0", "r1", "r2")
#elif __ARM_ARCH_4__
#define umul_ppmm(xh, xl, a, b) \
__asm__ ("%@ Inlined umul_ppmm
umull %r1, %r0, %r2, %r3" \
: "=&r" ((USItype)(xh)), \
"=r" ((USItype)(xl)) \
: "r" ((USItype)(a)), \
"r" ((USItype)(b)) \
: "r0", "r1")
#else
#error Untested architecture
#endif
#define UMUL_TIME 20
#define UDIV_TIME 100
#endif /* __arm__ */

View file

@ -1,6 +1,6 @@
/* mpi-internal.h - Internal to the Multi Precision Integers
* Copyright (C) 1998 Free Software Foundation, Inc.
* Copyright (C) 1994, 1996 Free Software Foundation, Inc.
* Copyright (C) 1994, 1996, 2000 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
@ -186,6 +186,17 @@ mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
int mpihelp_cmp( mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size );
/*-- mpihelp-mul.c --*/
/* Scratch memory that mpihelp_mul_karatsuba_case() keeps between calls
 * so that repeated multiplications do not have to reallocate their
 * temporaries.  Callers clear the structure with memset() before the
 * first use and hand it to mpihelp_release_karatsuba_ctx() when done.
 * Note that the *_size members record the operand size (VSIZE) the
 * buffer was allocated for; the buffer itself holds twice that many
 * limbs.
 */
struct karatsuba_ctx {
struct karatsuba_ctx *next; /* context used by the nested call; allocated on demand */
mpi_ptr_t tspace; /* scratch area of 2 * tspace_size limbs */
mpi_size_t tspace_size; /* VSIZE for which tspace was allocated */
mpi_ptr_t tp; /* buffer for partial products, 2 * tp_size limbs */
mpi_size_t tp_size; /* VSIZE for which tp was allocated */
};
void mpihelp_release_karatsuba_ctx( struct karatsuba_ctx *ctx );
mpi_limb_t mpihelp_addmul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
mpi_size_t s1_size, mpi_limb_t s2_limb);
mpi_limb_t mpihelp_submul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
@ -198,6 +209,12 @@ void mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size );
void mpih_sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size,
mpi_ptr_t tspace);
void mpihelp_mul_karatsuba_case( mpi_ptr_t prodp,
mpi_ptr_t up, mpi_size_t usize,
mpi_ptr_t vp, mpi_size_t vsize,
struct karatsuba_ctx *ctx );
/*-- mpihelp-mul_1.c (or xxx/cpu/ *.S) --*/
mpi_limb_t mpihelp_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
mpi_size_t s1_size, mpi_limb_t s2_limb);

View file

@ -1,6 +1,6 @@
/* mpi-pow.c - MPI functions
* Copyright (C) 1998 Free Software Foundation, Inc.
* Copyright (C) 1994, 1996 Free Software Foundation, Inc.
* Copyright (C) 1994, 1996, 2000 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
@ -30,9 +30,10 @@
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include "mpi-internal.h"
#include "longlong.h"
#include <assert.h>
/****************
@ -159,7 +160,9 @@ gcry_mpi_powm( MPI res, MPI base, MPI exp, MPI mod)
int c;
mpi_limb_t e;
mpi_limb_t carry_limb;
struct karatsuba_ctx karactx;
memset( &karactx, 0, sizeof karactx );
negative_result = (ep[0] & 1) && base->sign;
i = esize - 1;
@ -177,6 +180,7 @@ gcry_mpi_powm( MPI res, MPI base, MPI exp, MPI mod)
* by RP (==RES->d), and with 50% probability in the area originally
* pointed to by XP.
*/
for(;;) {
while( c ) {
mpi_ptr_t tp;
@ -194,7 +198,6 @@ gcry_mpi_powm( MPI res, MPI base, MPI exp, MPI mod)
mpi_free_limb_space( tspace );
tsize = 2 * rsize;
tspace = mpi_alloc_limb_space( tsize, 0 );
}
mpih_sqr_n( xp, rp, rsize, tspace );
}
@ -209,7 +212,15 @@ gcry_mpi_powm( MPI res, MPI base, MPI exp, MPI mod)
rsize = xsize;
if( (mpi_limb_signed_t)e < 0 ) {
mpihelp_mul( xp, rp, rsize, bp, bsize );
/*mpihelp_mul( xp, rp, rsize, bp, bsize );*/
if( bsize < KARATSUBA_THRESHOLD ) {
mpihelp_mul( xp, rp, rsize, bp, bsize );
}
else {
mpihelp_mul_karatsuba_case(
xp, rp, rsize, bp, bsize, &karactx );
}
xsize = rsize + bsize;
if( xsize > msize ) {
mpihelp_divrem(xp + msize, 0, xp, xsize, mp, msize);
@ -258,6 +269,8 @@ gcry_mpi_powm( MPI res, MPI base, MPI exp, MPI mod)
if( mod_shift_cnt )
mpihelp_rshift( rp, rp, rsize, mod_shift_cnt);
MPN_NORMALIZE (rp, rsize);
mpihelp_release_karatsuba_ctx( &karactx );
}
if( negative_result && rsize ) {

View file

@ -1,6 +1,6 @@
/* mpihelp-div.c - MPI helper functions
* Copyright (C) 1998 Free Software Foundation, Inc.
* Copyright (C) 1994, 1996 Free Software Foundation, Inc.
* Copyright (C) 1994, 1996, 2000 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
@ -338,7 +338,7 @@ mpihelp_divrem( mpi_ptr_t qp, mpi_size_t qextra_limbs,
}
else {
n2 = np[dsize - 1];
MPN_COPY_DECR (np + 1, np, dsize);
MPN_COPY_DECR (np + 1, np, dsize - 1);
np[0] = 0;
}

View file

@ -1,5 +1,5 @@
/* mpihelp-mul.c - MPI helper functions
* Copyright (C) 1994, 1996, 1998, 1999 Free Software Foundation, Inc.
* Copyright (C) 1994, 1996, 1998, 1999, 2000 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
@ -29,10 +29,10 @@
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mpi-internal.h"
#include "longlong.h"
#include "g10lib.h" /* for g10_is_secure() */
#include "g10lib.h" /* g10_is_secure() */
#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
@ -373,6 +373,86 @@ mpihelp_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
}
/* Multiply U (USIZE limbs at UP) by V (VSIZE limbs at VP) and store the
 * product at PRODP, processing U in chunks of VSIZE limbs.
 *
 * The temporary buffers are cached in CTX and are only reallocated when
 * the cached ones are missing or were sized for a smaller VSIZE.  CTX
 * must have been zero-initialized before its first use and has to be
 * released with mpihelp_release_karatsuba_ctx().
 *
 * NOTE(review): the first VSIZE limbs of U are consumed unconditionally,
 * so USIZE >= VSIZE appears to be required -- confirm against callers.
 */
void
mpihelp_mul_karatsuba_case( mpi_ptr_t prodp,
			    mpi_ptr_t up, mpi_size_t usize,
			    mpi_ptr_t vp, mpi_size_t vsize,
			    struct karatsuba_ctx *ctx )
{
    mpi_limb_t carry;

    /* Make sure the scratch area is large enough for VSIZE. */
    if( !ctx->tspace || ctx->tspace_size < vsize ) {
	if( ctx->tspace )
	    mpi_free_limb_space( ctx->tspace );
	ctx->tspace = mpi_alloc_limb_space( 2 * vsize,
			    g10_is_secure( up ) || g10_is_secure( vp ) );
	ctx->tspace_size = vsize;
    }

    /* The lowest chunk of U goes straight into the product. */
    MPN_MUL_N_RECURSE( prodp, up, vp, vsize, ctx->tspace );
    usize -= vsize;
    up    += vsize;
    prodp += vsize;

    /* Every further full chunk is multiplied into TP and accumulated. */
    if( usize >= vsize ) {
	if( !ctx->tp || ctx->tp_size < vsize ) {
	    if( ctx->tp )
		mpi_free_limb_space( ctx->tp );
	    ctx->tp = mpi_alloc_limb_space( 2 * vsize,
			    g10_is_secure( up ) || g10_is_secure( vp ) );
	    ctx->tp_size = vsize;
	}
    }
    while( usize >= vsize ) {
	MPN_MUL_N_RECURSE( ctx->tp, up, vp, vsize, ctx->tspace );
	carry = mpihelp_add_n( prodp, prodp, ctx->tp, vsize );
	mpihelp_add_1( prodp + vsize, ctx->tp + vsize, vsize, carry );
	usize -= vsize;
	up    += vsize;
	prodp += vsize;
    }

    /* Nothing left over: done. */
    if( !usize )
	return;

    /* Remaining limbs of U (fewer than VSIZE): V is now the larger
     * operand, so the roles are swapped.  The product is built in the
     * scratch area and then added in. */
    if( usize >= KARATSUBA_THRESHOLD ) {
	if( !ctx->next )
	    ctx->next = g10_xcalloc( 1, sizeof *ctx );
	mpihelp_mul_karatsuba_case( ctx->tspace, vp, vsize, up, usize,
				    ctx->next );
    }
    else {
	mpihelp_mul( ctx->tspace, vp, vsize, up, usize );
    }

    carry = mpihelp_add_n( prodp, prodp, ctx->tspace, vsize );
    mpihelp_add_1( prodp + vsize, ctx->tspace + vsize, usize, carry );
}
/* Release all memory attached to the Karatsuba context CTX.
 *
 * CTX itself is owned by the caller (it usually lives on the stack) and
 * is therefore not freed; only its buffers are.  Every context on the
 * NEXT chain was allocated by mpihelp_mul_karatsuba_case() and is freed
 * together with its buffers.
 *
 * On return CTX is back in its zero-initialized state, so it may be
 * reused for further multiplications or released a second time without
 * touching freed memory.  A NULL CTX is ignored.
 */
void
mpihelp_release_karatsuba_ctx( struct karatsuba_ctx *ctx )
{
    struct karatsuba_ctx *ctx2;

    if( !ctx )
	return;

    if( ctx->tp )
	mpi_free_limb_space( ctx->tp );
    if( ctx->tspace )
	mpi_free_limb_space( ctx->tspace );

    /* Detach the chain and reset the head so that no dangling pointers
     * or stale sizes survive in the caller's structure. */
    ctx2 = ctx->next;
    ctx->next = NULL;
    ctx->tp = NULL;
    ctx->tp_size = 0;
    ctx->tspace = NULL;
    ctx->tspace_size = 0;

    for( ctx = ctx2; ctx; ctx = ctx2 ) {
	ctx2 = ctx->next;   /* fetch the successor before CTX is freed */
	if( ctx->tp )
	    mpi_free_limb_space( ctx->tp );
	if( ctx->tspace )
	    mpi_free_limb_space( ctx->tspace );
	g10_free( ctx );
    }
}
/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
* and v (pointed to by VP, with VSIZE limbs), and store the result at
* PRODP. USIZE + VSIZE limbs are always stored, but if the input
@ -394,7 +474,7 @@ mpihelp_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
{
mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
mpi_limb_t cy;
mpi_ptr_t tspace;
struct karatsuba_ctx ctx;
if( vsize < KARATSUBA_THRESHOLD ) {
mpi_size_t i;
@ -438,34 +518,9 @@ mpihelp_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
return cy;
}
tspace = mpi_alloc_limb_space( 2 * vsize,
g10_is_secure( up ) || g10_is_secure( vp ) );
MPN_MUL_N_RECURSE( prodp, up, vp, vsize, tspace );
prodp += vsize;
up += vsize;
usize -= vsize;
if( usize >= vsize ) {
mpi_ptr_t tp = mpi_alloc_limb_space( 2 * vsize, g10_is_secure( up )
|| g10_is_secure( vp ) );
do {
MPN_MUL_N_RECURSE( tp, up, vp, vsize, tspace );
cy = mpihelp_add_n( prodp, prodp, tp, vsize );
mpihelp_add_1( prodp + vsize, tp + vsize, vsize, cy );
prodp += vsize;
up += vsize;
usize -= vsize;
} while( usize >= vsize );
mpi_free_limb_space( tp );
}
if( usize ) {
mpihelp_mul( tspace, vp, vsize, up, usize );
cy = mpihelp_add_n( prodp, prodp, tspace, vsize);
mpihelp_add_1( prodp + vsize, tspace + vsize, usize, cy );
}
mpi_free_limb_space( tspace );
memset( &ctx, 0, sizeof ctx );
mpihelp_mul_karatsuba_case( prodp, up, usize, vp, vsize, &ctx );
mpihelp_release_karatsuba_ctx( &ctx );
return *prod_endp;
}

View file

@ -0,0 +1,7 @@
mpih-add1.S
mpih-lshift.S
mpih-mul1.S
mpih-mul2.S
mpih-mul3.S
mpih-rshift.S
mpih-sub1.S

86
mpi/power/mpih-add1.S Normal file
View file

@ -0,0 +1,86 @@
/* IBM POWER add_n -- Add two limb vectors of equal, non-zero length.
*
* Copyright (C) 1992, 1994, 1995, 1996, 1999 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*
# INPUT PARAMETERS
# res_ptr r3
# s1_ptr r4
# s2_ptr r5
# size r6
*/
/* mpihelp_add_n: descriptor in the [DS] csect, code in the [PR] csect.
 * NOTE(review): the ".long entry, TOC[tc0], 0" triple looks like the
 * usual AIX/XCOFF function descriptor -- confirm.
 *
 * The carry out of the most significant limb is returned in r3.
 * The loop handles two limbs per iteration; for an odd size one limb
 * pair is added before the loop is entered.
 */
.toc
.extern mpihelp_add_n[DS]
.extern .mpihelp_add_n
.csect [PR]
.align 2
.globl mpihelp_add_n
.globl .mpihelp_add_n
.csect mpihelp_add_n[DS]
mpihelp_add_n:
.long .mpihelp_add_n, TOC[tc0], 0
.csect [PR]
.mpihelp_add_n:
andil. 10,6,1 # odd or even number of limbs?
l 8,0(4) # load least significant s1 limb
l 0,0(5) # load least significant s2 limb
cal 3,-4(3) # offset res_ptr, it's updated before it's used
sri 10,6,1 # count for unrolled loop
a 7,0,8 # add least significant limbs, set cy
mtctr 10 # copy count into CTR
beq 0,Leven # branch if even # of limbs (# of limbs >= 2)
# We have an odd # of limbs. Add the first limbs separately.
cmpi 1,10,0 # is count for unrolled loop zero?
bne 1,L1 # branch if not
st 7,4(3) # size is 1: store the only result limb
aze 3,10 # use the fact that r10 is zero...
br # return
# We added least significant limbs. Now reload the next limbs to enter loop.
L1: lu 8,4(4) # load s1 limb and update s1_ptr
lu 0,4(5) # load s2 limb and update s2_ptr
stu 7,4(3) # store the first result limb and update res_ptr
ae 7,0,8 # add limbs, set cy
Leven: lu 9,4(4) # load s1 limb and update s1_ptr
lu 10,4(5) # load s2 limb and update s2_ptr
bdz Lend # If done, skip loop
Loop: lu 8,4(4) # load s1 limb and update s1_ptr
lu 0,4(5) # load s2 limb and update s2_ptr
ae 11,9,10 # add previous limbs with cy, set cy
stu 7,4(3) # store the older pending result limb
lu 9,4(4) # load s1 limb and update s1_ptr
lu 10,4(5) # load s2 limb and update s2_ptr
ae 7,0,8 # add previous limbs with cy, set cy
stu 11,4(3) # store the result limb computed above
bdn Loop # decrement CTR and loop back
Lend: ae 11,9,10 # add limbs with cy, set cy
st 7,4(3) # store the second most significant result limb
st 11,8(3) # store the most significant result limb
lil 3,0 # load cy into ...
aze 3,3 # ... return value register
br

64
mpi/power/mpih-lshift.S Normal file
View file

@ -0,0 +1,64 @@
/* IBM POWER lshift
*
* Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*
# INPUT PARAMETERS
# res_ptr r3
# s_ptr r4
# size r5
# cnt r6
*/
/* mpihelp_lshift: descriptor in the [DS] csect, code in the [PR] csect.
 * NOTE(review): the ".long entry, TOC[tc0], 0" triple looks like the
 * usual AIX/XCOFF function descriptor -- confirm.
 *
 * The vector is processed from the most significant limb downwards.
 * The bits shifted out of the most significant limb are returned in r3.
 */
.toc
.extern mpihelp_lshift[DS]
.extern .mpihelp_lshift
.csect [PR]
.align 2
.globl mpihelp_lshift
.globl .mpihelp_lshift
.csect mpihelp_lshift[DS]
mpihelp_lshift:
.long .mpihelp_lshift, TOC[tc0], 0
.csect [PR]
.mpihelp_lshift:
sli 0,5,2 # r0 = size * 4, the byte length of the vectors
cax 9,3,0 # r9 = res_ptr + byte length (just past the top limb)
cax 4,4,0 # s_ptr = s_ptr + byte length (just past the top limb)
sfi 8,6,32 # r8 = 32 - cnt
mtctr 5 # put limb count in CTR loop register
lu 0,-4(4) # read most significant limb
sre 3,0,8 # compute carry out limb, and init MQ register
bdz Lend2 # if just one limb, skip loop
lu 0,-4(4) # read 2:nd most significant limb
sreq 7,0,8 # compute most significant limb of result
bdz Lend # if just two limb, skip loop
Loop: lu 0,-4(4) # load next lower limb
stu 7,-4(9) # store previous result during read latency
sreq 7,0,8 # compute result limb
bdn Loop # loop back until CTR is zero
Lend: stu 7,-4(9) # store 2:nd least significant limb
Lend2: sle 7,0,6 # compute least significant limb
st 7,-4(9) # store it
br

115
mpi/power/mpih-mul1.S Normal file
View file

@ -0,0 +1,115 @@
/* IBM POWER mul_1 -- Multiply a limb vector with a limb and store
* the result in a second limb vector.
*
* Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*
# INPUT PARAMETERS
# res_ptr r3
# s1_ptr r4
# size r5
# s2_limb r6
# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
# obtain that operation, we have to use the 32x32->64 signed multiplication
# instruction, and add the appropriate compensation to the high limb of the
# result. We add the multiplicand if the multiplier has its most significant
# bit set, and we add the multiplier if the multiplicand has its most
# significant bit set. We need to preserve the carry flag between each
# iteration, so we have to compute the compensation carefully (the natural,
# srai+and doesn't work). Since the POWER architecture has a branch unit
# we can branch in zero cycles, so that's how we perform the additions.
*/
/* mpihelp_mul_1: descriptor in the [DS] csect, code in the [PR] csect.
 * NOTE(review): the ".long entry, TOC[tc0], 0" triple looks like the
 * usual AIX/XCOFF function descriptor -- confirm.
 *
 * There are two copies of the loop: Lploop is used when s2_limb is
 * non-negative as a signed number, Lnloop when it is negative and the
 * s1 limb has to be added to every high limb.  Each loop handles two
 * limbs per iteration and keeps the carry limb alternately in r9 and
 * r10; Lend0 moves r10 to r9 so that Lend can always use r9.
 * The final carry limb (plus the carry flag) is returned in r3.
 */
.toc
.csect .mpihelp_mul_1[PR]
.align 2
.globl mpihelp_mul_1
.globl .mpihelp_mul_1
.csect mpihelp_mul_1[DS]
mpihelp_mul_1:
.long .mpihelp_mul_1[PR], TOC[tc0], 0
.csect .mpihelp_mul_1[PR]
.mpihelp_mul_1:
cal 3,-4(3) # offset res_ptr, the stores below use 4(3)
l 0,0(4) # load least significant s1 limb
cmpi 0,6,0 # sign of s2_limb selects the loop (see blt below)
mtctr 5 # put limb count in CTR loop register
mul 9,0,6 # signed multiply, high limb to r9
srai 7,0,31 # r7 = all ones if the s1 limb is negative, else 0
and 7,7,6 # r7 = s2_limb if the s1 limb is negative, else 0
mfmq 8 # low limb of the product
ai 0,0,0 # reset carry
cax 9,9,7 # adjust high limb for negative limb from s1
blt Lneg # s2_limb negative: use the compensating loop
Lpos: bdz Lend
Lploop: lu 0,4(4) # load next s1 limb and update s1_ptr
stu 8,4(3) # store previous result limb and update res_ptr
cmpi 0,0,0 # sign of the s1 limb, tested by bge below
mul 10,0,6
mfmq 0
ae 8,0,9 # low limb + old_cy_limb + old cy
bge Lp0
cax 10,10,6 # adjust high limb for negative limb from s1
Lp0: bdz Lend0
lu 0,4(4) # load next s1 limb and update s1_ptr
stu 8,4(3) # store previous result limb and update res_ptr
cmpi 0,0,0 # sign of the s1 limb, tested by bge below
mul 9,0,6
mfmq 0
ae 8,0,10 # low limb + old_cy_limb + old cy
bge Lp1
cax 9,9,6 # adjust high limb for negative limb from s1
Lp1: bdn Lploop
b Lend
Lneg: cax 9,9,0 # adjust high limb for negative s2_limb
bdz Lend
Lnloop: lu 0,4(4) # load next s1 limb and update s1_ptr
stu 8,4(3) # store previous result limb and update res_ptr
cmpi 0,0,0 # sign of the s1 limb, tested by bge below
mul 10,0,6
cax 10,10,0 # adjust high limb for negative s2_limb
mfmq 0
ae 8,0,9 # low limb + old_cy_limb + old cy
bge Ln0
cax 10,10,6 # adjust high limb for negative limb from s1
Ln0: bdz Lend0
lu 0,4(4) # load next s1 limb and update s1_ptr
stu 8,4(3) # store previous result limb and update res_ptr
cmpi 0,0,0 # sign of the s1 limb, tested by bge below
mul 9,0,6
cax 9,9,0 # adjust high limb for negative s2_limb
mfmq 0
ae 8,0,10 # low limb + old_cy_limb + old cy
bge Ln1
cax 9,9,6 # adjust high limb for negative limb from s1
Ln1: bdn Lnloop
b Lend
Lend0: cal 9,0(10) # carry limb is in r10 here, move it to r9
Lend: st 8,4(3) # store most significant result limb
aze 3,9 # return carry limb plus cy in r3
br

130
mpi/power/mpih-mul2.S Normal file
View file

@ -0,0 +1,130 @@
/* IBM POWER addmul_1 -- Multiply a limb vector with a limb and add
* the result to a second limb vector.
*
* Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*
# INPUT PARAMETERS
# res_ptr r3
# s1_ptr r4
# size r5
# s2_limb r6
# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
# obtain that operation, we have to use the 32x32->64 signed multiplication
# instruction, and add the appropriate compensation to the high limb of the
# result. We add the multiplicand if the multiplier has its most significant
# bit set, and we add the multiplier if the multiplicand has its most
# significant bit set. We need to preserve the carry flag between each
# iteration, so we have to compute the compensation carefully (the natural,
# srai+and doesn't work). Since the POWER architecture has a branch unit
# we can branch in zero cycles, so that's how we perform the additions.
*/
/* mpihelp_addmul_1: descriptor in the [DS] csect, code in the [PR] csect.
 * NOTE(review): the ".long entry, TOC[tc0], 0" triple looks like the
 * usual AIX/XCOFF function descriptor -- confirm.
 *
 * There are two copies of the loop: Lploop is used when s2_limb is
 * non-negative as a signed number, Lnloop when it is negative and the
 * s1 limb has to be added to every high limb.  Each loop handles two
 * limbs per iteration and keeps the carry limb alternately in r9 and
 * r10; Lend0 moves r10 to r9 so that Lend can always use r9.
 * The final carry limb (plus the carry flag) is returned in r3.
 */
.toc
.csect .mpihelp_addmul_1[PR]
.align 2
.globl mpihelp_addmul_1
.globl .mpihelp_addmul_1
.csect mpihelp_addmul_1[DS]
mpihelp_addmul_1:
.long .mpihelp_addmul_1[PR], TOC[tc0], 0
.csect .mpihelp_addmul_1[PR]
.mpihelp_addmul_1:
cal 3,-4(3) # offset res_ptr, the accesses below use 4(3)
l 0,0(4) # load least significant s1 limb
cmpi 0,6,0 # sign of s2_limb selects the loop (see blt below)
mtctr 5 # put limb count in CTR loop register
mul 9,0,6 # signed multiply, high limb to r9
srai 7,0,31 # r7 = all ones if the s1 limb is negative, else 0
and 7,7,6 # r7 = s2_limb if the s1 limb is negative, else 0
mfmq 8 # low limb of the product
cax 9,9,7 # adjust high limb for negative limb from s1
l 7,4(3) # load res_limb
a 8,8,7 # add res_limb
blt Lneg # s2_limb negative: use the compensating loop
Lpos: bdz Lend
Lploop: lu 0,4(4) # load next s1 limb and update s1_ptr
stu 8,4(3) # store previous result limb and update res_ptr
cmpi 0,0,0 # sign of the s1 limb, tested by bge below
mul 10,0,6
mfmq 0
ae 8,0,9 # low limb + old_cy_limb + old cy
l 7,4(3) # load res_limb
aze 10,10 # propagate cy to new cy_limb
a 8,8,7 # add res_limb
bge Lp0
cax 10,10,6 # adjust high limb for negative limb from s1
Lp0: bdz Lend0
lu 0,4(4) # load next s1 limb and update s1_ptr
stu 8,4(3) # store previous result limb and update res_ptr
cmpi 0,0,0 # sign of the s1 limb, tested by bge below
mul 9,0,6
mfmq 0
ae 8,0,10 # low limb + old_cy_limb + old cy
l 7,4(3) # load res_limb
aze 9,9 # propagate cy to new cy_limb
a 8,8,7 # add res_limb
bge Lp1
cax 9,9,6 # adjust high limb for negative limb from s1
Lp1: bdn Lploop
b Lend
Lneg: cax 9,9,0 # adjust high limb for negative s2_limb
bdz Lend
Lnloop: lu 0,4(4) # load next s1 limb and update s1_ptr
stu 8,4(3) # store previous result limb and update res_ptr
cmpi 0,0,0 # sign of the s1 limb, tested by bge below
mul 10,0,6
mfmq 7
ae 8,7,9 # low limb + old_cy_limb + old cy
l 7,4(3) # load res_limb
ae 10,10,0 # propagate cy to new cy_limb
a 8,8,7 # add res_limb
bge Ln0
cax 10,10,6 # adjust high limb for negative limb from s1
Ln0: bdz Lend0
lu 0,4(4) # load next s1 limb and update s1_ptr
stu 8,4(3) # store previous result limb and update res_ptr
cmpi 0,0,0 # sign of the s1 limb, tested by bge below
mul 9,0,6
mfmq 7
ae 8,7,10 # low limb + old_cy_limb + old cy
l 7,4(3) # load res_limb
ae 9,9,0 # propagate cy to new cy_limb
a 8,8,7 # add res_limb
bge Ln1
cax 9,9,6 # adjust high limb for negative limb from s1
Ln1: bdn Lnloop
b Lend
Lend0: cal 9,0(10) # carry limb is in r10 here, move it to r9
Lend: st 8,4(3) # store most significant result limb
aze 3,9 # return carry limb plus cy in r3
br

135
mpi/power/mpih-mul3.S Normal file
View file

@ -0,0 +1,135 @@
/* IBM POWER submul_1 -- Multiply a limb vector with a limb and subtract
* the result from a second limb vector.
*
* Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*
# INPUT PARAMETERS
# res_ptr r3
# s1_ptr r4
# size r5
# s2_limb r6
# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
# obtain that operation, we have to use the 32x32->64 signed multiplication
# instruction, and add the appropriate compensation to the high limb of the
# result. We add the multiplicand if the multiplier has its most significant
# bit set, and we add the multiplier if the multiplicand has its most
# significant bit set. We need to preserve the carry flag between each
# iteration, so we have to compute the compensation carefully (the natural,
# srai+and doesn't work). Since the POWER architecture has a branch unit
# we can branch in zero cycles, so that's how we perform the additions.
*/
/* mpihelp_submul_1: descriptor in the [DS] csect, code in the [PR] csect.
 * NOTE(review): the ".long entry, TOC[tc0], 0" triple looks like the
 * usual AIX/XCOFF function descriptor -- confirm.
 *
 * Same structure as the addmul_1 code, except that the low product
 * limb (kept in r11) is subtracted from res_limb with "sf" and the
 * carry flag is inverted afterwards by the "a 11,8,11" instruction.
 * Lploop is used when s2_limb is non-negative as a signed number,
 * Lnloop when it is negative.  The carry limb alternates between r9
 * and r10; Lend0 moves r10 to r9 so that Lend can always use r9.
 * The final carry limb (plus the carry flag) is returned in r3.
 */
.toc
.csect .mpihelp_submul_1[PR]
.align 2
.globl mpihelp_submul_1
.globl .mpihelp_submul_1
.csect mpihelp_submul_1[DS]
mpihelp_submul_1:
.long .mpihelp_submul_1[PR], TOC[tc0], 0
.csect .mpihelp_submul_1[PR]
.mpihelp_submul_1:
cal 3,-4(3) # offset res_ptr, the accesses below use 4(3)
l 0,0(4) # load least significant s1 limb
cmpi 0,6,0 # sign of s2_limb selects the loop (see blt below)
mtctr 5 # put limb count in CTR loop register
mul 9,0,6 # signed multiply, high limb to r9
srai 7,0,31 # r7 = all ones if the s1 limb is negative, else 0
and 7,7,6 # r7 = s2_limb if the s1 limb is negative, else 0
mfmq 11 # low limb of the product
cax 9,9,7 # adjust high limb for negative limb from s1
l 7,4(3) # load res_limb
sf 8,11,7 # subtract low product limb from res_limb
a 11,8,11 # invert cy (r11 is junk)
blt Lneg # s2_limb negative: use the compensating loop
Lpos: bdz Lend
Lploop: lu 0,4(4) # load next s1 limb and update s1_ptr
stu 8,4(3) # store previous result limb and update res_ptr
cmpi 0,0,0 # sign of the s1 limb, tested by bge below
mul 10,0,6
mfmq 0
ae 11,0,9 # low limb + old_cy_limb + old cy
l 7,4(3) # load res_limb
aze 10,10 # propagate cy to new cy_limb
sf 8,11,7 # subtract low limb sum from res_limb
a 11,8,11 # invert cy (r11 is junk)
bge Lp0
cax 10,10,6 # adjust high limb for negative limb from s1
Lp0: bdz Lend0
lu 0,4(4) # load next s1 limb and update s1_ptr
stu 8,4(3) # store previous result limb and update res_ptr
cmpi 0,0,0 # sign of the s1 limb, tested by bge below
mul 9,0,6
mfmq 0
ae 11,0,10 # low limb + old_cy_limb + old cy
l 7,4(3) # load res_limb
aze 9,9 # propagate cy to new cy_limb
sf 8,11,7 # subtract low limb sum from res_limb
a 11,8,11 # invert cy (r11 is junk)
bge Lp1
cax 9,9,6 # adjust high limb for negative limb from s1
Lp1: bdn Lploop
b Lend
Lneg: cax 9,9,0 # adjust high limb for negative s2_limb
bdz Lend
Lnloop: lu 0,4(4) # load next s1 limb and update s1_ptr
stu 8,4(3) # store previous result limb and update res_ptr
cmpi 0,0,0 # sign of the s1 limb, tested by bge below
mul 10,0,6
mfmq 7
ae 11,7,9 # low limb + old_cy_limb + old cy
l 7,4(3) # load res_limb
ae 10,10,0 # propagate cy to new cy_limb
sf 8,11,7 # subtract low limb sum from res_limb
a 11,8,11 # invert cy (r11 is junk)
bge Ln0
cax 10,10,6 # adjust high limb for negative limb from s1
Ln0: bdz Lend0
lu 0,4(4) # load next s1 limb and update s1_ptr
stu 8,4(3) # store previous result limb and update res_ptr
cmpi 0,0,0 # sign of the s1 limb, tested by bge below
mul 9,0,6
mfmq 7
ae 11,7,10 # low limb + old_cy_limb + old cy
l 7,4(3) # load res_limb
ae 9,9,0 # propagate cy to new cy_limb
sf 8,11,7 # subtract low limb sum from res_limb
a 11,8,11 # invert cy (r11 is junk)
bge Ln1
cax 9,9,6 # adjust high limb for negative limb from s1
Ln1: bdn Lnloop
b Lend
Lend0: cal 9,0(10) # carry limb is in r10 here, move it to r9
Lend: st 8,4(3) # store most significant result limb
aze 3,9 # return carry limb plus cy in r3
br

64
mpi/power/mpih-rshift.S Normal file
View file

@ -0,0 +1,64 @@
/* IBM POWER rshift
*
* Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*
# INPUT PARAMETERS
# res_ptr r3
# s_ptr r4
# size r5
# cnt r6
*/
/* mpihelp_rshift: descriptor in the [DS] csect, code in the [PR] csect.
 * NOTE(review): the ".long entry, TOC[tc0], 0" triple looks like the
 * usual AIX/XCOFF function descriptor -- confirm.
 *
 * The vector is processed from the least significant limb upwards.
 * The bits shifted out of the least significant limb are returned
 * in r3.
 */
.toc
.extern mpihelp_rshift[DS]
.extern .mpihelp_rshift
.csect [PR]
.align 2
.globl mpihelp_rshift
.globl .mpihelp_rshift
.csect mpihelp_rshift[DS]
mpihelp_rshift:
.long .mpihelp_rshift, TOC[tc0], 0
.csect [PR]
.mpihelp_rshift:
sfi 8,6,32 # r8 = 32 - cnt
mtctr 5 # put limb count in CTR loop register
l 0,0(4) # read least significant limb
ai 9,3,-4 # adjust res_ptr since it's offset in the stu:s
sle 3,0,8 # compute carry limb, and init MQ register
bdz Lend2 # if just one limb, skip loop
lu 0,4(4) # read 2:nd least significant limb
sleq 7,0,8 # compute least significant limb of result
bdz Lend # if just two limb, skip loop
Loop: lu 0,4(4) # load next higher limb
stu 7,4(9) # store previous result during read latency
sleq 7,0,8 # compute result limb
bdn Loop # loop back until CTR is zero
Lend: stu 7,4(9) # store 2:nd most significant limb
Lend2: sre 7,0,6 # compute most significant limb
st 7,4(9) # store it
br

87
mpi/power/mpih-sub1.S Normal file
View file

@ -0,0 +1,87 @@
/* IBM POWER sub_n -- Subtract two limb vectors of equal, non-zero length.
*
* Copyright (C) 1992, 1994, 1995, 1996, 1999 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*
# INPUT PARAMETERS
# res_ptr r3
# s1_ptr r4
# s2_ptr r5
# size r6
*/
/* mpihelp_sub_n: descriptor in the [DS] csect, code in the [PR] csect.
 * NOTE(review): the ".long entry, TOC[tc0], 0" triple looks like the
 * usual AIX/XCOFF function descriptor -- confirm.
 *
 * The inverted carry (that is, the borrow) out of the most significant
 * limb is returned in r3.  The loop handles two limbs per iteration;
 * for an odd size one limb pair is subtracted before the loop is
 * entered.
 */
.toc
.extern mpihelp_sub_n[DS]
.extern .mpihelp_sub_n
.csect [PR]
.align 2
.globl mpihelp_sub_n
.globl .mpihelp_sub_n
.csect mpihelp_sub_n[DS]
mpihelp_sub_n:
.long .mpihelp_sub_n, TOC[tc0], 0
.csect [PR]
.mpihelp_sub_n:
andil. 10,6,1 # odd or even number of limbs?
l 8,0(4) # load least significant s1 limb
l 0,0(5) # load least significant s2 limb
cal 3,-4(3) # offset res_ptr, it's updated before it's used
sri 10,6,1 # count for unrolled loop
sf 7,0,8 # subtract least significant limbs, set cy
mtctr 10 # copy count into CTR
beq 0,Leven # branch if even # of limbs (# of limbs >= 2)
# We have an odd # of limbs. Subtract the first limbs separately.
cmpi 1,10,0 # is count for unrolled loop zero?
bne 1,L1 # branch if not
st 7,4(3) # size is 1: store the only result limb
sfe 3,0,0 # load !cy into ...
sfi 3,3,0 # ... return value register
br # return
# We subtracted least significant limbs. Now reload the next limbs to enter loop.
L1: lu 8,4(4) # load s1 limb and update s1_ptr
lu 0,4(5) # load s2 limb and update s2_ptr
stu 7,4(3) # store the first result limb and update res_ptr
sfe 7,0,8 # subtract limbs, set cy
Leven: lu 9,4(4) # load s1 limb and update s1_ptr
lu 10,4(5) # load s2 limb and update s2_ptr
bdz Lend # If done, skip loop
Loop: lu 8,4(4) # load s1 limb and update s1_ptr
lu 0,4(5) # load s2 limb and update s2_ptr
sfe 11,10,9 # subtract previous limbs with cy, set cy
stu 7,4(3) # store the older pending result limb
lu 9,4(4) # load s1 limb and update s1_ptr
lu 10,4(5) # load s2 limb and update s2_ptr
sfe 7,0,8 # subtract previous limbs with cy, set cy
stu 11,4(3) # store the result limb computed above
bdn Loop # decrement CTR and loop back
Lend: sfe 11,10,9 # subtract limbs with cy, set cy
st 7,4(3) # store the second most significant result limb
st 11,8(3) # store the most significant result limb
sfe 3,0,0 # load !cy into ...
sfi 3,3,0 # ... return value register
br