Mirror of git://git.gnupg.org/gnupg.git

Update head to match stable 1.0

David Shaw 2002-06-29 14:15:02 +00:00
parent df58e024e7
commit 0ed6228293
144 changed files with 18930 additions and 673 deletions

325
mpi/ChangeLog Normal file

@@ -0,0 +1,325 @@
2002-05-10 Stefan Bellon <sbellon@sbellon.de>
* mpiutil.c (mpi_alloc_like/mpi_debug_alloc_like): Added code
for M_DEBUG.
2002-04-18 Werner Koch <wk@gnupg.org>
* i386/syntax.h (ALIGN): Removed parens from definition
* i386/mpih-add1.S, i386/mpih-sub1.S, i386/mpih-rshift.S,
i386/mpih-lshift.S: Minor syntax changes suggested by Mark Pettit
after comparing the files with those for GMP 4.
2001-11-08 Werner Koch <wk@gnupg.org>
* config.links (mpi_sflags): Add extra rule for sparc64-sun-solaris2.
2001-08-20 Werner Koch <wk@gnupg.org>
* longlong.h [__riscos__]: Need a special pragma here.
2001-08-09 Werner Koch <wk@gnupg.org>
* config.links: Added configuration for powerpc-openbsd. By Peter
Valchev
2001-07-09 Werner Koch <wk@gnupg.org>
* config.links: Changed the way the list of files to be
symlinked is returned.
2001-05-27 Werner Koch <wk@gnupg.org>
* hppa/, hppa1.1/, pa7100/ : Use .label command instead of labels
because their syntax changed. By Matthew Wilcox.
2001-05-06 Werner Koch <wk@gnupg.org>
* longlong.h: Fixes for ARM by Phil Blundell.
2001-04-17 Werner Koch <wk@gnupg.org>
Updated copyright notices.
2001-03-24 Werner Koch <wk@gnupg.org>
* mpi-mul.c (mpi_mul): Make sure that secret temporary results are
not stored in w. Suggested by Florian Weimer.
2001-03-18 Werner Koch <wk@gnupg.org>
* config.links: Use i386 code for i386. According to tests by
Kevin Ryde the i586 code runs slow on i386 CPUs. Ditto for i786.
2000-10-24 Werner Koch <wk@gnupg.org>
* mips3/: Changed a few comments to C-style. By Jeff Long.
2000-10-13 Werner Koch <wk@gnupg.org>
* mpi.h: Removed the inclusion of mpi-asm-defs.h because this
makes some trouble when doing a VPATH build. configure now
takes care of it.
2000-10-12 Werner Koch <wk@gnupg.org>
* generic/mpi-asm-defs.h: New.
* mips3/mpi-asm-defs.h: New.
* config.links: Create a link to one of the above files.
Wed Jul 19 11:26:43 CEST 2000 Werner Koch <wk@>
* config.links: Support for powerpc--netbsd by Gabriel Rosenkoetter.
Wed Mar 22 13:50:24 CET 2000 Werner Koch <wk@openit.de>
* config.links: Add support for FreeBSD 5 and made the case stmt
looking nicer. From Jun Kuriyama.
Fri Mar 17 17:50:25 CET 2000 Werner Koch <wk@openit.de>
* config.links (sparc64-unknown-linux-gnu): use udic module.
From Adam Mitchell.
2000-03-14 12:03:56 Werner Koch (wk@habibti.openit.de)
* Makefile.am: Do not use .s and .S files but temporary names, so that
OSes with case-insensitive filenames work. From Frank Donahoe.
Tue Mar 7 18:45:31 CET 2000 Werner Koch <wk@gnupg.de>
* mpih-mul.c (mpihelp_mul_karatsuba_case): It seems that the
untested part works fine. Removed the debugging message.
* longlong.h (umul_ppmm): Fixes for ARM-4. By Sean MacLennan.
* config.links: Add support for NetBSD.
Thu Jan 13 19:31:58 CET 2000 Werner Koch <wk@gnupg.de>
* mpi-internal.h (karatsuba_ctx): New.
* mpih-mul.c (mpihelp_release_karatsuba_ctx): New.
(mpihelp_mul_karatsuba_case): New.
(mpihelp_mul): Split to make use of the new functions.
* mpi-pow.c (mpi_powm): Make use of the newly split function
to avoid multiple allocation of temporary memory during the
karatsuba operations.
* mpi_mpow.c: Removed the unused Barrett code.
Sun Dec 19 15:22:26 CET 1999 Werner Koch <wk@gnupg.de>
* power/ : Converted more comments to C comments because some AS
complain about ' in comments.
Thu Dec 16 10:07:58 CET 1999 Werner Koch <wk@gnupg.de>
* Makefile.am: c/SFLAGS/ASFLAGS/. This has only been used by the
powerpc and actually never passed the -Wa,foo to the cc.
Thu Dec 9 10:31:05 CET 1999 Werner Koch <wk@gnupg.de>
* power/: Add all files from GMP for this CPU.
* config.links: Support for BSDI 4.x. By Wayne Chapeskie.
(sparc8): Made the search path the same as sparc9
* mpih-div.c (mpihelp_divrem): The MPN_COPY_DECR copied one
element too many. This is the gmp2.0.2p9.txt patch.
Sat Oct 9 20:34:41 CEST 1999 Werner Koch <wk@gnupg.de>
* Makefile.am: Removed libtool.
Mon Aug 30 20:38:33 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
* config.links: Add case label for DJGPP
Wed Jul 14 19:42:08 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
* Makefile.am: Use .s files as temporaries, disabled other .S rules.
Wed Jul 7 13:08:40 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
* mpicoder.c (g10_log_mpidump): New.
* Makefile.am: Support for libtool.
Fri Jul 2 11:45:54 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
* mpi-bit.c (mpi_lshift_limbs,mpi_rshift_limbs): New.
* mpi-mpow.c (barrett_mulm): New but disabled.
Tue Jun 1 16:01:46 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
* config.links (i[56]86*-*-freebsdelf*): New.
Sun May 23 14:20:22 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
* config.links (sysdep.h): Not any more conditionally created.
Tue May 4 15:47:53 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
* mpiutil.c (mpi_alloc_like): New.
Mon Apr 26 17:48:15 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
* mpih-add.c, mpih-sub.c: Removed
* mpi-inline.c: New.
* mpi-inline.h: Make it usable by mpi-inline.c.
Sun Apr 18 10:11:28 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
* mpih-mul.c (mpihelp_mul_n): Fixed use of memory region.
(mpihelp_mul): Ditto.
Wed Apr 7 20:51:39 CEST 1999 Werner Koch <wk@isil.d.shuttle.de>
* Makefile.am: Explicit rules to invoke cpp on *.S
Mon Mar 8 20:47:17 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
* config.links: Take advantage of the with_symbol_underscore macro.
Add support for freebsd 4.
Wed Feb 24 11:07:27 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
* mips3/mpih-sub1.S: Removed left over junk in last line. (Should I
blame me or my editor?).
Sat Feb 13 12:04:43 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
* Makefile.am: Removed the +=. Add MPI_OPT_FLAGS.
Sat Jan 9 16:02:23 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
* mpi-cmp.c (mpi_cmp_ui): Normalized the arg.
Thu Jan 7 18:00:58 CET 1999 Werner Koch <wk@isil.d.shuttle.de>
* mpi-bit.c (mpi_normalize): New.
(mpi_get_nbits): Normalize the MPI.
* mpi-bit.c (mpi_cmp): Normalize the MPI before the compare.
Tue Dec 8 13:15:16 CET 1998 Werner Koch <wk@isil.d.shuttle.de>
* config.links: Moved the case for powerpc*linux
* powerpcp32/*.S: Removed some underscores.
Thu Nov 26 07:27:52 1998 Werner Koch <werner.koch@guug.de>
* config.links: Support for ppc with ELF
* powerpc32/syntax.h: New.
* powerpc32/*.S: Applied ELF patches (glibc patches)
Tue Nov 10 19:31:37 1998 Werner Koch (wk@isil.d.shuttle.de)
* power*/ : Started with stuff for PPC
* config.links: Some stuff for PPC.
* generic/udiv-w-sdiv.c: New but disabled.
Tue Oct 27 12:37:46 1998 Werner Koch (wk@isil.d.shuttle.de)
* config.links (freebsd): Fixes for FreeBSD 3.0
Wed Oct 14 09:59:30 1998 Werner Koch (wk@isil.d.shuttle.de)
* config.links (freebsd): ELF patches from Jun Kuriyama.
Thu Oct 8 13:28:17 1998 Werner Koch (wk@isil.d.shuttle.de)
* mpi-mpow.c (mpi_mulpowm): Fixed mem leak (m_free/mpi_free).
Thu Sep 17 18:08:50 1998 Werner Koch (wk@(none))
* hppa1.1/udiv-qrnnd.S: Fix from Steffen Zahn for HPUX 10.20
Thu Aug 6 16:39:28 1998 Werner Koch,mobil,,, (wk@tobold)
* mpi-bit.c (mpi_set_bytes): Removed.
Wed Aug 5 15:11:12 1998 Werner Koch (wk@(none))
* mpicoder.c (mpi_read_from_buffer): New.
* mpiutil.c (mpi_set_opaque): New.
(mpi_get_opaque): New.
(mpi_copy): Changed to support the opaque flag.
(mpi_free): Ditto.
Sat Jul 4 10:11:11 1998 Werner Koch (wk@isil.d.shuttle.de)
* mpiutil.c (mpi_clear): Reset flags.
(mpi_set): Ditto.
(mpi_alloc_secure): Set flag to 1 and not ored the 1 in, tsss..
Fri Jun 26 11:19:06 1998 Werner Koch (wk@isil.d.shuttle.de)
* mpiutil.c (mpi_alloc): set nbits to 0.
(mpi_alloc_secure): Ditto.
(mpi_clear): Ditto.
Thu Jun 25 11:50:01 1998 Werner Koch (wk@isil.d.shuttle.de)
* mips3/*.S: New
Mon May 18 13:47:06 1998 Werner Koch (wk@isil.d.shuttle.de)
* config.links: split mpih-shift into mpih-[lr]shift and
changed all implementations.
* mpi/alpha: add some new assembler stuff.
Wed May 13 11:04:29 1998 Werner Koch (wk@isil.d.shuttle.de)
* config.links: Add support for MIPS
Thu Apr 9 11:31:36 1998 Werner Koch (wk@isil.d.shuttle.de)
* mpicoder.c (mpi_get_secure_buffer): New.
Wed Apr 8 09:44:33 1998 Werner Koch (wk@isil.d.shuttle.de)
* config.links: Applied small fix from Ulf Möller.
Mon Apr 6 12:38:52 1998 Werner Koch (wk@isil.d.shuttle.de)
* mpicoder.c (mpi_get_buffer): Removed returned leading zeroes
and changed all callers.
Tue Mar 10 13:40:34 1998 Werner Koch (wk@isil.d.shuttle.de)
* mpi-bit.c (mpi_clear_highbit): New.
Mon Mar 2 19:29:00 1998 Werner Koch (wk@isil.d.shuttle.de)
* Makefile.am (DISTCLEANFILES): New
Thu Feb 26 06:48:54 1998 Werner Koch (wk@isil.d.shuttle.de)
* config.links (X86_BROKEN_ALIGN): Added for some systems.
Mon Feb 23 12:21:40 1998 Werner Koch (wk@isil.d.shuttle.de)
* mpi/m68k/mpih-shift.S (Lspecial): Changed duplicate symbol.
Mon Feb 16 13:00:27 1998 Werner Koch (wk@isil.d.shuttle.de)
* config.links : Add detection of m68k cpus
Copyright 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
This file is free software; as a special exception the author gives
unlimited permission to copy and/or distribute it, with or without
modifications, as long as this notice is preserved.
This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY, to the extent permitted by law; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

79
mpi/Makefile.am Normal file

@@ -0,0 +1,79 @@
# Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
#
# This file is part of GnuPG.
#
# GnuPG is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GnuPG is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
## Process this file with automake to produce Makefile.in
INCLUDES = -I.. -I$(top_srcdir)/include
CFLAGS = @CFLAGS@ @MPI_OPT_FLAGS@
ASFLAGS = @MPI_SFLAGS@
EXTRA_DIST = config.links
DISTCLEANFILES = mpi-asm-defs.h \
mpih-add1.S mpih-mul1.S mpih-mul2.S mpih-mul3.S \
mpih-lshift.S mpih-rshift.S mpih-sub1.S asm-syntax.h sysdep.h
# Note: we only use .S files, so we should delete all left-over .s files
CLEANFILES = _*.s
noinst_LIBRARIES = libmpi.a
# libmpi_a_LDFLAGS =
libmpi_a_SOURCES = longlong.h \
mpi-add.c \
mpi-bit.c \
mpi-cmp.c \
mpi-div.c \
mpi-gcd.c \
mpi-internal.h \
mpi-inline.h \
mpi-inline.c \
mpi-inv.c \
mpi-mul.c \
mpi-pow.c \
mpi-mpow.c \
mpi-scan.c \
mpicoder.c \
mpih-cmp.c \
mpih-div.c \
mpih-mul.c \
mpiutil.c \
g10m.c
# Note: these objects are actually links; the source files are
# distributed by special code in dist-hook
common_asm_objects = mpih-mul1.o \
mpih-mul2.o \
mpih-mul3.o \
mpih-add1.o \
mpih-sub1.o \
mpih-lshift.o \
mpih-rshift.o
libmpi_a_DEPENDENCIES = $(common_asm_objects) @MPI_EXTRA_ASM_OBJS@
libmpi_a_LIBADD = $(common_asm_objects) @MPI_EXTRA_ASM_OBJS@
# Cancel the default rules used by libtool, which do not really
# work, and add one to run cpp on .S files
.S.o:
$(CPP) $(INCLUDES) $(DEFS) $< | grep -v '^#' > _$*.s
$(COMPILE) -c _$*.s
mv -f _$*.o $*.o
.S.lo:

53
mpi/alpha/README Normal file

@@ -0,0 +1,53 @@
This directory contains mpn functions optimized for DEC Alpha processors.
RELEVANT OPTIMIZATION ISSUES
EV4
1. This chip has very limited store bandwidth. The on-chip L1 cache is
write-through, and a cache line is transferred from the store buffer to the
off-chip L2 in as many as 15 cycles on most systems. This delay hurts
mpn_add_n, mpn_sub_n, mpn_lshift, and mpn_rshift.
2. Pairing is possible between memory instructions and integer arithmetic
instructions.
3. mulq and umulh are documented to have a latency of 23 cycles, but 2 of
these cycles are pipelined. Thus, multiply instructions can be issued at a
rate of one every 21st cycle.
EV5
1. The memory bandwidth of this chip seems excellent, both for loads and
stores. Even when the working set is larger than the on-chip L1 and L2
caches, the performance remains almost unaffected.
2. mulq has a measured latency of 13 cycles and an issue rate of 1 each 8th
cycle. umulh has a measured latency of 15 cycles and an issue rate of 1
each 10th cycle. But the exact timing is somewhat confusing.
3. mpn_add_n. With 4-fold unrolling, we need 37 instructions, whereof 12
are memory operations. This will take at least
ceil(37/2) [dual issue] + 1 [taken branch] = 20 cycles
We have 12 memory cycles, plus 4 after-store conflict cycles, or 16 data
cache cycles, which should be completely hidden in the 20 issue cycles.
The computation is inherently serial, with these dependencies:

            addq
           /    \
        addq    cmpult
          |       |
       cmpult     |
          \      /
            or
I.e., there is a 4 cycle path for each limb, making 16 cycles the absolute
minimum. We could replace the `or' with a cmoveq/cmovne, which would save
a cycle on EV5, but that might waste a cycle on EV4. Also, cmov takes 2
cycles.
            addq
           /    \
        addq    cmpult
          |        \
       cmpult  ->  cmovne
STATUS
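To make the dependency sketch above concrete, here is a rough C model of one limb step of add_n, mirroring the generic mpih-add1.c that appears later in this diff; the instruction names in the comments are only an approximate mapping onto the Alpha code, not a quote from it.

#include <stdint.h>

typedef uint64_t limb_t;                    /* one 64-bit Alpha limb (assumed) */

/* One limb of the carry chain: two adds and two unsigned compares whose
 * results are OR-ed into the carry for the next limb -- the 4-cycle
 * critical path discussed above. */
static limb_t add_one_limb(limb_t a, limb_t b, limb_t cy_in, limb_t *cy_out)
{
    limb_t t  = b + cy_in;                  /* addq                    */
    limb_t c1 = t < cy_in;                  /* cmpult (carry from add) */
    limb_t s  = a + t;                      /* addq                    */
    limb_t c2 = s < a;                      /* cmpult (carry from add) */
    *cy_out = c1 | c2;                      /* or                      */
    return s;
}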

11
mpi/alpha/distfiles Normal file

@@ -0,0 +1,11 @@
README
mpih-add1.S
mpih-sub1.S
mpih-mul1.S
mpih-mul2.S
mpih-mul3.S
mpih-lshift.S
mpih-rshift.S
udiv-qrnnd.S

124
mpi/alpha/mpih-add1.S Normal file

@@ -0,0 +1,124 @@
/* alpha add_n -- Add two limb vectors of the same length > 0 and store
* sum in a third limb vector.
* Copyright (C) 1995, 1998, 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*******************
* mpi_limb_t
* mpihelp_add_n( mpi_ptr_t res_ptr, ($16)
* mpi_ptr_t s1_ptr, ($17)
* mpi_ptr_t s2_ptr, ($18)
* mpi_size_t size) ($19)
*/
.set noreorder
.set noat
.text
.align 3
.globl mpihelp_add_n
.ent mpihelp_add_n
mpihelp_add_n:
.frame $30,0,$26,0
ldq $3,0($17)
ldq $4,0($18)
subq $19,1,$19
and $19,4-1,$2 # number of limbs in first loop
bis $31,$31,$0
beq $2,.L0 # if multiple of 4 limbs, skip first loop
subq $19,$2,$19
.Loop0: subq $2,1,$2
ldq $5,8($17)
addq $4,$0,$4
ldq $6,8($18)
cmpult $4,$0,$1
addq $3,$4,$4
cmpult $4,$3,$0
stq $4,0($16)
or $0,$1,$0
addq $17,8,$17
addq $18,8,$18
bis $5,$5,$3
bis $6,$6,$4
addq $16,8,$16
bne $2,.Loop0
.L0: beq $19,.Lend
.align 3
.Loop: subq $19,4,$19
ldq $5,8($17)
addq $4,$0,$4
ldq $6,8($18)
cmpult $4,$0,$1
addq $3,$4,$4
cmpult $4,$3,$0
stq $4,0($16)
or $0,$1,$0
ldq $3,16($17)
addq $6,$0,$6
ldq $4,16($18)
cmpult $6,$0,$1
addq $5,$6,$6
cmpult $6,$5,$0
stq $6,8($16)
or $0,$1,$0
ldq $5,24($17)
addq $4,$0,$4
ldq $6,24($18)
cmpult $4,$0,$1
addq $3,$4,$4
cmpult $4,$3,$0
stq $4,16($16)
or $0,$1,$0
ldq $3,32($17)
addq $6,$0,$6
ldq $4,32($18)
cmpult $6,$0,$1
addq $5,$6,$6
cmpult $6,$5,$0
stq $6,24($16)
or $0,$1,$0
addq $17,32,$17
addq $18,32,$18
addq $16,32,$16
bne $19,.Loop
.Lend: addq $4,$0,$4
cmpult $4,$0,$1
addq $3,$4,$4
cmpult $4,$3,$0
stq $4,0($16)
or $0,$1,$0
ret $31,($26),1
.end mpihelp_add_n

123
mpi/alpha/mpih-lshift.S Normal file

@@ -0,0 +1,123 @@
/* alpha - left shift
*
* Copyright (C) 1994, 1995, 1998, 2001 Free Software Foundation, Inc.
*
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
/*******************
* mpi_limb_t
* mpihelp_lshift( mpi_ptr_t wp, (r16)
* mpi_ptr_t up, (r17)
* mpi_size_t usize, (r18)
* unsigned cnt) (r19)
*
* This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
* it would take 4 cycles/limb. It should be possible to get down to 3
* cycles/limb since both ldq and stq can be paired with the other used
* instructions. But there are many restrictions in the 21064 pipeline that
* make it hard, if not impossible, to get down to 3 cycles/limb:
*
* 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
* 2. Only aligned instruction pairs can be paired.
* 3. The store buffer or silo might not be able to deal with the bandwidth.
*/
.set noreorder
.set noat
.text
.align 3
.globl mpihelp_lshift
.ent mpihelp_lshift
mpihelp_lshift:
.frame $30,0,$26,0
s8addq $18,$17,$17 # make r17 point at end of s1
ldq $4,-8($17) # load first limb
subq $17,8,$17
subq $31,$19,$7
s8addq $18,$16,$16 # make r16 point at end of RES
subq $18,1,$18
and $18,4-1,$20 # number of limbs in first loop
srl $4,$7,$0 # compute function result
beq $20,.L0
subq $18,$20,$18
.align 3
.Loop0:
ldq $3,-8($17)
subq $16,8,$16
subq $17,8,$17
subq $20,1,$20
sll $4,$19,$5
srl $3,$7,$6
bis $3,$3,$4
bis $5,$6,$8
stq $8,0($16)
bne $20,.Loop0
.L0: beq $18,.Lend
.align 3
.Loop: ldq $3,-8($17)
subq $16,32,$16
subq $18,4,$18
sll $4,$19,$5
srl $3,$7,$6
ldq $4,-16($17)
sll $3,$19,$1
bis $5,$6,$8
stq $8,24($16)
srl $4,$7,$2
ldq $3,-24($17)
sll $4,$19,$5
bis $1,$2,$8
stq $8,16($16)
srl $3,$7,$6
ldq $4,-32($17)
sll $3,$19,$1
bis $5,$6,$8
stq $8,8($16)
srl $4,$7,$2
subq $17,32,$17
bis $1,$2,$8
stq $8,0($16)
bgt $18,.Loop
.Lend: sll $4,$19,$8
stq $8,-8($16)
ret $31,($26),1
.end mpihelp_lshift

90
mpi/alpha/mpih-mul1.S Normal file

@@ -0,0 +1,90 @@
/* Alpha 21064 mpih-mul1.S -- Multiply a limb vector with a limb and store
* the result in a second limb vector.
*
* Copyright (C) 1992, 1994, 1995, 1998,
2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*******************
* mpi_limb_t
* mpihelp_mul_1( mpi_ptr_t res_ptr, (r16)
* mpi_ptr_t s1_ptr, (r17)
* mpi_size_t s1_size, (r18)
* mpi_limb_t s2_limb) (r19)
*
* This code runs at 42 cycles/limb on the EV4 and 18 cycles/limb on the EV5.
*
* To improve performance for long multiplications, we would use
* 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
* these instructions without slowing down the general code: 1. We can
* only have two prefetches in operation at any time in the Alpha
* architecture. 2. There will seldom be any special alignment
* between RES_PTR and S1_PTR. Maybe we can simply divide the current
* loop into an inner and outer loop, having the inner loop handle
* exactly one prefetch block?
*/
.set noreorder
.set noat
.text
.align 3
.globl mpihelp_mul_1
.ent mpihelp_mul_1 2
mpihelp_mul_1:
.frame $30,0,$26
ldq $2,0($17) # $2 = s1_limb
subq $18,1,$18 # size--
mulq $2,$19,$3 # $3 = prod_low
bic $31,$31,$4 # clear cy_limb
umulh $2,$19,$0 # $0 = prod_high
beq $18,Lend1 # jump if size was == 1
ldq $2,8($17) # $2 = s1_limb
subq $18,1,$18 # size--
stq $3,0($16)
beq $18,Lend2 # jump if size was == 2
.align 3
Loop: mulq $2,$19,$3 # $3 = prod_low
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
subq $18,1,$18 # size--
umulh $2,$19,$4 # $4 = cy_limb
ldq $2,16($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
addq $3,$0,$3 # $3 = cy_limb + prod_low
stq $3,8($16)
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
addq $16,8,$16 # res_ptr++
bne $18,Loop
Lend2: mulq $2,$19,$3 # $3 = prod_low
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
umulh $2,$19,$4 # $4 = cy_limb
addq $3,$0,$3 # $3 = cy_limb + prod_low
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
stq $3,8($16)
addq $4,$0,$0 # cy_limb = prod_high + cy
ret $31,($26),1
Lend1: stq $3,0($16)
ret $31,($26),1
.end mpihelp_mul_1

97
mpi/alpha/mpih-mul2.S Normal file

@@ -0,0 +1,97 @@
/* Alpha 21064 addmul_1 -- Multiply a limb vector with a limb and add
* the result to a second limb vector.
*
* Copyright (C) 1992, 1994, 1995, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*******************
* mpi_limb_t
* mpihelp_addmul_1( mpi_ptr_t res_ptr, (r16)
* mpi_ptr_t s1_ptr, (r17)
* mpi_size_t s1_size, (r18)
* mpi_limb_t s2_limb) (r19)
*
* This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
*/
.set noreorder
.set noat
.text
.align 3
.globl mpihelp_addmul_1
.ent mpihelp_addmul_1 2
mpihelp_addmul_1:
.frame $30,0,$26
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
subq $18,1,$18 # size--
mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
umulh $2,$19,$0 # $0 = prod_high
beq $18,.Lend1 # jump if size was == 1
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
subq $18,1,$18 # size--
addq $5,$3,$3
cmpult $3,$5,$4
stq $3,0($16)
addq $16,8,$16 # res_ptr++
beq $18,.Lend2 # jump if size was == 2
.align 3
.Loop: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
subq $18,1,$18 # size--
umulh $2,$19,$4 # $4 = cy_limb
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
addq $3,$0,$3 # $3 = cy_limb + prod_low
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
addq $5,$3,$3
cmpult $3,$5,$5
stq $3,0($16)
addq $16,8,$16 # res_ptr++
addq $5,$0,$0 # combine carries
bne $18,.Loop
.Lend2: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
umulh $2,$19,$4 # $4 = cy_limb
addq $3,$0,$3 # $3 = cy_limb + prod_low
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
addq $5,$3,$3
cmpult $3,$5,$5
stq $3,0($16)
addq $5,$0,$0 # combine carries
addq $4,$0,$0 # cy_limb = prod_high + cy
ret $31,($26),1
.Lend1: addq $5,$3,$3
cmpult $3,$5,$5
stq $3,0($16)
addq $0,$5,$0
ret $31,($26),1
.end mpihelp_addmul_1

95
mpi/alpha/mpih-mul3.S Normal file

@@ -0,0 +1,95 @@
/* Alpha 21064 submul_1 -- Multiply a limb vector with a limb and
* subtract the result from a second limb vector.
* Copyright (C) 1992, 1994, 1995, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*******************
* mpi_limb_t
* mpihelp_submul_1( mpi_ptr_t res_ptr, (r16 )
* mpi_ptr_t s1_ptr, (r17 )
* mpi_size_t s1_size, (r18 )
* mpi_limb_t s2_limb) (r19 )
*
* This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
*/
.set noreorder
.set noat
.text
.align 3
.globl mpihelp_submul_1
.ent mpihelp_submul_1 2
mpihelp_submul_1:
.frame $30,0,$26
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
subq $18,1,$18 # size--
mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
umulh $2,$19,$0 # $0 = prod_high
beq $18,.Lend1 # jump if size was == 1
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
subq $18,1,$18 # size--
subq $5,$3,$3
cmpult $5,$3,$4
stq $3,0($16)
addq $16,8,$16 # res_ptr++
beq $18,.Lend2 # jump if size was == 2
.align 3
.Loop: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
subq $18,1,$18 # size--
umulh $2,$19,$4 # $4 = cy_limb
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
addq $3,$0,$3 # $3 = cy_limb + prod_low
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
subq $5,$3,$3
cmpult $5,$3,$5
stq $3,0($16)
addq $16,8,$16 # res_ptr++
addq $5,$0,$0 # combine carries
bne $18,.Loop
.Lend2: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
umulh $2,$19,$4 # $4 = cy_limb
addq $3,$0,$3 # $3 = cy_limb + prod_low
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
subq $5,$3,$3
cmpult $5,$3,$5
stq $3,0($16)
addq $5,$0,$0 # combine carries
addq $4,$0,$0 # cy_limb = prod_high + cy
ret $31,($26),1
.Lend1: subq $5,$3,$3
cmpult $5,$3,$5
stq $3,0($16)
addq $0,$5,$0
ret $31,($26),1
.end mpihelp_submul_1

120
mpi/alpha/mpih-rshift.S Normal file

@@ -0,0 +1,120 @@
/* alpha rshift
* Copyright (C) 1994, 1995, 1998, 1999,
* 2000, 2001 Free Software Foundation, Inc.
*
* This file is part of GNUPG.
*
* GNUPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GNUPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
/*******************
* mpi_limb_t
* mpihelp_rshift( mpi_ptr_t wp, (r16)
* mpi_ptr_t up, (r17)
* mpi_size_t usize, (r18)
* unsigned cnt) (r19)
*
* This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
* it would take 4 cycles/limb. It should be possible to get down to 3
* cycles/limb since both ldq and stq can be paired with the other used
* instructions. But there are many restrictions in the 21064 pipeline that
* make it hard, if not impossible, to get down to 3 cycles/limb:
*
* 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
* 2. Only aligned instruction pairs can be paired.
* 3. The store buffer or silo might not be able to deal with the bandwidth.
*/
.set noreorder
.set noat
.text
.align 3
.globl mpihelp_rshift
.ent mpihelp_rshift
mpihelp_rshift:
.frame $30,0,$26,0
ldq $4,0($17) # load first limb
addq $17,8,$17
subq $31,$19,$7
subq $18,1,$18
and $18,4-1,$20 # number of limbs in first loop
sll $4,$7,$0 # compute function result
beq $20,.R0
subq $18,$20,$18
.align 3
.Roop0:
ldq $3,0($17)
addq $16,8,$16
addq $17,8,$17
subq $20,1,$20
srl $4,$19,$5
sll $3,$7,$6
bis $3,$3,$4
bis $5,$6,$8
stq $8,-8($16)
bne $20,.Roop0
.R0: beq $18,.Rend
.align 3
.Roop: ldq $3,0($17)
addq $16,32,$16
subq $18,4,$18
srl $4,$19,$5
sll $3,$7,$6
ldq $4,8($17)
srl $3,$19,$1
bis $5,$6,$8
stq $8,-32($16)
sll $4,$7,$2
ldq $3,16($17)
srl $4,$19,$5
bis $1,$2,$8
stq $8,-24($16)
sll $3,$7,$6
ldq $4,24($17)
srl $3,$19,$1
bis $5,$6,$8
stq $8,-16($16)
sll $4,$7,$2
addq $17,32,$17
bis $1,$2,$8
stq $8,-8($16)
bgt $18,.Roop
.Rend: srl $4,$19,$8
stq $8,0($16)
ret $31,($26),1
.end mpihelp_rshift

124
mpi/alpha/mpih-sub1.S Normal file

@@ -0,0 +1,124 @@
/* Alpha sub_n -- Subtract two limb vectors of the same length > 0 and
* store difference in a third limb vector.
* Copyright (C) 1995, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*******************
* mpi_limb_t
* mpihelp_sub_n( mpi_ptr_t res_ptr, (r16)
* mpi_ptr_t s1_ptr, (r17)
* mpi_ptr_t s2_ptr, (r18)
* mpi_size_t size) (r19)
*/
.set noreorder
.set noat
.text
.align 3
.globl mpihelp_sub_n
.ent mpihelp_sub_n
mpihelp_sub_n:
.frame $30,0,$26,0
ldq $3,0($17)
ldq $4,0($18)
subq $19,1,$19
and $19,4-1,$2 # number of limbs in first loop
bis $31,$31,$0
beq $2,.L0 # if multiple of 4 limbs, skip first loop
subq $19,$2,$19
.Loop0: subq $2,1,$2
ldq $5,8($17)
addq $4,$0,$4
ldq $6,8($18)
cmpult $4,$0,$1
subq $3,$4,$4
cmpult $3,$4,$0
stq $4,0($16)
or $0,$1,$0
addq $17,8,$17
addq $18,8,$18
bis $5,$5,$3
bis $6,$6,$4
addq $16,8,$16
bne $2,.Loop0
.L0: beq $19,.Lend
.align 3
.Loop: subq $19,4,$19
ldq $5,8($17)
addq $4,$0,$4
ldq $6,8($18)
cmpult $4,$0,$1
subq $3,$4,$4
cmpult $3,$4,$0
stq $4,0($16)
or $0,$1,$0
ldq $3,16($17)
addq $6,$0,$6
ldq $4,16($18)
cmpult $6,$0,$1
subq $5,$6,$6
cmpult $5,$6,$0
stq $6,8($16)
or $0,$1,$0
ldq $5,24($17)
addq $4,$0,$4
ldq $6,24($18)
cmpult $4,$0,$1
subq $3,$4,$4
cmpult $3,$4,$0
stq $4,16($16)
or $0,$1,$0
ldq $3,32($17)
addq $6,$0,$6
ldq $4,32($18)
cmpult $6,$0,$1
subq $5,$6,$6
cmpult $5,$6,$0
stq $6,24($16)
or $0,$1,$0
addq $17,32,$17
addq $18,32,$18
addq $16,32,$16
bne $19,.Loop
.Lend: addq $4,$0,$4
cmpult $4,$0,$1
subq $3,$4,$4
cmpult $3,$4,$0
stq $4,0($16)
or $0,$1,$0
ret $31,($26),1
.end mpihelp_sub_n

161
mpi/alpha/udiv-qrnnd.S Normal file

@@ -0,0 +1,161 @@
/* Alpha 21064 __udiv_qrnnd
*
* Copyright (C) 1992, 1994, 1995, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
.set noreorder
.set noat
.text
.align 3
.globl __udiv_qrnnd
.ent __udiv_qrnnd
__udiv_qrnnd:
.frame $30,0,$26,0
.prologue 0
#define cnt $2
#define tmp $3
#define rem_ptr $16
#define n1 $17
#define n0 $18
#define d $19
#define qb $20
ldiq cnt,16
blt d,.Largedivisor
.Loop1: cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
cmpule d,n1,qb
subq n1,d,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
cmpule d,n1,qb
subq n1,d,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
cmpule d,n1,qb
subq n1,d,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
cmpule d,n1,qb
subq n1,d,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
subq cnt,1,cnt
bgt cnt,.Loop1
stq n1,0(rem_ptr)
bis $31,n0,$0
ret $31,($26),1
.Largedivisor:
and n0,1,$4
srl n0,1,n0
sll n1,63,tmp
or tmp,n0,n0
srl n1,1,n1
and d,1,$6
srl d,1,$5
addq $5,$6,$5
.Loop2: cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
cmpule $5,n1,qb
subq n1,$5,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
cmpule $5,n1,qb
subq n1,$5,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
cmpule $5,n1,qb
subq n1,$5,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
cmpule $5,n1,qb
subq n1,$5,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
subq cnt,1,cnt
bgt cnt,.Loop2
addq n1,n1,n1
addq $4,n1,n1
bne $6,.LOdd
stq n1,0(rem_ptr)
bis $31,n0,$0
ret $31,($26),1
.LOdd:
/* q' in n0. r' in n1 */
addq n1,n0,n1
cmpult n1,n0,tmp # tmp := carry from addq
beq tmp,.LLp6
addq n0,1,n0
subq n1,d,n1
.LLp6: cmpult n1,d,tmp
bne tmp,.LLp7
addq n0,1,n0
subq n1,d,n1
.LLp7:
stq n1,0(rem_ptr)
bis $31,n0,$0
ret $31,($26),1
.end __udiv_qrnnd
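A rough C model of the simple-divisor path above (the .Loop1 code taken when the divisor's top bit is clear), for readers who prefer to check the algorithm in C: it is plain shift-and-conditional-subtract division of the two-limb value n1:n0 by d, one quotient bit per step, with the quotient accumulating in n0 as the dividend bits shift out. The assembly unrolls four bits per iteration and runs the loop 16 times for 64 bits. This sketch is illustrative only and assumes the usual __udiv_qrnnd precondition n1 < d.

#include <stdint.h>

typedef uint64_t limb_t;

static limb_t udiv_qrnnd_model(limb_t *rem, limb_t n1, limb_t n0, limb_t d)
{
    int i;

    for (i = 0; i < 64; i++) {
        limb_t msb = n0 >> 63;      /* cmplt n0,0,tmp: bit leaving n0 ... */
        n1 = (n1 << 1) | msb;       /* ... becomes the new low bit of n1  */
        n0 = n0 << 1;
        if (n1 >= d) {              /* cmpule d,n1,qb                     */
            n1 -= d;                /* subq + cmovne                      */
            n0 |= 1;                /* bis n0,qb,n0: record quotient bit  */
        }
    }
    *rem = n1;                      /* remainder */
    return n0;                      /* quotient  */
}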

310
mpi/config.links Normal file

@@ -0,0 +1,310 @@
# config.links - helper for ../configure
# Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
#
# This file is part of GnuPG.
#
# GnuPG is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GnuPG is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
# sourced by ../configure to get the list of files to link
# this should set $mpi_ln_list.
# Note: this is called from the above directory.
mpi_extra_modules=
mpi_sflags=
test -d ./mpi || mkdir ./mpi
echo '/* created by config.links - do not edit */' >./mpi/asm-syntax.h
if test "$try_asm_modules" = "yes" ; then
case "${target}" in
i[3467]86*-*-freebsd*-elf | \
i[3467]86*-*-freebsd[3-9]* | \
i[3467]86*-*-freebsdelf* | \
i[3467]86*-*-netbsd* )
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
path="i386"
;;
i586*-*-freebsd*-elf | \
i586*-*-freebsd[3-9]* | \
i586*-*-freebsdelf* | \
i586*-*-netbsd* | \
pentium-*-netbsd* | \
pentiumpro-*-netbsd*)
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
path="i586 i386"
;;
i[34]86*-*-bsdi4*)
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
path="i386"
;;
i[3467]86*-*-linuxaout* | \
i[3467]86*-*-linuxoldld* | \
i[3467]86*-*-*bsd*)
echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h
echo '#define X86_BROKEN_ALIGN' >>./mpi/asm-syntax.h
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
path="i386"
;;
i586*-*-linuxaout* | \
i586*-*-linuxoldld* | \
i586*-*-*bsd*)
echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h
echo '#define X86_BROKEN_ALIGN' >>./mpi/asm-syntax.h
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
path="i586 i386"
;;
i[3467]86*-msdosdjgpp*)
echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
path="i386"
;;
i586*-msdosdjgpp*)
echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
path="i586 i386"
;;
i[3467]86*-*-*)
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
path="i386"
;;
i586*-*-* | \
pentium-*-* | \
pentiumpro-*-*)
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
path="i586 i386"
;;
alpha*-*-*)
echo '/* configured for alpha */' >>./mpi/asm-syntax.h
path="alpha"
mpi_extra_modules="udiv-qrnnd"
;;
hppa7000*-*-*)
echo '/* configured for HPPA (pa7000) */' >>./mpi/asm-syntax.h
path="hppa1.1 hppa"
mpi_extra_modules="udiv-qrnnd"
;;
hppa1.0*-*-*)
echo '/* configured for HPPA 1.0 */' >>./mpi/asm-syntax.h
path="hppa"
mpi_extra_modules="udiv-qrnnd"
;;
hppa*-*-*) # assume pa7100
echo '/* configured for HPPA (pa7100) */' >>./mpi/asm-syntax.h
path="pa7100 hppa1.1 hppa"
mpi_extra_modules="udiv-qrnnd"
;;
sparc64-*-linux-gnu)
# An extra rule because we have a report for this one only.
# Should be compared against the next GMP version
echo '/* configured for sparc64-*-linux-gnu */' >>./mpi/asm-syntax.h
path="sparc32v8 sparc32"
mpi_extra_modules="udiv"
;;
sparc64-sun-solaris2*)
# Got a report that udiv is missing, so we try this one
echo '/* configured for sparc64-sun-solaris2 */' >>./mpi/asm-syntax.h
path="sparc32v8 sparc32"
mpi_extra_modules="udiv"
;;
sparc9*-*-* | \
sparc64*-*-* | \
ultrasparc*-*-* )
echo '/* configured for sparc9 or higher */' >>./mpi/asm-syntax.h
path="sparc32v8 sparc32"
;;
sparc8*-*-* | \
microsparc*-*-*)
echo '/* configured for sparc8 */' >>./mpi/asm-syntax.h
path="sparc32v8 sparc32"
;;
supersparc*-*-*)
echo '/* configured for supersparc */' >>./mpi/asm-syntax.h
path="supersparc sparc32v8 sparc32"
mpi_extra_modules="udiv"
;;
sparc*-*-*)
echo '/* configured for sparc */' >>./mpi/asm-syntax.h
path="sparc32"
mpi_extra_modules="udiv"
;;
mips[34]*-*-* | \
mips*-*-irix6*)
echo '/* configured for MIPS3 */' >>./mpi/asm-syntax.h
path="mips3"
;;
mips*-*-*)
echo '/* configured for MIPS2 */' >>./mpi/asm-syntax.h
path="mips2"
;;
# Motorola 68k configurations. Let m68k mean 68020-68040.
# mc68000 or mc68060 configurations need to be specified explicitly
m680[234]0*-*-linuxaout* | \
m68k*-*-linuxaout*)
echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h
path="m68k/mc68020 m68k"
;;
m68060*-*-linuxaout*)
echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h
path="m68k"
;;
m680[234]0*-*-linux* | \
m68k*-*-linux*)
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h
;;
m68060*-*-linux*)
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h
path="m68k"
;;
m68k-atari-mint)
echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h
path="m68k/mc68020 m68k"
;;
m68000*-*-* | \
m68060*-*-*)
echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h
path="m68k/mc68000"
;;
m680[234]0*-*-* | \
m68k*-*-*)
echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h
path="m68k/mc68020 m68k"
;;
powerpc*-*-linux*)
echo '/* configured for powerpc/ELF */' >>./mpi/asm-syntax.h
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/powerpc32/syntax.h >>./mpi/asm-syntax.h
path="powerpc32"
;;
powerpc*-*-netbsd* | powerpc*-*-openbsd*)
echo '/* configured {Open,Net}BSD on powerpc */' >>./mpi/asm-syntax.h
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/powerpc32/syntax.h >>./mpi/asm-syntax.h
mpi_sflags="-Wa,-mppc"
path="powerpc32"
;;
rs6000-*-aix[456789]* | \
rs6000-*-aix3.2.[456789])
mpi_sflags="-Wa,-mpwr"
path="power"
mpi_extra_modules="udiv-w-sdiv"
;;
rs6000-*-* | \
power-*-* | \
power2-*-*)
mpi_sflags="-Wa,-mppc"
path="power"
mpi_extra_modules="udiv-w-sdiv"
;;
powerpc-ibm-aix4.2.* )
# I am not sure about this one but a machine identified by
# powerpc-ibm-aix4.2.1.0 cannot use the powerpc32 code.
mpi_sflags="-Wa,-mpwr"
path="power"
mpi_extra_modules="udiv-w-sdiv"
;;
ppc601-*-*)
mpi_sflags="-Wa,-mppc"
path="power powerpc32"
;;
ppc60[234]*-*-* | \
powerpc*-*-*)
mpi_sflags="-Wa,-mppc"
path="powerpc32"
;;
ppc620-*-* | \
powerpc64*-*-*)
mpi_sflags="-Wa,-mppc"
path="powerpc64"
;;
*)
echo '/* No assembler modules configured */' >>./mpi/asm-syntax.h
path=""
;;
esac
else
echo '/* Assembler modules disabled on request */' >>./mpi/asm-syntax.h
path=""
fi
# Make sysdep.h
echo '/* created by config.links - do not edit */' >./mpi/sysdep.h
if test x$ac_cv_sys_symbol_underscore = xyes; then
cat <<EOF >>./mpi/sysdep.h
#if __STDC__
#define C_SYMBOL_NAME(name) _##name
#else
#define C_SYMBOL_NAME(name) _/**/name
#endif
EOF
else
cat <<EOF >>./mpi/sysdep.h
#define C_SYMBOL_NAME(name) name
EOF
fi
# fixme: grep these modules from Makefile.in
mpi_ln_modules="${mpi_extra_modules} mpih-add1 mpih-mul1 mpih-mul2 mpih-mul3 \
mpih-lshift mpih-rshift mpih-sub1"
mpi_ln_objects=
mpi_ln_list=
# try to get file to link from the assembler subdirectory and
# if this fails get it from the generic subdirectory.
path="$path generic"
for fn in $mpi_ln_modules ; do
mpi_ln_objects="$mpi_ln_objects $fn.o"
for dir in $path ; do
rm -f $srcdir/mpi/$fn.[Sc]
if test -f $srcdir/mpi/$dir/$fn.S ; then
mpi_ln_list="$mpi_ln_list mpi/$fn.S:mpi/$dir/$fn.S"
break;
elif test -f $srcdir/mpi/$dir/$fn.c ; then
mpi_ln_list="$mpi_ln_list mpi/$fn.c:mpi/$dir/$fn.c"
break;
fi
done
done
# Same thing for the file which defines the limb size
path="$path generic"
for dir in $path ; do
rm -f $srcdir/mpi/mpi-asm-defs.h
if test -f $srcdir/mpi/$dir/mpi-asm-defs.h ; then
mpi_ln_list="$mpi_ln_list mpi/mpi-asm-defs.h:mpi/$dir/mpi-asm-defs.h"
break;
fi
done
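The C_SYMBOL_NAME macro written into mpi/sysdep.h above lets the cpp-preprocessed assembler sources define whatever symbol name the C compiler on that target will actually reference. A minimal sketch of the intended use follows; it is illustrative only, and HAVE_SYMBOL_UNDERSCORE stands in for configure's ac_cv_sys_symbol_underscore result.

/* Equivalent of the generated sysdep.h (simplified sketch) */
#ifdef HAVE_SYMBOL_UNDERSCORE
#  define C_SYMBOL_NAME(name) _##name
#else
#  define C_SYMBOL_NAME(name) name
#endif

/* In a .S file run through cpp one would then write, for example:
 *
 *     .globl C_SYMBOL_NAME(mpihelp_add_n)
 *     C_SYMBOL_NAME(mpihelp_add_n):
 *
 * so that on a.out-style targets the assembly defines _mpihelp_add_n,
 * matching the underscore-prefixed symbol the C callers are compiled
 * to expect, while on ELF targets it stays mpihelp_add_n. */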

95
mpi/g10m.c Normal file

@@ -0,0 +1,95 @@
/* g10m.c - Wrapper for MPI
* Copyright (C) 1998 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#include "util.h"
/* FIXME: The modules should use functions from libgcrypt */
const char *g10m_revision_string(int dummy) { return "$Revision$"; }
MPI
g10m_new( unsigned nbits )
{
return mpi_alloc( (nbits+BITS_PER_MPI_LIMB-1) / BITS_PER_MPI_LIMB );
}
MPI
g10m_new_secure( unsigned nbits )
{
return mpi_alloc_secure( (nbits+BITS_PER_MPI_LIMB-1) / BITS_PER_MPI_LIMB );
}
void
g10m_release( MPI a )
{
mpi_free(a);
}
void
g10m_resize( MPI a, unsigned nbits )
{
mpi_resize( a, (nbits+BITS_PER_MPI_LIMB-1) / BITS_PER_MPI_LIMB );
}
MPI g10m_copy( MPI a ) { return mpi_copy( a ); }
void g10m_swap( MPI a, MPI b) { mpi_swap( a, b ); }
void g10m_set( MPI w, MPI u) { mpi_set( w, u ); }
void g10m_set_ui( MPI w, ulong u ) { mpi_set_ui( w, u ); }
int g10m_cmp( MPI u, MPI v ) { return mpi_cmp( u, v ); }
int g10m_cmp_ui( MPI u, ulong v ) { return mpi_cmp_ui( u, v ); }
void g10m_add(MPI w, MPI u, MPI v) { mpi_add( w, u, v ); }
void g10m_add_ui(MPI w, MPI u, ulong v ) { mpi_add_ui( w, u, v ); }
void g10m_sub( MPI w, MPI u, MPI v) { mpi_sub( w, u, v ); }
void g10m_sub_ui(MPI w, MPI u, ulong v ) { mpi_sub_ui( w, u, v ); }
void g10m_mul( MPI w, MPI u, MPI v) { mpi_mul( w, u, v ); }
void g10m_mulm( MPI w, MPI u, MPI v, MPI m) { mpi_mulm( w, u, v, m ); }
void g10m_mul_2exp( MPI w, MPI u, ulong cnt) { mpi_mul_2exp( w, u, cnt ); }
void g10m_mul_ui(MPI w, MPI u, ulong v ) { mpi_mul_ui( w, u, v ); }
void g10m_fdiv_q( MPI q, MPI d, MPI r ) { mpi_fdiv_q( q, d, r ); }
void g10m_powm( MPI r, MPI b, MPI e, MPI m) { mpi_powm( r, b, e, m ); }
int g10m_gcd( MPI g, MPI a, MPI b ) { return mpi_gcd( g, a, b ); }
int g10m_invm( MPI x, MPI u, MPI v ) { mpi_invm( x, u, v ); return 0; }
unsigned g10m_get_nbits( MPI a ) { return mpi_get_nbits( a ); }
unsigned
g10m_get_size( MPI a )
{
return mpi_get_nlimbs( a ) * BITS_PER_MPI_LIMB;
}
void
g10m_set_buffer( MPI a, const char *buffer, unsigned nbytes, int sign )
{
mpi_set_buffer( a, buffer, nbytes, sign );
}
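A short usage sketch of the wrapper API defined above (illustrative only; it assumes the g10m_* prototypes are visible via the project's mpi.h and omits any error handling):

#include "mpi.h"

static void
g10m_demo(void)
{
    MPI a = g10m_new( 256 );            /* 256-bit working values      */
    MPI b = g10m_new( 256 );
    MPI m = g10m_new( 256 );
    MPI r = g10m_new( 256 );

    g10m_set_ui( a, 7 );
    g10m_set_ui( b, 11 );
    g10m_set_ui( m, 13 );

    g10m_mulm( r, a, b, m );            /* r = (7 * 11) mod 13 = 12    */

    g10m_release( r );
    g10m_release( m );
    g10m_release( b );
    g10m_release( a );
}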

10
mpi/generic/distfiles Normal file

@@ -0,0 +1,10 @@
mpih-add1.c
mpih-mul1.c
mpih-mul2.c
mpih-mul3.c
mpih-lshift.c
mpih-rshift.c
mpih-sub1.c
udiv-w-sdiv.c
mpi-asm-defs.h

10
mpi/generic/mpi-asm-defs.h Normal file

@@ -0,0 +1,10 @@
/* This file defines some basic constants for the MPI machinery. We
* need to define the types on a per-CPU basis, so it is done with
* this file here. */
#define BYTES_PER_MPI_LIMB (SIZEOF_UNSIGNED_LONG)
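The per-CPU byte size defined here is what the rest of the library scales against; the limb width in bits is conventionally derived from it, roughly as sketched below (an assumption for illustration, not a quote from this diff), and that bit width is what the limb-count expression in g10m_new() earlier in the diff divides by.

#define BITS_PER_MPI_LIMB   (8 * BYTES_PER_MPI_LIMB)

/* e.g. an nbits-bit value needs
 *   (nbits + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB
 * limbs, as used by g10m_new() and g10m_new_secure() above. */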

65
mpi/generic/mpih-add1.c Normal file

@@ -0,0 +1,65 @@
/* mpihelp-add_1.c - MPI helper functions
* Copyright (C) 1994, 1996, 1997, 1998,
* 2000 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"
#include "longlong.h"
mpi_limb_t
mpihelp_add_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
mpi_ptr_t s2_ptr, mpi_size_t size)
{
mpi_limb_t x, y, cy;
mpi_size_t j;
/* The loop counter and index J go from -SIZE to -1. This way
the loop becomes faster. */
j = -size;
/* Offset the base pointers to compensate for the negative indices. */
s1_ptr -= j;
s2_ptr -= j;
res_ptr -= j;
cy = 0;
do {
y = s2_ptr[j];
x = s1_ptr[j];
y += cy; /* add previous carry to one addend */
cy = y < cy; /* get out carry from that addition */
y += x; /* add other addend */
cy += y < x; /* get out carry from that add, combine */
res_ptr[j] = y;
} while( ++j );
return cy;
}
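A small usage sketch for the helper above (illustrative only): adding two 2-limb numbers and catching the carry out of the top limb. Limb 0 is the least significant limb.

#include "mpi-internal.h"

static void
add_n_demo(void)
{
    mpi_limb_t a[2] = { (mpi_limb_t)-1, (mpi_limb_t)-1 };  /* 2^(2*limb)-1 */
    mpi_limb_t b[2] = { 1, 0 };                            /* + 1          */
    mpi_limb_t r[2];
    mpi_limb_t carry;

    carry = mpihelp_add_n( r, a, b, 2 );

    /* Now r[0] == 0, r[1] == 0 and carry == 1: the sum overflowed
     * both limbs, and the caller is responsible for the extra limb. */
}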

69
mpi/generic/mpih-lshift.c Normal file

@@ -0,0 +1,69 @@
/* mpihelp-lshift.c - MPI helper functions
* Copyright (C) 1994, 1996, 1998, 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"
/* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left
* and store the USIZE least significant digits of the result at WP.
* Return the bits shifted out from the most significant digit.
*
* Argument constraints:
* 1. 0 < CNT < BITS_PER_MP_LIMB
* 2. If the result is to be written over the input, WP must be >= UP.
*/
mpi_limb_t
mpihelp_lshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
unsigned int cnt)
{
mpi_limb_t high_limb, low_limb;
unsigned sh_1, sh_2;
mpi_size_t i;
mpi_limb_t retval;
sh_1 = cnt;
wp += 1;
sh_2 = BITS_PER_MPI_LIMB - sh_1;
i = usize - 1;
low_limb = up[i];
retval = low_limb >> sh_2;
high_limb = low_limb;
while( --i >= 0 ) {
low_limb = up[i];
wp[i] = (high_limb << sh_1) | (low_limb >> sh_2);
high_limb = low_limb;
}
wp[i] = high_limb << sh_1;
return retval;
}

61
mpi/generic/mpih-mul1.c Normal file

@@ -0,0 +1,61 @@
/* mpihelp-mul_1.c - MPI helper functions
* Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"
#include "longlong.h"
mpi_limb_t
mpihelp_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
mpi_limb_t s2_limb)
{
mpi_limb_t cy_limb;
mpi_size_t j;
mpi_limb_t prod_high, prod_low;
/* The loop counter and index J go from -S1_SIZE to -1. This way
* the loop becomes faster. */
j = -s1_size;
/* Offset the base pointers to compensate for the negative indices. */
s1_ptr -= j;
res_ptr -= j;
cy_limb = 0;
do {
umul_ppmm( prod_high, prod_low, s1_ptr[j], s2_limb );
prod_low += cy_limb;
cy_limb = (prod_low < cy_limb?1:0) + prod_high;
res_ptr[j] = prod_low;
} while( ++j );
return cy_limb;
}

66
mpi/generic/mpih-mul2.c Normal file

@@ -0,0 +1,66 @@
/* mpihelp-mul_2.c - MPI helper functions
* Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"
#include "longlong.h"
mpi_limb_t
mpihelp_addmul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
mpi_size_t s1_size, mpi_limb_t s2_limb)
{
mpi_limb_t cy_limb;
mpi_size_t j;
mpi_limb_t prod_high, prod_low;
mpi_limb_t x;
/* The loop counter and index J go from -SIZE to -1. This way
* the loop becomes faster. */
j = -s1_size;
res_ptr -= j;
s1_ptr -= j;
cy_limb = 0;
do {
umul_ppmm( prod_high, prod_low, s1_ptr[j], s2_limb );
prod_low += cy_limb;
cy_limb = (prod_low < cy_limb?1:0) + prod_high;
x = res_ptr[j];
prod_low = x + prod_low;
cy_limb += prod_low < x?1:0;
res_ptr[j] = prod_low;
} while ( ++j );
return cy_limb;
}
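Together with mpihelp_mul_1 from mpih-mul1.c, the addmul helper above is the building block of the schoolbook product: the first row of partial products initialises the result, and each further row is added in, shifted by one limb. The sketch below is illustrative only; the tree's real mpihelp_mul (in mpih-mul.c, not shown in this excerpt) additionally switches to Karatsuba above a threshold, per the 2000-01-13 ChangeLog entry.

#include "mpi-internal.h"

/* prodp must have room for usize + vsize limbs; usize, vsize >= 1. */
static void
mul_schoolbook_sketch( mpi_ptr_t prodp,
                       mpi_ptr_t up, mpi_size_t usize,
                       mpi_ptr_t vp, mpi_size_t vsize )
{
    mpi_size_t i;

    /* First row: prodp[0..usize-1] = up * vp[0], carry limb on top. */
    prodp[usize] = mpihelp_mul_1( prodp, up, usize, vp[0] );

    /* Remaining rows: add up * vp[i] into the result, shifted by i limbs. */
    for( i = 1; i < vsize; i++ )
        prodp[usize + i] = mpihelp_addmul_1( prodp + i, up, usize, vp[i] );
}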

67
mpi/generic/mpih-mul3.c Normal file

@@ -0,0 +1,67 @@
/* mpihelp-mul_3.c - MPI helper functions
* Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"
#include "longlong.h"
mpi_limb_t
mpihelp_submul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
mpi_size_t s1_size, mpi_limb_t s2_limb)
{
mpi_limb_t cy_limb;
mpi_size_t j;
mpi_limb_t prod_high, prod_low;
mpi_limb_t x;
/* The loop counter and index J goes from -SIZE to -1. This way
* the loop becomes faster. */
j = -s1_size;
res_ptr -= j;
s1_ptr -= j;
cy_limb = 0;
do {
umul_ppmm( prod_high, prod_low, s1_ptr[j], s2_limb);
prod_low += cy_limb;
cy_limb = (prod_low < cy_limb?1:0) + prod_high;
x = res_ptr[j];
prod_low = x - prod_low;
cy_limb += prod_low > x?1:0;
res_ptr[j] = prod_low;
} while( ++j );
return cy_limb;
}
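
Reading the loop above, {res,size} ends up as its old value minus
v*{s1,size} plus cy*B^size, where B = 2^BITS_PER_MPI_LIMB; in other words the
return value is what still has to be borrowed from the limb above the region
that was touched.  A hypothetical wrapper (not in this commit) that makes the
contract explicit for a (size+1)-limb minuend:

#include "mpi-internal.h"

/* Hypothetical sketch: subtract v * {up,size} from the (size+1)-limb
 * number {rp,size+1}.  mpihelp_submul_1 handles the low SIZE limbs and
 * reports how much must still come out of the top limb; the wrapper
 * returns 1 if the whole subtraction underflowed. */
static mpi_limb_t
example_submul( mpi_ptr_t rp, mpi_ptr_t up, mpi_size_t size, mpi_limb_t v )
{
    mpi_limb_t borrow = mpihelp_submul_1( rp, up, size, v );
    mpi_limb_t high   = rp[size];

    rp[size] = high - borrow;
    return high < borrow ? 1 : 0;
}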

68
mpi/generic/mpih-rshift.c Normal file
View file

@ -0,0 +1,68 @@
/* mpih-rshift.c - MPI helper functions
* Copyright (C) 1994, 1996, 1998, 1999,
* 2000, 2001 Free Software Foundation, Inc.
*
* This file is part of GNUPG
*
* GNUPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GNUPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"
/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right
* and store the USIZE least significant limbs of the result at WP.
* The bits shifted out to the right are returned.
*
* Argument constraints:
* 1. 0 < CNT < BITS_PER_MP_LIMB
* 2. If the result is to be written over the input, WP must be <= UP.
*/
mpi_limb_t
mpihelp_rshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt)
{
mpi_limb_t high_limb, low_limb;
unsigned sh_1, sh_2;
mpi_size_t i;
mpi_limb_t retval;
sh_1 = cnt;
wp -= 1;
sh_2 = BITS_PER_MPI_LIMB - sh_1;
high_limb = up[0];
retval = high_limb << sh_2;
low_limb = high_limb;
for( i=1; i < usize; i++) {
high_limb = up[i];
wp[i] = (low_limb >> sh_1) | (high_limb << sh_2);
low_limb = high_limb;
}
wp[i] = low_limb >> sh_1;
return retval;
}
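
Constraint 2 above explicitly permits in-place use, since wp == up satisfies
"WP must be <= UP".  A hypothetical one-liner (not part of this commit) that
divides a limb vector by 2^cnt in place therefore needs no temporary buffer:

#include "mpi-internal.h"

/* Hypothetical sketch: shift {ap,asize} right by CNT bits in place,
 * with 0 < CNT < BITS_PER_MPI_LIMB.  The bits shifted out of the low
 * end come back left-aligned in a limb, as mpihelp_rshift returns them. */
static mpi_limb_t
example_rshift_in_place( mpi_ptr_t ap, mpi_size_t asize, unsigned cnt )
{
    return mpihelp_rshift( ap, ap, asize, cnt );    /* wp == up is fine */
}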

65
mpi/generic/mpih-sub1.c Normal file
View file

@ -0,0 +1,65 @@
/* mpih-sub1.c - MPI helper functions
* Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"
#include "longlong.h"
mpi_limb_t
mpihelp_sub_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
mpi_ptr_t s2_ptr, mpi_size_t size)
{
mpi_limb_t x, y, cy;
mpi_size_t j;
/* The loop counter and index J goes from -SIZE to -1. This way
the loop becomes faster. */
j = -size;
/* Offset the base pointers to compensate for the negative indices. */
s1_ptr -= j;
s2_ptr -= j;
res_ptr -= j;
cy = 0;
do {
y = s2_ptr[j];
x = s1_ptr[j];
y += cy; /* add previous carry to subtrahend */
cy = y < cy; /* get out carry from that addition */
y = x - y; /* main subtract */
cy += y > x; /* get out carry from the subtract, combine */
res_ptr[j] = y;
} while( ++j );
return cy;
}
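
Since the final borrow returned above is 1 exactly when the first operand is
smaller than the second (for equal-length operands), the routine also serves
as a comparison primitive.  A hypothetical helper (not in this commit)
showing that use:

#include "mpi-internal.h"

/* Hypothetical sketch: return 1 if {s1,size} < {s2,size}, else 0.  The
 * difference lands in a caller-provided scratch area of SIZE limbs and
 * is simply discarded. */
static int
example_is_less_than( mpi_ptr_t s1, mpi_ptr_t s2,
                      mpi_size_t size, mpi_ptr_t scratch )
{
    return mpihelp_sub_n( scratch, s1, s2, size ) != 0;
}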

133
mpi/generic/udiv-w-sdiv.c Normal file
View file

@ -0,0 +1,133 @@
/* mpihelp_udiv_w_sdiv -- implement udiv_qrnnd on machines with only signed
* division.
* Copyright (C) 1992, 1994, 1996, 1998 Free Software Foundation, Inc.
* Contributed by Peter L. Montgomery.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"
#include "longlong.h"
#if 0 /* not yet ported to MPI */
mpi_limb_t
mpihelp_udiv_w_sdiv( mpi_limb_t *rp,
mpi_limb_t *a1,
mpi_limb_t *a0,
mpi_limb_t *d )
{
mp_limb_t q, r;
mp_limb_t c0, c1, b1;
if ((mpi_limb_signed_t) d >= 0)
{
if (a1 < d - a1 - (a0 >> (BITS_PER_MP_LIMB - 1)))
{
/* dividend, divisor, and quotient are nonnegative */
sdiv_qrnnd (q, r, a1, a0, d);
}
else
{
/* Compute c1*2^32 + c0 = a1*2^32 + a0 - 2^31*d */
sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (BITS_PER_MP_LIMB - 1));
/* Divide (c1*2^32 + c0) by d */
sdiv_qrnnd (q, r, c1, c0, d);
/* Add 2^31 to quotient */
q += (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
}
}
else
{
b1 = d >> 1; /* d/2, between 2^30 and 2^31 - 1 */
c1 = a1 >> 1; /* A/2 */
c0 = (a1 << (BITS_PER_MP_LIMB - 1)) + (a0 >> 1);
if (a1 < b1) /* A < 2^32*b1, so A/2 < 2^31*b1 */
{
sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
r = 2*r + (a0 & 1); /* Remainder from A/(2*b1) */
if ((d & 1) != 0)
{
if (r >= q)
r = r - q;
else if (q - r <= d)
{
r = r - q + d;
q--;
}
else
{
r = r - q + 2*d;
q -= 2;
}
}
}
else if (c1 < b1) /* So 2^31 <= (A/2)/b1 < 2^32 */
{
c1 = (b1 - 1) - c1;
c0 = ~c0; /* logical NOT */
sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
q = ~q; /* (A/2)/b1 */
r = (b1 - 1) - r;
r = 2*r + (a0 & 1); /* A/(2*b1) */
if ((d & 1) != 0)
{
if (r >= q)
r = r - q;
else if (q - r <= d)
{
r = r - q + d;
q--;
}
else
{
r = r - q + 2*d;
q -= 2;
}
}
}
else /* Implies c1 = b1 */
{ /* Hence a1 = d - 1 = 2*b1 - 1 */
if (a0 >= -d)
{
q = -1;
r = a0 + d;
}
else
{
q = -2;
r = a0 + 2*d;
}
}
}
*rp = r;
return q;
}
#endif
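
The routine above stays disabled until it is ported to the MPI types, but the
contract it has to meet is the usual udiv_qrnnd one: given n1 < d, produce q
and r with n1*B + n0 == q*d + r and r < d, where B = 2^BITS_PER_MPI_LIMB.
A hypothetical check (not part of this commit) written with the umul_ppmm
macro from longlong.h:

#include "mpi-internal.h"
#include "longlong.h"

/* Hypothetical sketch: verify that (q,r) is the quotient/remainder pair
 * for the two-limb dividend {n1,n0} and the divisor d. */
static int
example_check_udiv( mpi_limb_t q, mpi_limb_t r,
                    mpi_limb_t n1, mpi_limb_t n0, mpi_limb_t d )
{
    mpi_limb_t p1, p0;

    umul_ppmm( p1, p0, q, d );          /* {p1,p0} = q * d         */
    p0 += r;                            /* add the remainder ...   */
    p1 += p0 < r;                       /* ... propagating carry   */
    return r < d && p1 == n1 && p0 == n0;
}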

84
mpi/hppa/README Normal file
View file

@ -0,0 +1,84 @@
This directory contains mpn functions for various HP PA-RISC chips. Code
that runs faster on the PA7100 and later implementations is in the pa7100
directory.
RELEVANT OPTIMIZATION ISSUES
Load and Store timing
On the PA7000 no memory instructions can issue the two cycles after a store.
For the PA7100, this is reduced to one cycle.
The PA7100 has a lockup-free cache, so it helps to schedule loads and the
dependent instruction really far from each other.
STATUS
1. mpn_mul_1 could be improved to 6.5 cycles/limb on the PA7100, using the
instructions below (but some sw pipelining is needed to avoid the
xmpyu-fstds delay):
fldds s1_ptr
xmpyu
fstds N(%r30)
xmpyu
fstds N(%r30)
ldws N(%r30)
ldws N(%r30)
ldws N(%r30)
ldws N(%r30)
addc
stws res_ptr
addc
stws res_ptr
addib Loop
2. mpn_addmul_1 could be improved from the current 10 to 7.5 cycles/limb
(asymptotically) on the PA7100, using the instructions below. With proper
sw pipelining and the unrolling level below, the speed becomes 8
cycles/limb.
fldds s1_ptr
fldds s1_ptr
xmpyu
fstds N(%r30)
xmpyu
fstds N(%r30)
xmpyu
fstds N(%r30)
xmpyu
fstds N(%r30)
ldws N(%r30)
ldws N(%r30)
ldws N(%r30)
ldws N(%r30)
ldws N(%r30)
ldws N(%r30)
ldws N(%r30)
ldws N(%r30)
addc
addc
addc
addc
addc %r0,%r0,cy-limb
ldws res_ptr
ldws res_ptr
ldws res_ptr
ldws res_ptr
add
stws res_ptr
addc
stws res_ptr
addc
stws res_ptr
addc
stws res_ptr
addib

7
mpi/hppa/distfiles Normal file
View file

@ -0,0 +1,7 @@
README
udiv-qrnnd.S
mpih-add1.S
mpih-sub1.S
mpih-lshift.S
mpih-rshift.S

72
mpi/hppa/mpih-add1.S Normal file
View file

@ -0,0 +1,72 @@
/* hppa add_n -- Add two limb vectors of the same length > 0 and store
* sum in a third limb vector.
*
* Copyright (C) 1992, 1994, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
/*******************
* mpi_limb_t
* mpihelp_add_n( mpi_ptr_t res_ptr, (gr26)
* mpi_ptr_t s1_ptr, (gr25)
* mpi_ptr_t s2_ptr, (gr24)
* mpi_size_t size) (gr23)
*
* One might want to unroll this as for other processors, but it turns
* out that the data cache contention after a store makes such
* unrolling useless. We can't come under 5 cycles/limb anyway.
*/
.code
.export mpihelp_add_n
.label mpihelp_add_n
.proc
.callinfo frame=0,no_calls
.entry
ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
addib,= -1,%r23,L$end ; check for (SIZE == 1)
add %r20,%r19,%r28 ; add first limbs ignoring cy
.label L$loop
ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
stws,ma %r28,4(0,%r26)
addib,<> -1,%r23,L$loop
addc %r20,%r19,%r28
.label L$end
stws %r28,0(0,%r26)
bv 0(%r2)
addc %r0,%r0,%r28
.exit
.procend

77
mpi/hppa/mpih-lshift.S Normal file
View file

@ -0,0 +1,77 @@
/* hppa lshift
*
* Copyright (C) 1992, 1994, 1998
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*******************
* mpi_limb_t
* mpihelp_lshift( mpi_ptr_t wp, (gr26)
* mpi_ptr_t up, (gr25)
* mpi_size_t usize, (gr24)
* unsigned cnt) (gr23)
*/
.code
.export mpihelp_lshift
.label mpihelp_lshift
.proc
.callinfo frame=64,no_calls
.entry
sh2add %r24,%r25,%r25
sh2add %r24,%r26,%r26
ldws,mb -4(0,%r25),%r22
subi 32,%r23,%r1
mtsar %r1
addib,= -1,%r24,L$0004
vshd %r0,%r22,%r28 ; compute carry out limb
ldws,mb -4(0,%r25),%r29
addib,= -1,%r24,L$0002
vshd %r22,%r29,%r20
.label L$loop
ldws,mb -4(0,%r25),%r22
stws,mb %r20,-4(0,%r26)
addib,= -1,%r24,L$0003
vshd %r29,%r22,%r20
ldws,mb -4(0,%r25),%r29
stws,mb %r20,-4(0,%r26)
addib,<> -1,%r24,L$loop
vshd %r22,%r29,%r20
.label L$0002
stws,mb %r20,-4(0,%r26)
vshd %r29,%r0,%r20
bv 0(%r2)
stw %r20,-4(0,%r26)
.label L$0003
stws,mb %r20,-4(0,%r26)
.label L$0004
vshd %r22,%r0,%r20
bv 0(%r2)
stw %r20,-4(0,%r26)
.exit
.procend

73
mpi/hppa/mpih-rshift.S Normal file
View file

@ -0,0 +1,73 @@
/* hppa rshift
*
* Copyright (C) 1992, 1994, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*******************
* mpi_limb_t
* mpihelp_rshift( mpi_ptr_t wp, (gr26)
* mpi_ptr_t up, (gr25)
* mpi_size_t usize, (gr24)
* unsigned cnt) (gr23)
*/
.code
.export mpihelp_rshift
.label mpihelp_rshift
.proc
.callinfo frame=64,no_calls
.entry
ldws,ma 4(0,%r25),%r22
mtsar %r23
addib,= -1,%r24,L$r004
vshd %r22,%r0,%r28 ; compute carry out limb
ldws,ma 4(0,%r25),%r29
addib,= -1,%r24,L$r002
vshd %r29,%r22,%r20
.label L$roop
ldws,ma 4(0,%r25),%r22
stws,ma %r20,4(0,%r26)
addib,= -1,%r24,L$r003
vshd %r22,%r29,%r20
ldws,ma 4(0,%r25),%r29
stws,ma %r20,4(0,%r26)
addib,<> -1,%r24,L$roop
vshd %r29,%r22,%r20
.label L$r002
stws,ma %r20,4(0,%r26)
vshd %r0,%r29,%r20
bv 0(%r2)
stw %r20,0(0,%r26)
.label L$r003
stws,ma %r20,4(0,%r26)
.label L$r004
vshd %r0,%r22,%r20
bv 0(%r2)
stw %r20,0(0,%r26)
.exit
.procend

80
mpi/hppa/mpih-sub1.S Normal file
View file

@ -0,0 +1,80 @@
/* hppa sub_n -- Subtract two limb vectors of the same length > 0 and store
* difference in a third limb vector.
*
* Copyright (C) 1992, 1994, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_sub_n( mpi_ptr_t res_ptr, (gr26)
* mpi_ptr_t s1_ptr, (gr25)
* mpi_ptr_t s2_ptr, (gr24)
* mpi_size_t size) (gr23)
*
* One might want to unroll this as for other processors, but it turns
* out that the data cache contention after a store makes such
* unrolling useless. We can't come under 5 cycles/limb anyway.
*/
.code
.export mpihelp_sub_n
.label mpihelp_sub_n
.proc
.callinfo frame=0,no_calls
.entry
ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
addib,= -1,%r23,L$end ; check for (SIZE == 1)
sub %r20,%r19,%r28 ; subtract first limbs ignoring cy
.label L$loop
ldws,ma 4(0,%r25),%r20
ldws,ma 4(0,%r24),%r19
stws,ma %r28,4(0,%r26)
addib,<> -1,%r23,L$loop
subb %r20,%r19,%r28
.label L$end
stws %r28,0(0,%r26)
addc %r0,%r0,%r28
bv 0(%r2)
subi 1,%r28,%r28
.exit
.procend

298
mpi/hppa/udiv-qrnnd.S Normal file
View file

@ -0,0 +1,298 @@
/* HP-PA __udiv_qrnnd division support, used from longlong.h.
* This version runs fast on pre-PA7000 CPUs.
*
* Copyright (C) 1993, 1994, 1998, 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
/* INPUT PARAMETERS
* rem_ptr gr26
* n1 gr25
* n0 gr24
* d gr23
*
* The code size is a bit excessive. We could merge the last two ds;addc
* sequences by simply moving the "bb,< Odd" instruction down. The only
* trouble is the FFFFFFFF code that would need some hacking.
*/
.code
.export __udiv_qrnnd
.label __udiv_qrnnd
.proc
.callinfo frame=0,no_calls
.entry
comb,< %r23,0,L$largedivisor
sub %r0,%r23,%r1 ; clear cy as side-effect
ds %r0,%r1,%r0
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r24
ds %r25,%r23,%r25
addc %r24,%r24,%r28
ds %r25,%r23,%r25
comclr,>= %r25,%r0,%r0
addl %r25,%r23,%r25
stws %r25,0(0,%r26)
bv 0(%r2)
addc %r28,%r28,%r28
.label L$largedivisor
extru %r24,31,1,%r19 ; r19 = n0 & 1
bb,< %r23,31,L$odd
extru %r23,30,31,%r22 ; r22 = d >> 1
shd %r25,%r24,1,%r24 ; r24 = new n0
extru %r25,30,31,%r25 ; r25 = new n1
sub %r0,%r22,%r21
ds %r0,%r21,%r0
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
comclr,>= %r25,%r0,%r0
addl %r25,%r22,%r25
sh1addl %r25,%r19,%r25
stws %r25,0(0,%r26)
bv 0(%r2)
addc %r24,%r24,%r28
.label L$odd
addib,sv,n 1,%r22,L$FF.. ; r22 = (d / 2 + 1)
shd %r25,%r24,1,%r24 ; r24 = new n0
extru %r25,30,31,%r25 ; r25 = new n1
sub %r0,%r22,%r21
ds %r0,%r21,%r0
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r24
ds %r25,%r22,%r25
addc %r24,%r24,%r28
comclr,>= %r25,%r0,%r0
addl %r25,%r22,%r25
sh1addl %r25,%r19,%r25
; We have computed (n1,,n0) / (d + 1), q' = r28, r' = r25
add,nuv %r28,%r25,%r25
addl %r25,%r1,%r25
addc %r0,%r28,%r28
sub,<< %r25,%r23,%r0
addl %r25,%r1,%r25
stws %r25,0(0,%r26)
bv 0(%r2)
addc %r0,%r28,%r28
; This is just a special case of the code above.
; We come here when d == 0xFFFFFFFF
.label L$FF..
add,uv %r25,%r24,%r24
sub,<< %r24,%r23,%r0
ldo 1(%r24),%r24
stws %r24,0(0,%r26)
bv 0(%r2)
addc %r0,%r25,%r28
.exit
.procend

5
mpi/hppa1.1/distfiles Normal file
View file

@ -0,0 +1,5 @@
udiv-qrnnd.S
mpih-mul1.S
mpih-mul2.S
mpih-mul3.S

115
mpi/hppa1.1/mpih-mul1.S Normal file
View file

@ -0,0 +1,115 @@
/* hppa1.1 mul_1 -- Multiply a limb vector with a limb and store
* the result in a second limb vector.
*
* Copyright (C) 1992, 1993, 1994, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
/*******************
* mpi_limb_t
* mpihelp_mul_1( mpi_ptr_t res_ptr, (r26)
* mpi_ptr_t s1_ptr, (r25)
* mpi_size_t s1_size, (r24)
* mpi_limb_t s2_limb) (r23)
*
*
*
* This runs at 9 cycles/limb on a PA7000. With the used instructions, it can
* not become faster due to data cache contention after a store. On the
* PA7100 it runs at 7 cycles/limb, and that can not be improved either, since
* only the xmpyu does not need the integer pipeline, so the only dual-issue
* we will get are addc+xmpyu. Unrolling would not help either CPU.
*
* We could use fldds to read two limbs at a time from the S1 array, and that
* could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and
* PA7100, respectively. We don't do that since it does not seem worth the
* (alignment) troubles...
*
* At least the PA7100 is rumored to be able to deal with cache-misses
* without stalling instruction issue. If this is true, and the cache is
* actually also lockup-free, we should use a deeper software pipeline, and
* load from S1 very early! (The loads and stores to -12(sp) will surely be
* in the cache.)
*/
.code
.export mpihelp_mul_1
.label mpihelp_mul_1
.proc
.callinfo frame=64,no_calls
.entry
ldo 64(%r30),%r30
fldws,ma 4(%r25),%fr5
stw %r23,-16(%r30) ; move s2_limb ...
addib,= -1,%r24,L$just_one_limb
fldws -16(%r30),%fr4 ; ... into fr4
add %r0,%r0,%r0 ; clear carry
xmpyu %fr4,%fr5,%fr6
fldws,ma 4(%r25),%fr7
fstds %fr6,-16(%r30)
xmpyu %fr4,%fr7,%fr8
ldw -12(%r30),%r19 ; least significant limb in product
ldw -16(%r30),%r28
fstds %fr8,-16(%r30)
addib,= -1,%r24,L$end
ldw -12(%r30),%r1
; Main loop
.label L$loop
fldws,ma 4(%r25),%fr5
stws,ma %r19,4(%r26)
addc %r28,%r1,%r19
xmpyu %fr4,%fr5,%fr6
ldw -16(%r30),%r28
fstds %fr6,-16(%r30)
addib,<> -1,%r24,L$loop
ldw -12(%r30),%r1
.label L$end
stws,ma %r19,4(%r26)
addc %r28,%r1,%r19
ldw -16(%r30),%r28
stws,ma %r19,4(%r26)
addc %r0,%r28,%r28
bv 0(%r2)
ldo -64(%r30),%r30
.label L$just_one_limb
xmpyu %fr4,%fr5,%fr6
fstds %fr6,-16(%r30)
ldw -16(%r30),%r28
ldo -64(%r30),%r30
bv 0(%r2)
fstws %fr6R,0(%r26)
.exit
.procend

117
mpi/hppa1.1/mpih-mul2.S Normal file
View file

@ -0,0 +1,117 @@
/* hppa1.1 addmul_1 -- Multiply a limb vector with a limb and add
* the result to a second limb vector.
*
* Copyright (C) 1992, 1993, 1994, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
/*******************
* mpi_limb_t
* mpihelp_addmul_1( mpi_ptr_t res_ptr, (r26)
* mpi_ptr_t s1_ptr, (r25)
* mpi_size_t s1_size, (r24)
* mpi_limb_t s2_limb) (r23)
*
* This runs at 11 cycles/limb on a PA7000. With the used instructions, it
* can not become faster due to data cache contention after a store. On the
* PA7100 it runs at 10 cycles/limb, and that can not be improved either,
* since only the xmpyu does not need the integer pipeline, so the only
* dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb
* on the PA7100.
*
* There are some ideas described in mul1.S that apply to this code too.
*/
.code
.export mpihelp_addmul_1
.label mpihelp_addmul_1
.proc
.callinfo frame=64,no_calls
.entry
ldo 64(%r30),%r30
fldws,ma 4(%r25),%fr5
stw %r23,-16(%r30) ; move s2_limb ...
addib,= -1,%r24,L$just_one_limb
fldws -16(%r30),%fr4 ; ... into fr4
add %r0,%r0,%r0 ; clear carry
xmpyu %fr4,%fr5,%fr6
fldws,ma 4(%r25),%fr7
fstds %fr6,-16(%r30)
xmpyu %fr4,%fr7,%fr8
ldw -12(%r30),%r19 ; least significant limb in product
ldw -16(%r30),%r28
fstds %fr8,-16(%r30)
addib,= -1,%r24,L$end
ldw -12(%r30),%r1
; Main loop
.label L$loop
ldws 0(%r26),%r29
fldws,ma 4(%r25),%fr5
add %r29,%r19,%r19
stws,ma %r19,4(%r26)
addc %r28,%r1,%r19
xmpyu %fr4,%fr5,%fr6
ldw -16(%r30),%r28
fstds %fr6,-16(%r30)
addc %r0,%r28,%r28
addib,<> -1,%r24,L$loop
ldw -12(%r30),%r1
.label L$end
ldw 0(%r26),%r29
add %r29,%r19,%r19
stws,ma %r19,4(%r26)
addc %r28,%r1,%r19
ldw -16(%r30),%r28
ldws 0(%r26),%r29
addc %r0,%r28,%r28
add %r29,%r19,%r19
stws,ma %r19,4(%r26)
addc %r0,%r28,%r28
bv 0(%r2)
ldo -64(%r30),%r30
.label L$just_one_limb
xmpyu %fr4,%fr5,%fr6
ldw 0(%r26),%r29
fstds %fr6,-16(%r30)
ldw -12(%r30),%r1
ldw -16(%r30),%r28
add %r29,%r1,%r19
stw %r19,0(%r26)
addc %r0,%r28,%r28
bv 0(%r2)
ldo -64(%r30),%r30
.exit
.procend

127
mpi/hppa1.1/mpih-mul3.S Normal file
View file

@ -0,0 +1,127 @@
/* hppa1.1 submul_1 -- Multiply a limb vector with a limb and subtract
* the result from a second limb vector.
*
* Copyright (C) 1992, 1993, 1994, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
/*******************
* mpi_limb_t
* mpihelp_submul_1( mpi_ptr_t res_ptr, (r26)
* mpi_ptr_t s1_ptr, (r25)
* mpi_size_t s1_size, (r24)
* mpi_limb_t s2_limb) (r23)
*
*
* This runs at 12 cycles/limb on a PA7000. With the used instructions, it
* can not become faster due to data cache contention after a store. On the
* PA7100 it runs at 11 cycles/limb, and that can not be improved either,
* since only the xmpyu does not need the integer pipeline, so the only
* dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb
* on the PA7100.
*
* There are some ideas described in mul1.S that apply to this code too.
*
* It seems possible to make this run as fast as addmul_1, if we use
* sub,>>= %r29,%r19,%r22
* addi 1,%r28,%r28
* but that requires reworking the hairy software pipeline...
*/
.code
.export mpihelp_submul_1
.label mpihelp_submul_1
.proc
.callinfo frame=64,no_calls
.entry
ldo 64(%r30),%r30
fldws,ma 4(%r25),%fr5
stw %r23,-16(%r30) ; move s2_limb ...
addib,= -1,%r24,L$just_one_limb
fldws -16(%r30),%fr4 ; ... into fr4
add %r0,%r0,%r0 ; clear carry
xmpyu %fr4,%fr5,%fr6
fldws,ma 4(%r25),%fr7
fstds %fr6,-16(%r30)
xmpyu %fr4,%fr7,%fr8
ldw -12(%r30),%r19 ; least significant limb in product
ldw -16(%r30),%r28
fstds %fr8,-16(%r30)
addib,= -1,%r24,L$end
ldw -12(%r30),%r1
; Main loop
.label L$loop
ldws 0(%r26),%r29
fldws,ma 4(%r25),%fr5
sub %r29,%r19,%r22
add %r22,%r19,%r0
stws,ma %r22,4(%r26)
addc %r28,%r1,%r19
xmpyu %fr4,%fr5,%fr6
ldw -16(%r30),%r28
fstds %fr6,-16(%r30)
addc %r0,%r28,%r28
addib,<> -1,%r24,L$loop
ldw -12(%r30),%r1
.label L$end
ldw 0(%r26),%r29
sub %r29,%r19,%r22
add %r22,%r19,%r0
stws,ma %r22,4(%r26)
addc %r28,%r1,%r19
ldw -16(%r30),%r28
ldws 0(%r26),%r29
addc %r0,%r28,%r28
sub %r29,%r19,%r22
add %r22,%r19,%r0
stws,ma %r22,4(%r26)
addc %r0,%r28,%r28
bv 0(%r2)
ldo -64(%r30),%r30
.label L$just_one_limb
xmpyu %fr4,%fr5,%fr6
ldw 0(%r26),%r29
fstds %fr6,-16(%r30)
ldw -12(%r30),%r1
ldw -16(%r30),%r28
sub %r29,%r1,%r22
add %r22,%r1,%r0
stw %r22,0(%r26)
addc %r0,%r28,%r28
bv 0(%r2)
ldo -64(%r30),%r30
.exit
.procend

90
mpi/hppa1.1/udiv-qrnnd.S Normal file
View file

@ -0,0 +1,90 @@
/* HP-PA __udiv_qrnnd division support, used from longlong.h.
* This version runs fast on PA 7000 and later.
*
* Copyright (C) 1993, 1994, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
/* INPUT PARAMETERS
* rem_ptr gr26
* n1 gr25
* n0 gr24
* d gr23
*/
.code
.label L$0000
.word 0x43f00000
.word 0x0
.export __udiv_qrnnd
.label __udiv_qrnnd
.proc
.callinfo frame=64,no_calls
.entry
ldo 64(%r30),%r30
stws %r25,-16(0,%r30) ; n_hi
stws %r24,-12(0,%r30) ; n_lo
ldil L'L$0000,%r19 ; '
ldo R'L$0000(%r19),%r19 ; '
fldds -16(0,%r30),%fr5
stws %r23,-12(0,%r30)
comib,<= 0,%r25,L$1
fcnvxf,dbl,dbl %fr5,%fr5
fldds 0(0,%r19),%fr4
fadd,dbl %fr4,%fr5,%fr5
.label L$1
fcpy,sgl %fr0,%fr6L
fldws -12(0,%r30),%fr6R
fcnvxf,dbl,dbl %fr6,%fr4
fdiv,dbl %fr5,%fr4,%fr5
fcnvfx,dbl,dbl %fr5,%fr4
fstws %fr4R,-16(%r30)
xmpyu %fr4R,%fr6R,%fr6
ldws -16(%r30),%r28
fstds %fr6,-16(0,%r30)
ldws -12(0,%r30),%r21
ldws -16(0,%r30),%r20
sub %r24,%r21,%r22
subb %r25,%r20,%r19
comib,= 0,%r19,L$2
ldo -64(%r30),%r30
add %r22,%r23,%r22
ldo -1(%r28),%r28
.label L$2
bv 0(%r2)
stws %r22,0(0,%r26)
.exit
.procend

9
mpi/i386/distfiles Normal file
View file

@ -0,0 +1,9 @@
mpih-add1.S
mpih-mul1.S
mpih-mul2.S
mpih-mul3.S
mpih-lshift.S
mpih-rshift.S
mpih-sub1.S
syntax.h

118
mpi/i386/mpih-add1.S Normal file
View file

@ -0,0 +1,118 @@
/* i80386 add_n -- Add two limb vectors of the same length > 0 and store
* sum in a third limb vector.
*
* Copyright (C) 1992, 1994, 1995, 1998,
* 2001, 2002 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_add_n( mpi_ptr_t res_ptr, (sp + 4)
* mpi_ptr_t s1_ptr, (sp + 8)
* mpi_ptr_t s2_ptr, (sp + 12)
* mpi_size_t size) (sp + 16)
*/
.text
ALIGN (3)
.globl C_SYMBOL_NAME(mpihelp_add_n)
C_SYMBOL_NAME(mpihelp_add_n:)
pushl %edi
pushl %esi
movl 12(%esp),%edi /* res_ptr */
movl 16(%esp),%esi /* s1_ptr */
movl 20(%esp),%edx /* s2_ptr */
movl 24(%esp),%ecx /* size */
movl %ecx,%eax
shrl $3,%ecx /* compute count for unrolled loop */
negl %eax
andl $7,%eax /* get index where to start loop */
jz Loop /* necessary special case for 0 */
incl %ecx /* adjust loop count */
shll $2,%eax /* adjustment for pointers... */
subl %eax,%edi /* ... since they are offset ... */
subl %eax,%esi /* ... by a constant when we ... */
subl %eax,%edx /* ... enter the loop */
shrl $2,%eax /* restore previous value */
#ifdef PIC
/* Calculate start address in loop for PIC. Due to limitations in some
assemblers, Loop-L0-3 cannot be put into the leal */
call L0
L0: leal (%eax,%eax,8),%eax
addl (%esp),%eax
addl $(Loop-L0-3),%eax
addl $4,%esp
#else
/* Calculate start address in loop for non-PIC. */
leal Loop-3(%eax,%eax,8),%eax
#endif
jmp *%eax /* jump into loop */
ALIGN (3)
Loop: movl (%esi),%eax
adcl (%edx),%eax
movl %eax,(%edi)
movl 4(%esi),%eax
adcl 4(%edx),%eax
movl %eax,4(%edi)
movl 8(%esi),%eax
adcl 8(%edx),%eax
movl %eax,8(%edi)
movl 12(%esi),%eax
adcl 12(%edx),%eax
movl %eax,12(%edi)
movl 16(%esi),%eax
adcl 16(%edx),%eax
movl %eax,16(%edi)
movl 20(%esi),%eax
adcl 20(%edx),%eax
movl %eax,20(%edi)
movl 24(%esi),%eax
adcl 24(%edx),%eax
movl %eax,24(%edi)
movl 28(%esi),%eax
adcl 28(%edx),%eax
movl %eax,28(%edi)
leal 32(%edi),%edi
leal 32(%esi),%esi
leal 32(%edx),%edx
decl %ecx
jnz Loop
sbbl %eax,%eax
negl %eax
popl %esi
popl %edi
ret
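
The computed jump above (the leal / jmp *%eax pair) enters the eight-way
unrolled loop part-way through, so the size%8 leftover limbs are absorbed by
the first pass instead of needing a separate cleanup loop.  The same dispatch
idea as a hypothetical C sketch (not a drop-in replacement; the carry that
adcl keeps in the flags has to be tracked by hand here):

#include "mpi-internal.h"

/* One unrolled step: *rp++ = *s1++ + *s2++ + cy, updating cy. */
#define ADD_STEP  do { mpi_limb_t a = *s1++, b = *s2++, s = a + cy; \
                       cy = s < a; s += b; cy += s < b; *rp++ = s;  \
                  } while(0)

/* Hypothetical sketch of the dispatch used by the assembly: enter an
 * 8-way unrolled loop in the middle (Duff's device) so the first pass
 * does only size%8 steps.  Requires size > 0, like the assembly. */
static mpi_limb_t
example_add_n_unrolled( mpi_ptr_t rp, mpi_ptr_t s1,
                        mpi_ptr_t s2, mpi_size_t size )
{
    mpi_limb_t cy = 0;
    mpi_size_t passes = (size + 7) / 8;

    switch( size % 8 ) {
      case 0: do { ADD_STEP;
      case 7:      ADD_STEP;
      case 6:      ADD_STEP;
      case 5:      ADD_STEP;
      case 4:      ADD_STEP;
      case 3:      ADD_STEP;
      case 2:      ADD_STEP;
      case 1:      ADD_STEP;
              } while( --passes );
    }
    return cy;
}
#undef ADD_STEP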

96
mpi/i386/mpih-lshift.S Normal file
View file

@ -0,0 +1,96 @@
/* i80386 lshift
* Copyright (C) 1992, 1994, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_lshift( mpi_ptr_t wp, (sp + 4)
* mpi_ptr_t up, (sp + 8)
* mpi_size_t usize, (sp + 12)
* unsigned cnt) (sp + 16)
*/
.text
ALIGN (3)
.globl C_SYMBOL_NAME(mpihelp_lshift)
C_SYMBOL_NAME(mpihelp_lshift:)
pushl %edi
pushl %esi
pushl %ebx
movl 16(%esp),%edi /* res_ptr */
movl 20(%esp),%esi /* s_ptr */
movl 24(%esp),%edx /* size */
movl 28(%esp),%ecx /* cnt */
subl $4,%esi /* adjust s_ptr */
movl (%esi,%edx,4),%ebx /* read most significant limb */
xorl %eax,%eax
shldl %ebx,%eax /* compute carry limb */
decl %edx
jz Lend
pushl %eax /* push carry limb onto stack */
testb $1,%dl
jnz L1 /* enter loop in the middle */
movl %ebx,%eax
ALIGN (3)
Loop: movl (%esi,%edx,4),%ebx /* load next lower limb */
shldl %ebx,%eax /* compute result limb */
movl %eax,(%edi,%edx,4) /* store it */
decl %edx
L1: movl (%esi,%edx,4),%eax
shldl %eax,%ebx
movl %ebx,(%edi,%edx,4)
decl %edx
jnz Loop
shll %cl,%eax /* compute least significant limb */
movl %eax,(%edi) /* store it */
popl %eax /* pop carry limb */
popl %ebx
popl %esi
popl %edi
ret
Lend: shll %cl,%ebx /* compute least significant limb */
movl %ebx,(%edi) /* store it */
popl %ebx
popl %esi
popl %edi
ret

86
mpi/i386/mpih-mul1.S Normal file
View file

@ -0,0 +1,86 @@
/* i80386 mul_1 -- Multiply a limb vector with a limb and store
* the result in a second limb vector.
* Copyright (C) 1992, 1994, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_mul_1( mpi_ptr_t res_ptr, (sp + 4)
* mpi_ptr_t s1_ptr, (sp + 8)
* mpi_size_t s1_size, (sp + 12)
* mpi_limb_t s2_limb) (sp + 16)
*/
#define res_ptr edi
#define s1_ptr esi
#define size ecx
#define s2_limb ebp
TEXT
ALIGN (3)
GLOBL C_SYMBOL_NAME(mpihelp_mul_1)
C_SYMBOL_NAME(mpihelp_mul_1:)
INSN1(push,l ,R(edi))
INSN1(push,l ,R(esi))
INSN1(push,l ,R(ebx))
INSN1(push,l ,R(ebp))
INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20))
INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
INSN2(mov,l ,R(size),MEM_DISP(esp,28))
INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
INSN1(neg,l ,R(size))
INSN2(xor,l ,R(ebx),R(ebx))
ALIGN (3)
Loop:
INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
INSN1(mul,l ,R(s2_limb))
INSN2(add,l ,R(eax),R(ebx))
INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(eax))
INSN2(adc,l ,R(edx),$0)
INSN2(mov,l ,R(ebx),R(edx))
INSN1(inc,l ,R(size))
INSN1(jnz, ,Loop)
INSN2(mov,l ,R(eax),R(ebx))
INSN1(pop,l ,R(ebp))
INSN1(pop,l ,R(ebx))
INSN1(pop,l ,R(esi))
INSN1(pop,l ,R(edi))
ret

88
mpi/i386/mpih-mul2.S Normal file
View file

@ -0,0 +1,88 @@
/* i80386 addmul_1 -- Multiply a limb vector with a limb and add
* the result to a second limb vector.
*
* Copyright (C) 1992, 1994, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_addmul_1( mpi_ptr_t res_ptr, (sp + 4)
* mpi_ptr_t s1_ptr, (sp + 8)
* mpi_size_t s1_size, (sp + 12)
* mpi_limb_t s2_limb) (sp + 16)
*/
#define res_ptr edi
#define s1_ptr esi
#define size ecx
#define s2_limb ebp
TEXT
ALIGN (3)
GLOBL C_SYMBOL_NAME(mpihelp_addmul_1)
C_SYMBOL_NAME(mpihelp_addmul_1:)
INSN1(push,l ,R(edi))
INSN1(push,l ,R(esi))
INSN1(push,l ,R(ebx))
INSN1(push,l ,R(ebp))
INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20))
INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
INSN2(mov,l ,R(size),MEM_DISP(esp,28))
INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
INSN1(neg,l ,R(size))
INSN2(xor,l ,R(ebx),R(ebx))
ALIGN (3)
Loop:
INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
INSN1(mul,l ,R(s2_limb))
INSN2(add,l ,R(eax),R(ebx))
INSN2(adc,l ,R(edx),$0)
INSN2(add,l ,MEM_INDEX(res_ptr,size,4),R(eax))
INSN2(adc,l ,R(edx),$0)
INSN2(mov,l ,R(ebx),R(edx))
INSN1(inc,l ,R(size))
INSN1(jnz, ,Loop)
INSN2(mov,l ,R(eax),R(ebx))
INSN1(pop,l ,R(ebp))
INSN1(pop,l ,R(ebx))
INSN1(pop,l ,R(esi))
INSN1(pop,l ,R(edi))
ret

88
mpi/i386/mpih-mul3.S Normal file
View file

@ -0,0 +1,88 @@
/* i80386 submul_1 -- Multiply a limb vector with a limb and subtract
* the result from a second limb vector.
*
* Copyright (C) 1992, 1994, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_submul_1( mpi_ptr_t res_ptr, (sp + 4)
* mpi_ptr_t s1_ptr, (sp + 8)
* mpi_size_t s1_size, (sp + 12)
* mpi_limb_t s2_limb) (sp + 16)
*/
#define res_ptr edi
#define s1_ptr esi
#define size ecx
#define s2_limb ebp
TEXT
ALIGN (3)
GLOBL C_SYMBOL_NAME(mpihelp_submul_1)
C_SYMBOL_NAME(mpihelp_submul_1:)
INSN1(push,l ,R(edi))
INSN1(push,l ,R(esi))
INSN1(push,l ,R(ebx))
INSN1(push,l ,R(ebp))
INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20))
INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
INSN2(mov,l ,R(size),MEM_DISP(esp,28))
INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
INSN1(neg,l ,R(size))
INSN2(xor,l ,R(ebx),R(ebx))
ALIGN (3)
Loop:
INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
INSN1(mul,l ,R(s2_limb))
INSN2(add,l ,R(eax),R(ebx))
INSN2(adc,l ,R(edx),$0)
INSN2(sub,l ,MEM_INDEX(res_ptr,size,4),R(eax))
INSN2(adc,l ,R(edx),$0)
INSN2(mov,l ,R(ebx),R(edx))
INSN1(inc,l ,R(size))
INSN1(jnz, ,Loop)
INSN2(mov,l ,R(eax),R(ebx))
INSN1(pop,l ,R(ebp))
INSN1(pop,l ,R(ebx))
INSN1(pop,l ,R(esi))
INSN1(pop,l ,R(edi))
ret

99
mpi/i386/mpih-rshift.S Normal file
View file

@ -0,0 +1,99 @@
/* i80386 rshift
*
* Copyright (C) 1992, 1994, 1998,
* 2001, 2002 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_rshift( mpi_ptr_t wp, (sp + 4)
* mpi_ptr_t up, (sp + 8)
* mpi_size_t usize, (sp + 12)
* unsigned cnt) (sp + 16)
*/
.text
ALIGN (3)
.globl C_SYMBOL_NAME(mpihelp_rshift)
C_SYMBOL_NAME(mpihelp_rshift:)
pushl %edi
pushl %esi
pushl %ebx
movl 16(%esp),%edi /* wp */
movl 20(%esp),%esi /* up */
movl 24(%esp),%edx /* usize */
movl 28(%esp),%ecx /* cnt */
leal -4(%edi,%edx,4),%edi
leal (%esi,%edx,4),%esi
negl %edx
movl (%esi,%edx,4),%ebx /* read least significant limb */
xorl %eax,%eax
shrdl %ebx,%eax /* compute carry limb */
incl %edx
jz Lend2
pushl %eax /* push carry limb onto stack */
testb $1,%dl
jnz L2 /* enter loop in the middle */
movl %ebx,%eax
ALIGN (3)
Loop2: movl (%esi,%edx,4),%ebx /* load next higher limb */
shrdl %ebx,%eax /* compute result limb */
movl %eax,(%edi,%edx,4) /* store it */
incl %edx
L2: movl (%esi,%edx,4),%eax
shrdl %eax,%ebx
movl %ebx,(%edi,%edx,4)
incl %edx
jnz Loop2
shrl %cl,%eax /* compute most significant limb */
movl %eax,(%edi) /* store it */
popl %eax /* pop carry limb */
popl %ebx
popl %esi
popl %edi
ret
Lend2: shrl %cl,%ebx /* compute most significant limb */
movl %ebx,(%edi) /* store it */
popl %ebx
popl %esi
popl %edi
ret

119
mpi/i386/mpih-sub1.S Normal file
View file

@ -0,0 +1,119 @@
/* i80386 sub_n -- Subtract two limb vectors of the same length > 0 and store
* difference in a third limb vector.
*
* Copyright (C) 1992, 1994, 1995, 1998,
* 2001, 2002 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_sub_n( mpi_ptr_t res_ptr, (sp + 4)
* mpi_ptr_t s1_ptr, (sp + 8)
* mpi_ptr_t s2_ptr, (sp + 12)
* mpi_size_t size) (sp + 16)
*/
.text
ALIGN (3)
.globl C_SYMBOL_NAME(mpihelp_sub_n)
C_SYMBOL_NAME(mpihelp_sub_n:)
pushl %edi
pushl %esi
movl 12(%esp),%edi /* res_ptr */
movl 16(%esp),%esi /* s1_ptr */
movl 20(%esp),%edx /* s2_ptr */
movl 24(%esp),%ecx /* size */
movl %ecx,%eax
shrl $3,%ecx /* compute count for unrolled loop */
negl %eax
andl $7,%eax /* get index where to start loop */
jz Loop /* necessary special case for 0 */
incl %ecx /* adjust loop count */
shll $2,%eax /* adjustment for pointers... */
subl %eax,%edi /* ... since they are offset ... */
subl %eax,%esi /* ... by a constant when we ... */
subl %eax,%edx /* ... enter the loop */
shrl $2,%eax /* restore previous value */
#ifdef PIC
/* Calculate start address in loop for PIC. Due to limitations in some
assemblers, Loop-L0-3 cannot be put into the leal */
call L0
L0: leal (%eax,%eax,8),%eax
addl (%esp),%eax
addl $(Loop-L0-3),%eax
addl $4,%esp
#else
/* Calculate start address in loop for non-PIC. */
leal Loop-3(%eax,%eax,8),%eax
#endif
jmp *%eax /* jump into loop */
ALIGN (3)
Loop: movl (%esi),%eax
sbbl (%edx),%eax
movl %eax,(%edi)
movl 4(%esi),%eax
sbbl 4(%edx),%eax
movl %eax,4(%edi)
movl 8(%esi),%eax
sbbl 8(%edx),%eax
movl %eax,8(%edi)
movl 12(%esi),%eax
sbbl 12(%edx),%eax
movl %eax,12(%edi)
movl 16(%esi),%eax
sbbl 16(%edx),%eax
movl %eax,16(%edi)
movl 20(%esi),%eax
sbbl 20(%edx),%eax
movl %eax,20(%edi)
movl 24(%esi),%eax
sbbl 24(%edx),%eax
movl %eax,24(%edi)
movl 28(%esi),%eax
sbbl 28(%edx),%eax
movl %eax,28(%edi)
leal 32(%edi),%edi
leal 32(%esi),%esi
leal 32(%edx),%edx
decl %ecx
jnz Loop
sbbl %eax,%eax
negl %eax
popl %esi
popl %edi
ret

68
mpi/i386/syntax.h Normal file
View file

@ -0,0 +1,68 @@
/* syntax.h -- Definitions for x86 syntax variations.
*
* Copyright (C) 1992, 1994, 1995, 1998,
* 2001 Free Software Foundation, Inc.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#undef ALIGN
#if defined (BSD_SYNTAX) || defined (ELF_SYNTAX)
#define R(r) %r
#define MEM(base)(base)
#define MEM_DISP(base,displacement)displacement(R(base))
#define MEM_INDEX(base,index,size)(R(base),R(index),size)
#ifdef __STDC__
#define INSN1(mnemonic,size_suffix,dst)mnemonic##size_suffix dst
#define INSN2(mnemonic,size_suffix,dst,src)mnemonic##size_suffix src,dst
#else
#define INSN1(mnemonic,size_suffix,dst)mnemonic/**/size_suffix dst
#define INSN2(mnemonic,size_suffix,dst,src)mnemonic/**/size_suffix src,dst
#endif
#define TEXT .text
#if defined (BSD_SYNTAX)
#define ALIGN(log) .align log
#endif
#if defined (ELF_SYNTAX)
#define ALIGN(log) .align 1<<log
#endif
#define GLOBL .globl
#endif
#ifdef INTEL_SYNTAX
#define R(r) r
#define MEM(base)[base]
#define MEM_DISP(base,displacement)[base+(displacement)]
#define MEM_INDEX(base,index,size)[base+index*size]
#define INSN1(mnemonic,size_suffix,dst)mnemonic dst
#define INSN2(mnemonic,size_suffix,dst,src)mnemonic dst,src
#define TEXT .text
#define ALIGN(log) .align log
#define GLOBL .globl
#endif
#ifdef X86_BROKEN_ALIGN
#undef ALIGN
#define ALIGN(log) .align log,0x90
#endif
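
Editor's note: the macros above let the same source assemble under AT&T-style (BSD/ELF) and Intel-style assemblers. As a quick illustration (expansions approximate, shown only for orientation), a single two-operand instruction written with the macros comes out as follows:

/* As written in a portable .S file: */
INSN2(add, l, R(eax), MEM_DISP(esp,20))

/* BSD/ELF syntax (source,destination operand order, '%' register prefix): */
addl 20(%esp),%eax

/* INTEL_SYNTAX (destination first, bracketed memory operands): */
add eax,[esp+(20)]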

26
mpi/i586/README Normal file
View file

@ -0,0 +1,26 @@
This directory contains mpn functions optimized for Intel Pentium
processors.
RELEVANT OPTIMIZATION ISSUES
1. The Pentium doesn't allocate cache lines on writes, unlike most other modern
processors. Since the functions in the mpn class do array writes, we have to
handle allocating the destination cache lines by reading a word from them in the
loops to achieve the best performance.
2. Pairing of memory operations requires that the two issued operations refer
to different cache banks. The simplest way to ensure this is to read/write
two words from the same object. If we operate on different objects, the two
accesses might or might not hit the same cache bank.
STATUS
1. mpn_lshift and mpn_rshift run at about 6 cycles/limb, but the Pentium
documentation indicates that they should take only 43/8 = 5.375 cycles/limb,
or 5 cycles/limb asymptotically.
2. mpn_add_n and mpn_sub_n should run at 2 cycles/limb asymptotically; due to loop
overhead and other delays (cache refill?), they actually run at or near 2.5 cycles/limb.
3. mpn_mul_1, mpn_addmul_1, mpn_submul_1 all run 1 cycle faster than they
should...
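
Editor's note: point 1 above is why the unrolled loops in the files that follow begin with a dummy load such as "movl 28(%edi),%eax" (commented "fetch destination cache line"). A rough C sketch of the idea, with a hypothetical function name and an assumed 32-byte cache line of eight 4-byte limbs:

/* Hypothetical illustration of the write-allocate workaround: touch each
 * destination cache line with a read before the block of stores, so the
 * Pentium (which does not allocate lines on write misses) pulls the line
 * in once instead of stalling on every store. */
void
copy_limbs_touching_dest (unsigned long *dst, const unsigned long *src, long n)
{
    long i, j, lim;

    for (i = 0; i < n; i += 8) {               /* 8 limbs = one 32-byte line */
        volatile unsigned long touch = dst[i]; /* read allocates the line */
        (void)touch;
        lim = (i + 8 < n) ? i + 8 : n;
        for (j = i; j < lim; j++)
            dst[j] = src[j];
    }
}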

9
mpi/i586/distfiles Normal file
View file

@ -0,0 +1,9 @@
mpih-add1.S
mpih-mul1.S
mpih-mul2.S
mpih-mul3.S
mpih-lshift.S
mpih-rshift.S
mpih-sub1.S
README

135
mpi/i586/mpih-add1.S Normal file
View file

@ -0,0 +1,135 @@
/* i80586 add_n -- Add two limb vectors of the same length > 0 and store
* sum in a third limb vector.
*
* Copyright (C) 1992, 1994, 1995, 1996, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_add_n( mpi_ptr_t res_ptr, (sp + 4)
* mpi_ptr_t s1_ptr, (sp + 8)
* mpi_ptr_t s2_ptr, (sp + 12)
* mpi_size_t size) (sp + 16)
*/
.text
ALIGN (3)
.globl C_SYMBOL_NAME(mpihelp_add_n)
C_SYMBOL_NAME(mpihelp_add_n:)
pushl %edi
pushl %esi
pushl %ebx
pushl %ebp
movl 20(%esp),%edi /* res_ptr */
movl 24(%esp),%esi /* s1_ptr */
movl 28(%esp),%ebp /* s2_ptr */
movl 32(%esp),%ecx /* size */
movl (%ebp),%ebx
decl %ecx
movl %ecx,%edx
shrl $3,%ecx
andl $7,%edx
testl %ecx,%ecx /* zero carry flag */
jz Lend
pushl %edx
ALIGN (3)
Loop: movl 28(%edi),%eax /* fetch destination cache line */
leal 32(%edi),%edi
L1: movl (%esi),%eax
movl 4(%esi),%edx
adcl %ebx,%eax
movl 4(%ebp),%ebx
adcl %ebx,%edx
movl 8(%ebp),%ebx
movl %eax,-32(%edi)
movl %edx,-28(%edi)
L2: movl 8(%esi),%eax
movl 12(%esi),%edx
adcl %ebx,%eax
movl 12(%ebp),%ebx
adcl %ebx,%edx
movl 16(%ebp),%ebx
movl %eax,-24(%edi)
movl %edx,-20(%edi)
L3: movl 16(%esi),%eax
movl 20(%esi),%edx
adcl %ebx,%eax
movl 20(%ebp),%ebx
adcl %ebx,%edx
movl 24(%ebp),%ebx
movl %eax,-16(%edi)
movl %edx,-12(%edi)
L4: movl 24(%esi),%eax
movl 28(%esi),%edx
adcl %ebx,%eax
movl 28(%ebp),%ebx
adcl %ebx,%edx
movl 32(%ebp),%ebx
movl %eax,-8(%edi)
movl %edx,-4(%edi)
leal 32(%esi),%esi
leal 32(%ebp),%ebp
decl %ecx
jnz Loop
popl %edx
Lend:
decl %edx /* test %edx w/o clobbering carry */
js Lend2
incl %edx
Loop2:
leal 4(%edi),%edi
movl (%esi),%eax
adcl %ebx,%eax
movl 4(%ebp),%ebx
movl %eax,-4(%edi)
leal 4(%esi),%esi
leal 4(%ebp),%ebp
decl %edx
jnz Loop2
Lend2:
movl (%esi),%eax
adcl %ebx,%eax
movl %eax,(%edi)
sbbl %eax,%eax
negl %eax
popl %ebp
popl %ebx
popl %esi
popl %edi
ret

231
mpi/i586/mpih-lshift.S Normal file
View file

@ -0,0 +1,231 @@
/* i80586 lshift
*
* Copyright (C) 1992, 1994, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_lshift( mpi_ptr_t wp, (sp + 4)
* mpi_ptr_t up, (sp + 8)
* mpi_size_t usize, (sp + 12)
* unsigned cnt) (sp + 16)
*/
.text
ALIGN (3)
.globl C_SYMBOL_NAME(mpihelp_lshift)
C_SYMBOL_NAME(mpihelp_lshift:)
pushl %edi
pushl %esi
pushl %ebx
pushl %ebp
movl 20(%esp),%edi /* res_ptr */
movl 24(%esp),%esi /* s_ptr */
movl 28(%esp),%ebp /* size */
movl 32(%esp),%ecx /* cnt */
/* We can use faster code for shift-by-1 under certain conditions. */
cmp $1,%ecx
jne Lnormal
leal 4(%esi),%eax
cmpl %edi,%eax
jnc Lspecial /* jump if s_ptr + 1 >= res_ptr */
leal (%esi,%ebp,4),%eax
cmpl %eax,%edi
jnc Lspecial /* jump if res_ptr >= s_ptr + size */
Lnormal:
leal -4(%edi,%ebp,4),%edi
leal -4(%esi,%ebp,4),%esi
movl (%esi),%edx
subl $4,%esi
xorl %eax,%eax
shldl %cl,%edx,%eax /* compute carry limb */
pushl %eax /* push carry limb onto stack */
decl %ebp
pushl %ebp
shrl $3,%ebp
jz Lend
movl (%edi),%eax /* fetch destination cache line */
ALIGN (2)
Loop: movl -28(%edi),%eax /* fetch destination cache line */
movl %edx,%ebx
movl (%esi),%eax
movl -4(%esi),%edx
shldl %cl,%eax,%ebx
shldl %cl,%edx,%eax
movl %ebx,(%edi)
movl %eax,-4(%edi)
movl -8(%esi),%ebx
movl -12(%esi),%eax
shldl %cl,%ebx,%edx
shldl %cl,%eax,%ebx
movl %edx,-8(%edi)
movl %ebx,-12(%edi)
movl -16(%esi),%edx
movl -20(%esi),%ebx
shldl %cl,%edx,%eax
shldl %cl,%ebx,%edx
movl %eax,-16(%edi)
movl %edx,-20(%edi)
movl -24(%esi),%eax
movl -28(%esi),%edx
shldl %cl,%eax,%ebx
shldl %cl,%edx,%eax
movl %ebx,-24(%edi)
movl %eax,-28(%edi)
subl $32,%esi
subl $32,%edi
decl %ebp
jnz Loop
Lend: popl %ebp
andl $7,%ebp
jz Lend2
Loop2: movl (%esi),%eax
shldl %cl,%eax,%edx
movl %edx,(%edi)
movl %eax,%edx
subl $4,%esi
subl $4,%edi
decl %ebp
jnz Loop2
Lend2: shll %cl,%edx /* compute least significant limb */
movl %edx,(%edi) /* store it */
popl %eax /* pop carry limb */
popl %ebp
popl %ebx
popl %esi
popl %edi
ret
/* We loop from the least significant end of the arrays, which is only
   permissible if the source and destination don't overlap, since the
   function is documented to work for overlapping source and destination.
*/
Lspecial:
movl (%esi),%edx
addl $4,%esi
decl %ebp
pushl %ebp
shrl $3,%ebp
addl %edx,%edx
incl %ebp
decl %ebp
jz LLend
movl (%edi),%eax /* fetch destination cache line */
ALIGN (2)
LLoop: movl 28(%edi),%eax /* fetch destination cache line */
movl %edx,%ebx
movl (%esi),%eax
movl 4(%esi),%edx
adcl %eax,%eax
movl %ebx,(%edi)
adcl %edx,%edx
movl %eax,4(%edi)
movl 8(%esi),%ebx
movl 12(%esi),%eax
adcl %ebx,%ebx
movl %edx,8(%edi)
adcl %eax,%eax
movl %ebx,12(%edi)
movl 16(%esi),%edx
movl 20(%esi),%ebx
adcl %edx,%edx
movl %eax,16(%edi)
adcl %ebx,%ebx
movl %edx,20(%edi)
movl 24(%esi),%eax
movl 28(%esi),%edx
adcl %eax,%eax
movl %ebx,24(%edi)
adcl %edx,%edx
movl %eax,28(%edi)
leal 32(%esi),%esi /* use leal not to clobber carry */
leal 32(%edi),%edi
decl %ebp
jnz LLoop
LLend: popl %ebp
sbbl %eax,%eax /* save carry in %eax */
andl $7,%ebp
jz LLend2
addl %eax,%eax /* restore carry from eax */
LLoop2: movl %edx,%ebx
movl (%esi),%edx
adcl %edx,%edx
movl %ebx,(%edi)
leal 4(%esi),%esi /* use leal not to clobber carry */
leal 4(%edi),%edi
decl %ebp
jnz LLoop2
jmp LL1
LLend2: addl %eax,%eax /* restore carry from eax */
LL1: movl %edx,(%edi) /* store last limb */
sbbl %eax,%eax
negl %eax
popl %ebp
popl %ebx
popl %esi
popl %edi
ret
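
Editor's note: a hedged C model of the interface above. mpihelp_lshift shifts a limb vector left by cnt bits (0 < cnt < 32 here) and returns the bits shifted out of the most significant limb; the Lspecial path handles cnt == 1 with plain add-with-carry instructions, presumably because adcl is cheaper than shldl on the Pentium. The sketch below uses an assumed 32-bit limb and a made-up name (lshift_model); it is not the GnuPG implementation.

#include <stdint.h>

/* Illustrative model of mpihelp_lshift for 32-bit limbs: shift the usize
 * limbs at up left by cnt bits into wp, returning the carried-out bits. */
uint32_t
lshift_model (uint32_t *wp, const uint32_t *up, long usize, unsigned cnt)
{
    uint32_t retval = up[usize - 1] >> (32 - cnt);  /* bits shifted out */
    long i;

    for (i = usize - 1; i > 0; i--)         /* work from the high end down */
        wp[i] = (up[i] << cnt) | (up[i - 1] >> (32 - cnt));
    wp[0] = up[0] << cnt;
    return retval;
}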

91
mpi/i586/mpih-mul1.S Normal file
View file

@ -0,0 +1,91 @@
/* i80586 mul_1 -- Multiply a limb vector with a limb and store
* the result in a second limb vector.
*
* Copyright (C) 1992, 1994, 1996, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_mul_1( mpi_ptr_t res_ptr, (sp + 4)
* mpi_ptr_t s1_ptr, (sp + 8)
* mpi_size_t s1_size, (sp + 12)
* mpi_limb_t s2_limb) (sp + 16)
*/
#define res_ptr edi
#define s1_ptr esi
#define size ecx
#define s2_limb ebp
TEXT
ALIGN (3)
GLOBL C_SYMBOL_NAME(mpihelp_mul_1)
C_SYMBOL_NAME(mpihelp_mul_1:)
INSN1(push,l ,R(edi))
INSN1(push,l ,R(esi))
INSN1(push,l ,R(ebx))
INSN1(push,l ,R(ebp))
INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20))
INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
INSN2(mov,l ,R(size),MEM_DISP(esp,28))
INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
INSN1(neg,l ,R(size))
INSN2(xor,l ,R(ebx),R(ebx))
ALIGN (3)
Loop: INSN2(adc,l ,R(ebx),$0)
INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
INSN1(mul,l ,R(s2_limb))
INSN2(add,l ,R(ebx),R(eax))
INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx))
INSN1(inc,l ,R(size))
INSN2(mov,l ,R(ebx),R(edx))
INSN1(jnz, ,Loop)
INSN2(adc,l ,R(ebx),$0)
INSN2(mov,l ,R(eax),R(ebx))
INSN1(pop,l ,R(ebp))
INSN1(pop,l ,R(ebx))
INSN1(pop,l ,R(esi))
INSN1(pop,l ,R(edi))
ret

95
mpi/i586/mpih-mul2.S Normal file
View file

@ -0,0 +1,95 @@
/* i80586 addmul_1 -- Multiply a limb vector with a limb and add
* the result to a second limb vector.
*
* Copyright (C) 1992, 1994, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_addmul_1( mpi_ptr_t res_ptr, (sp + 4)
* mpi_ptr_t s1_ptr, (sp + 8)
* mpi_size_t s1_size, (sp + 12)
* mpi_limb_t s2_limb) (sp + 16)
*/
#define res_ptr edi
#define s1_ptr esi
#define size ecx
#define s2_limb ebp
TEXT
ALIGN (3)
GLOBL C_SYMBOL_NAME(mpihelp_addmul_1)
C_SYMBOL_NAME(mpihelp_addmul_1:)
INSN1(push,l ,R(edi))
INSN1(push,l ,R(esi))
INSN1(push,l ,R(ebx))
INSN1(push,l ,R(ebp))
INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20))
INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
INSN2(mov,l ,R(size),MEM_DISP(esp,28))
INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
INSN1(neg,l ,R(size))
INSN2(xor,l ,R(ebx),R(ebx))
ALIGN (3)
Loop: INSN2(adc,l ,R(ebx),$0)
INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
INSN1(mul,l ,R(s2_limb))
INSN2(add,l ,R(eax),R(ebx))
INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4))
INSN2(adc,l ,R(edx),$0)
INSN2(add,l ,R(ebx),R(eax))
INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx))
INSN1(inc,l ,R(size))
INSN2(mov,l ,R(ebx),R(edx))
INSN1(jnz, ,Loop)
INSN2(adc,l ,R(ebx),$0)
INSN2(mov,l ,R(eax),R(ebx))
INSN1(pop,l ,R(ebp))
INSN1(pop,l ,R(ebx))
INSN1(pop,l ,R(esi))
INSN1(pop,l ,R(edi))
ret

95
mpi/i586/mpih-mul3.S Normal file
View file

@ -0,0 +1,95 @@
/* i80586 submul_1 -- Multiply a limb vector with a limb and subtract
 * the result from a second limb vector.
*
* Copyright (C) 1992, 1994, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_submul_1( mpi_ptr_t res_ptr, (sp + 4)
* mpi_ptr_t s1_ptr, (sp + 8)
* mpi_size_t s1_size, (sp + 12)
* mpi_limb_t s2_limb) (sp + 16)
*/
#define res_ptr edi
#define s1_ptr esi
#define size ecx
#define s2_limb ebp
TEXT
ALIGN (3)
GLOBL C_SYMBOL_NAME(mpihelp_submul_1)
C_SYMBOL_NAME(mpihelp_submul_1:)
INSN1(push,l ,R(edi))
INSN1(push,l ,R(esi))
INSN1(push,l ,R(ebx))
INSN1(push,l ,R(ebp))
INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20))
INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
INSN2(mov,l ,R(size),MEM_DISP(esp,28))
INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
INSN1(neg,l ,R(size))
INSN2(xor,l ,R(ebx),R(ebx))
ALIGN (3)
Loop: INSN2(adc,l ,R(ebx),$0)
INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
INSN1(mul,l ,R(s2_limb))
INSN2(add,l ,R(eax),R(ebx))
INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4))
INSN2(adc,l ,R(edx),$0)
INSN2(sub,l ,R(ebx),R(eax))
INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx))
INSN1(inc,l ,R(size))
INSN2(mov,l ,R(ebx),R(edx))
INSN1(jnz, ,Loop)
INSN2(adc,l ,R(ebx),$0)
INSN2(mov,l ,R(eax),R(ebx))
INSN1(pop,l ,R(ebp))
INSN1(pop,l ,R(ebx))
INSN1(pop,l ,R(esi))
INSN1(pop,l ,R(edi))
ret
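
Editor's note: the three i586 multiply helpers above share one loop skeleton and differ only in what is done with the product. Below is a hedged C model of their semantics, with 32-bit limbs and a 64-bit intermediate product (the assembly gets the high word from mull's edx:eax pair instead). The *_model names are made up for illustration; this is not the GnuPG code.

#include <stdint.h>

/* res[i] = s1[i] * limb; returns the high limb left over. */
uint32_t
mul_1_model (uint32_t *res, const uint32_t *s1, long n, uint32_t limb)
{
    uint32_t cy = 0;
    long i;
    for (i = 0; i < n; i++) {
        uint64_t p = (uint64_t)s1[i] * limb + cy;
        res[i] = (uint32_t)p;
        cy = (uint32_t)(p >> 32);
    }
    return cy;
}

/* res[i] += s1[i] * limb (addmul_1); returns the final carry limb. */
uint32_t
addmul_1_model (uint32_t *res, const uint32_t *s1, long n, uint32_t limb)
{
    uint32_t cy = 0;
    long i;
    for (i = 0; i < n; i++) {
        uint64_t p = (uint64_t)s1[i] * limb + cy;
        uint64_t s = (uint64_t)res[i] + (uint32_t)p;
        res[i] = (uint32_t)s;
        cy = (uint32_t)(p >> 32) + (uint32_t)(s >> 32);
    }
    return cy;
}

/* res[i] -= s1[i] * limb (submul_1); returns the final borrow limb. */
uint32_t
submul_1_model (uint32_t *res, const uint32_t *s1, long n, uint32_t limb)
{
    uint32_t cy = 0;
    long i;
    for (i = 0; i < n; i++) {
        uint64_t p = (uint64_t)s1[i] * limb + cy;
        uint32_t lo = (uint32_t)p;
        uint32_t r = res[i];
        res[i] = r - lo;
        cy = (uint32_t)(p >> 32) + (r < lo);
    }
    return cy;
}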

230
mpi/i586/mpih-rshift.S Normal file
View file

@ -0,0 +1,230 @@
/* i80586 rshift
*
* Copyright (C) 1992, 1994, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_rshift( mpi_ptr_t wp, (sp + 4)
* mpi_ptr_t up, (sp + 8)
* mpi_size_t usize, (sp + 12)
* unsigned cnt) (sp + 16)
*/
.text
ALIGN (3)
.globl C_SYMBOL_NAME(mpihelp_rshift)
C_SYMBOL_NAME(mpihelp_rshift:)
pushl %edi
pushl %esi
pushl %ebx
pushl %ebp
movl 20(%esp),%edi /* res_ptr */
movl 24(%esp),%esi /* s_ptr */
movl 28(%esp),%ebp /* size */
movl 32(%esp),%ecx /* cnt */
/* We can use faster code for shift-by-1 under certain conditions. */
cmp $1,%ecx
jne Rnormal
leal 4(%edi),%eax
cmpl %esi,%eax
jnc Rspecial /* jump if res_ptr + 1 >= s_ptr */
leal (%edi,%ebp,4),%eax
cmpl %eax,%esi
jnc Rspecial /* jump if s_ptr >= res_ptr + size */
Rnormal:
movl (%esi),%edx
addl $4,%esi
xorl %eax,%eax
shrdl %cl,%edx,%eax /* compute carry limb */
pushl %eax /* push carry limb onto stack */
decl %ebp
pushl %ebp
shrl $3,%ebp
jz Rend
movl (%edi),%eax /* fetch destination cache line */
ALIGN (2)
Roop: movl 28(%edi),%eax /* fetch destination cache line */
movl %edx,%ebx
movl (%esi),%eax
movl 4(%esi),%edx
shrdl %cl,%eax,%ebx
shrdl %cl,%edx,%eax
movl %ebx,(%edi)
movl %eax,4(%edi)
movl 8(%esi),%ebx
movl 12(%esi),%eax
shrdl %cl,%ebx,%edx
shrdl %cl,%eax,%ebx
movl %edx,8(%edi)
movl %ebx,12(%edi)
movl 16(%esi),%edx
movl 20(%esi),%ebx
shrdl %cl,%edx,%eax
shrdl %cl,%ebx,%edx
movl %eax,16(%edi)
movl %edx,20(%edi)
movl 24(%esi),%eax
movl 28(%esi),%edx
shrdl %cl,%eax,%ebx
shrdl %cl,%edx,%eax
movl %ebx,24(%edi)
movl %eax,28(%edi)
addl $32,%esi
addl $32,%edi
decl %ebp
jnz Roop
Rend: popl %ebp
andl $7,%ebp
jz Rend2
Roop2: movl (%esi),%eax
shrdl %cl,%eax,%edx /* compute result limb */
movl %edx,(%edi)
movl %eax,%edx
addl $4,%esi
addl $4,%edi
decl %ebp
jnz Roop2
Rend2: shrl %cl,%edx /* compute most significant limb */
movl %edx,(%edi) /* store it */
popl %eax /* pop carry limb */
popl %ebp
popl %ebx
popl %esi
popl %edi
ret
/* We loop from the most significant end of the arrays here, which is only
   permissible if the source and destination don't overlap, since the
   function is documented to work for overlapping source and destination.
*/
Rspecial:
leal -4(%edi,%ebp,4),%edi
leal -4(%esi,%ebp,4),%esi
movl (%esi),%edx
subl $4,%esi
decl %ebp
pushl %ebp
shrl $3,%ebp
shrl $1,%edx
incl %ebp
decl %ebp
jz RLend
movl (%edi),%eax /* fetch destination cache line */
ALIGN (2)
RLoop: movl -28(%edi),%eax /* fetch destination cache line */
movl %edx,%ebx
movl (%esi),%eax
movl -4(%esi),%edx
rcrl $1,%eax
movl %ebx,(%edi)
rcrl $1,%edx
movl %eax,-4(%edi)
movl -8(%esi),%ebx
movl -12(%esi),%eax
rcrl $1,%ebx
movl %edx,-8(%edi)
rcrl $1,%eax
movl %ebx,-12(%edi)
movl -16(%esi),%edx
movl -20(%esi),%ebx
rcrl $1,%edx
movl %eax,-16(%edi)
rcrl $1,%ebx
movl %edx,-20(%edi)
movl -24(%esi),%eax
movl -28(%esi),%edx
rcrl $1,%eax
movl %ebx,-24(%edi)
rcrl $1,%edx
movl %eax,-28(%edi)
leal -32(%esi),%esi /* use leal not to clobber carry */
leal -32(%edi),%edi
decl %ebp
jnz RLoop
RLend: popl %ebp
sbbl %eax,%eax /* save carry in %eax */
andl $7,%ebp
jz RLend2
addl %eax,%eax /* restore carry from eax */
RLoop2: movl %edx,%ebx
movl (%esi),%edx
rcrl $1,%edx
movl %ebx,(%edi)
leal -4(%esi),%esi /* use leal not to clobber carry */
leal -4(%edi),%edi
decl %ebp
jnz RLoop2
jmp RL1
RLend2: addl %eax,%eax /* restore carry from eax */
RL1: movl %edx,(%edi) /* store last limb */
movl $0,%eax
rcrl $1,%eax
popl %ebp
popl %ebx
popl %esi
popl %edi
ret

144
mpi/i586/mpih-sub1.S Normal file
View file

@ -0,0 +1,144 @@
/* i80586 sub_n -- Subtract two limb vectors of the same length > 0 and store
 * the difference in a third limb vector.
*
* Copyright (C) 1992, 1994, 1995, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_sub_n( mpi_ptr_t res_ptr, (sp + 4)
* mpi_ptr_t s1_ptr, (sp + 8)
* mpi_ptr_t s2_ptr, (sp + 12)
* mpi_size_t size) (sp + 16)
*/
.text
ALIGN (3)
.globl C_SYMBOL_NAME(mpihelp_sub_n)
C_SYMBOL_NAME(mpihelp_sub_n:)
pushl %edi
pushl %esi
pushl %ebx
pushl %ebp
movl 20(%esp),%edi /* res_ptr */
movl 24(%esp),%esi /* s1_ptr */
movl 28(%esp),%ebp /* s2_ptr */
movl 32(%esp),%ecx /* size */
movl (%ebp),%ebx
decl %ecx
movl %ecx,%edx
shrl $3,%ecx
andl $7,%edx
testl %ecx,%ecx /* zero carry flag */
jz Lend
pushl %edx
ALIGN (3)
Loop: movl 28(%edi),%eax /* fetch destination cache line */
leal 32(%edi),%edi
L1: movl (%esi),%eax
movl 4(%esi),%edx
sbbl %ebx,%eax
movl 4(%ebp),%ebx
sbbl %ebx,%edx
movl 8(%ebp),%ebx
movl %eax,-32(%edi)
movl %edx,-28(%edi)
L2: movl 8(%esi),%eax
movl 12(%esi),%edx
sbbl %ebx,%eax
movl 12(%ebp),%ebx
sbbl %ebx,%edx
movl 16(%ebp),%ebx
movl %eax,-24(%edi)
movl %edx,-20(%edi)
L3: movl 16(%esi),%eax
movl 20(%esi),%edx
sbbl %ebx,%eax
movl 20(%ebp),%ebx
sbbl %ebx,%edx
movl 24(%ebp),%ebx
movl %eax,-16(%edi)
movl %edx,-12(%edi)
L4: movl 24(%esi),%eax
movl 28(%esi),%edx
sbbl %ebx,%eax
movl 28(%ebp),%ebx
sbbl %ebx,%edx
movl 32(%ebp),%ebx
movl %eax,-8(%edi)
movl %edx,-4(%edi)
leal 32(%esi),%esi
leal 32(%ebp),%ebp
decl %ecx
jnz Loop
popl %edx
Lend:
decl %edx /* test %edx w/o clobbering carry */
js Lend2
incl %edx
Loop2:
leal 4(%edi),%edi
movl (%esi),%eax
sbbl %ebx,%eax
movl 4(%ebp),%ebx
movl %eax,-4(%edi)
leal 4(%esi),%esi
leal 4(%ebp),%ebp
decl %edx
jnz Loop2
Lend2:
movl (%esi),%eax
sbbl %ebx,%eax
movl %eax,(%edi)
sbbl %eax,%eax
negl %eax
popl %ebp
popl %ebx
popl %esi
popl %edi
ret

1499
mpi/longlong.h Normal file

File diff suppressed because it is too large

8
mpi/m68k/distfiles Normal file
View file

@ -0,0 +1,8 @@
syntax.h
mpih-lshift.S
mpih-rshift.S
mpih-add1.S
mpih-sub1.S

View file

@ -0,0 +1,3 @@
mpih-mul1.S
mpih-mul2.S
mpih-mul3.S

View file

@ -0,0 +1,106 @@
/* mc68020 __mpn_mul_1 -- Multiply a limb vector with a limb and store
* the result in a second limb vector.
*
* Copyright (C) 1992, 1994, 1996, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_mul_1( mpi_ptr_t res_ptr, (sp + 4)
* mpi_ptr_t s1_ptr, (sp + 8)
* mpi_size_t s1_size, (sp + 12)
* mpi_limb_t s2_limb) (sp + 16)
*/
TEXT
ALIGN
GLOBL C_SYMBOL_NAME(mpihelp_mul_1)
C_SYMBOL_NAME(mpihelp_mul_1:)
PROLOG(mpihelp_mul_1)
#define res_ptr a0
#define s1_ptr a1
#define s1_size d2
#define s2_limb d4
/* Save used registers on the stack. */
moveml R(d2)-R(d4),MEM_PREDEC(sp)
#if 0
movel R(d2),MEM_PREDEC(sp)
movel R(d3),MEM_PREDEC(sp)
movel R(d4),MEM_PREDEC(sp)
#endif
/* Copy the arguments to registers. Better use movem? */
movel MEM_DISP(sp,16),R(res_ptr)
movel MEM_DISP(sp,20),R(s1_ptr)
movel MEM_DISP(sp,24),R(s1_size)
movel MEM_DISP(sp,28),R(s2_limb)
eorw #1,R(s1_size)
clrl R(d1)
lsrl #1,R(s1_size)
bcc L(L1)
subql #1,R(s1_size)
subl R(d0),R(d0) /* (d0,cy) <= (0,0) */
L(Loop:)
movel MEM_POSTINC(s1_ptr),R(d3)
mulul R(s2_limb),R(d1):R(d3)
addxl R(d0),R(d3)
movel R(d3),MEM_POSTINC(res_ptr)
L(L1:) movel MEM_POSTINC(s1_ptr),R(d3)
mulul R(s2_limb),R(d0):R(d3)
addxl R(d1),R(d3)
movel R(d3),MEM_POSTINC(res_ptr)
dbf R(s1_size),L(Loop)
clrl R(d3)
addxl R(d3),R(d0)
subl #0x10000,R(s1_size)
bcc L(Loop)
/* Restore used registers from stack frame. */
moveml MEM_POSTINC(sp),R(d2)-R(d4)
#if 0
movel MEM_POSTINC(sp),R(d4)
movel MEM_POSTINC(sp),R(d3)
movel MEM_POSTINC(sp),R(d2)
#endif
rts
EPILOG(mpihelp_mul_1)

View file

@ -0,0 +1,96 @@
/* mc68020 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
* the result to a second limb vector.
*
* Copyright (C) 1992, 1994, 1996, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_addmul_1( mpi_ptr_t res_ptr, (sp + 4)
* mpi_ptr_t s1_ptr, (sp + 8)
* mpi_size_t s1_size, (sp + 12)
* mpi_limb_t s2_limb) (sp + 16)
*/
TEXT
ALIGN
GLOBL C_SYMBOL_NAME(mpihelp_addmul_1)
C_SYMBOL_NAME(mpihelp_addmul_1:)
PROLOG(mpihelp_addmul_1)
#define res_ptr a0
#define s1_ptr a1
#define s1_size d2
#define s2_limb d4
/* Save used registers on the stack. */
moveml R(d2)-R(d5),MEM_PREDEC(sp)
/* Copy the arguments to registers. Better use movem? */
movel MEM_DISP(sp,20),R(res_ptr)
movel MEM_DISP(sp,24),R(s1_ptr)
movel MEM_DISP(sp,28),R(s1_size)
movel MEM_DISP(sp,32),R(s2_limb)
eorw #1,R(s1_size)
clrl R(d1)
clrl R(d5)
lsrl #1,R(s1_size)
bcc L(L1)
subql #1,R(s1_size)
subl R(d0),R(d0) /* (d0,cy) <= (0,0) */
L(Loop:)
movel MEM_POSTINC(s1_ptr),R(d3)
mulul R(s2_limb),R(d1):R(d3)
addxl R(d0),R(d3)
addxl R(d5),R(d1)
addl R(d3),MEM_POSTINC(res_ptr)
L(L1:) movel MEM_POSTINC(s1_ptr),R(d3)
mulul R(s2_limb),R(d0):R(d3)
addxl R(d1),R(d3)
addxl R(d5),R(d0)
addl R(d3),MEM_POSTINC(res_ptr)
dbf R(s1_size),L(Loop)
addxl R(d5),R(d0)
subl #0x10000,R(s1_size)
bcc L(Loop)
/* Restore used registers from stack frame. */
moveml MEM_POSTINC(sp),R(d2)-R(d5)
rts
EPILOG(mpihelp_addmul_1)

View file

@ -0,0 +1,99 @@
/* mc68020 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
* the result from a second limb vector.
*
* Copyright (C) 1992, 1994, 1996, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_submul_1( mpi_ptr_t res_ptr, (sp + 4)
* mpi_ptr_t s1_ptr, (sp + 8)
* mpi_size_t s1_size, (sp + 12)
* mpi_limb_t s2_limb) (sp + 16)
*/
TEXT
ALIGN
GLOBL C_SYMBOL_NAME(mpihelp_submul_1)
C_SYMBOL_NAME(mpihelp_submul_1:)
PROLOG(mpihelp_submul_1)
#define res_ptr a0
#define s1_ptr a1
#define s1_size d2
#define s2_limb d4
/* Save used registers on the stack. */
moveml R(d2)-R(d5),MEM_PREDEC(sp)
/* Copy the arguments to registers. Better use movem? */
movel MEM_DISP(sp,20),R(res_ptr)
movel MEM_DISP(sp,24),R(s1_ptr)
movel MEM_DISP(sp,28),R(s1_size)
movel MEM_DISP(sp,32),R(s2_limb)
eorw #1,R(s1_size)
clrl R(d1)
clrl R(d5)
lsrl #1,R(s1_size)
bcc L(L1)
subql #1,R(s1_size)
subl R(d0),R(d0) /* (d0,cy) <= (0,0) */
L(Loop:)
movel MEM_POSTINC(s1_ptr),R(d3)
mulul R(s2_limb),R(d1):R(d3)
addxl R(d0),R(d3)
addxl R(d5),R(d1)
subl R(d3),MEM_POSTINC(res_ptr)
L(L1:) movel MEM_POSTINC(s1_ptr),R(d3)
mulul R(s2_limb),R(d0):R(d3)
addxl R(d1),R(d3)
addxl R(d5),R(d0)
subl R(d3),MEM_POSTINC(res_ptr)
dbf R(s1_size),L(Loop)
addxl R(d5),R(d0)
subl #0x10000,R(s1_size)
bcc L(Loop)
/* Restore used registers from stack frame. */
moveml MEM_POSTINC(sp),R(d2)-R(d5)
rts
EPILOG(mpihelp_submul_1)

94
mpi/m68k/mpih-add1.S Normal file
View file

@ -0,0 +1,94 @@
/* mc68020 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
* sum in a third limb vector.
*
* Copyright (C) 1992, 1994,1996, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_add_n( mpi_ptr_t res_ptr, (sp + 4)
* mpi_ptr_t s1_ptr, (sp + 8)
* mpi_ptr_t s2_ptr, (sp + 12)
* mpi_size_t size) (sp + 16)
*/
TEXT
ALIGN
GLOBL C_SYMBOL_NAME(mpihelp_add_n)
C_SYMBOL_NAME(mpihelp_add_n:)
PROLOG(mpihelp_add_n)
/* Save used registers on the stack. */
movel R(d2),MEM_PREDEC(sp)
movel R(a2),MEM_PREDEC(sp)
/* Copy the arguments to registers. Better use movem? */
movel MEM_DISP(sp,12),R(a2)
movel MEM_DISP(sp,16),R(a0)
movel MEM_DISP(sp,20),R(a1)
movel MEM_DISP(sp,24),R(d2)
eorw #1,R(d2)
lsrl #1,R(d2)
bcc L(L1)
subql #1,R(d2) /* clears cy as side effect */
L(Loop:)
movel MEM_POSTINC(a0),R(d0)
movel MEM_POSTINC(a1),R(d1)
addxl R(d1),R(d0)
movel R(d0),MEM_POSTINC(a2)
L(L1:) movel MEM_POSTINC(a0),R(d0)
movel MEM_POSTINC(a1),R(d1)
addxl R(d1),R(d0)
movel R(d0),MEM_POSTINC(a2)
dbf R(d2),L(Loop) /* loop until 16 lsb of %4 == -1 */
subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */
subl #0x10000,R(d2)
bcs L(L2)
addl R(d0),R(d0) /* restore cy */
bra L(Loop)
L(L2:)
negl R(d0)
/* Restore used registers from stack frame. */
movel MEM_POSTINC(sp),R(a2)
movel MEM_POSTINC(sp),R(d2)
rts
EPILOG(mpihelp_add_n)

166
mpi/m68k/mpih-lshift.S Normal file
View file

@ -0,0 +1,166 @@
/* mc68020 lshift -- Shift left a low-level natural-number integer.
*
* Copyright (C) 1996, 1998, 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_lshift( mpi_ptr_t wp, (sp + 4)
* mpi_ptr_t up, (sp + 8)
* mpi_size_t usize, (sp + 12)
* unsigned cnt) (sp + 16)
*/
#define res_ptr a1
#define s_ptr a0
#define s_size d6
#define cnt d4
TEXT
ALIGN
GLOBL C_SYMBOL_NAME(mpihelp_lshift)
C_SYMBOL_NAME(mpihelp_lshift:)
PROLOG(mpihelp_lshift)
/* Save used registers on the stack. */
moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
/* Copy the arguments to registers. */
movel MEM_DISP(sp,28),R(res_ptr)
movel MEM_DISP(sp,32),R(s_ptr)
movel MEM_DISP(sp,36),R(s_size)
movel MEM_DISP(sp,40),R(cnt)
moveql #1,R(d5)
cmpl R(d5),R(cnt)
bne L(Lnormal)
cmpl R(s_ptr),R(res_ptr)
bls L(Lspecial) /* jump if s_ptr >= res_ptr */
#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
lea MEM_INDX1(s_ptr,s_size,l,4),R(a2)
#else /* not mc68020 */
movel R(s_size),R(d0)
asll #2,R(d0)
lea MEM_INDX(s_ptr,d0,l),R(a2)
#endif
cmpl R(res_ptr),R(a2)
bls L(Lspecial) /* jump if res_ptr >= s_ptr + s_size */
L(Lnormal:)
moveql #32,R(d5)
subl R(cnt),R(d5)
#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
#else /* not mc68000 */
movel R(s_size),R(d0)
asll #2,R(d0)
addl R(s_size),R(s_ptr)
addl R(s_size),R(res_ptr)
#endif
movel MEM_PREDEC(s_ptr),R(d2)
movel R(d2),R(d0)
lsrl R(d5),R(d0) /* compute carry limb */
lsll R(cnt),R(d2)
movel R(d2),R(d1)
subql #1,R(s_size)
beq L(Lend)
lsrl #1,R(s_size)
bcs L(L1)
subql #1,R(s_size)
L(Loop:)
movel MEM_PREDEC(s_ptr),R(d2)
movel R(d2),R(d3)
lsrl R(d5),R(d3)
orl R(d3),R(d1)
movel R(d1),MEM_PREDEC(res_ptr)
lsll R(cnt),R(d2)
L(L1:)
movel MEM_PREDEC(s_ptr),R(d1)
movel R(d1),R(d3)
lsrl R(d5),R(d3)
orl R(d3),R(d2)
movel R(d2),MEM_PREDEC(res_ptr)
lsll R(cnt),R(d1)
dbf R(s_size),L(Loop)
subl #0x10000,R(s_size)
bcc L(Loop)
L(Lend:)
movel R(d1),MEM_PREDEC(res_ptr) /* store least significant limb */
/* Restore used registers from stack frame. */
moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
rts
/* We loop from the least significant end of the arrays, which is only
   permissible if the source and destination don't overlap, since the
   function is documented to work for overlapping source and destination. */
L(Lspecial:)
clrl R(d0) /* initialize carry */
eorw #1,R(s_size)
lsrl #1,R(s_size)
bcc L(LL1)
subql #1,R(s_size)
L(LLoop:)
movel MEM_POSTINC(s_ptr),R(d2)
addxl R(d2),R(d2)
movel R(d2),MEM_POSTINC(res_ptr)
L(LL1:)
movel MEM_POSTINC(s_ptr),R(d2)
addxl R(d2),R(d2)
movel R(d2),MEM_POSTINC(res_ptr)
dbf R(s_size),L(LLoop)
addxl R(d0),R(d0) /* save cy in lsb */
subl #0x10000,R(s_size)
bcs L(LLend)
lsrl #1,R(d0) /* restore cy */
bra L(LLoop)
L(LLend:)
/* Restore used registers from stack frame. */
moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
rts
EPILOG(mpihelp_lshift)

164
mpi/m68k/mpih-rshift.S Normal file
View file

@ -0,0 +1,164 @@
/* mc68020 rshift -- Shift right a low-level natural-number integer.
*
* Copyright (C) 1996, 1998, 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_rshift( mpi_ptr_t wp, (sp + 4)
* mpi_ptr_t up, (sp + 8)
* mpi_size_t usize, (sp + 12)
* unsigned cnt) (sp + 16)
*/
#define res_ptr a1
#define s_ptr a0
#define s_size d6
#define cnt d4
TEXT
ALIGN
GLOBL C_SYMBOL_NAME(mpihelp_rshift)
C_SYMBOL_NAME(mpihelp_rshift:)
PROLOG(mpihelp_rshift)
/* Save used registers on the stack. */
moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
/* Copy the arguments to registers. */
movel MEM_DISP(sp,28),R(res_ptr)
movel MEM_DISP(sp,32),R(s_ptr)
movel MEM_DISP(sp,36),R(s_size)
movel MEM_DISP(sp,40),R(cnt)
moveql #1,R(d5)
cmpl R(d5),R(cnt)
bne L(Rnormal)
cmpl R(res_ptr),R(s_ptr)
bls L(Rspecial) /* jump if res_ptr >= s_ptr */
#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
lea MEM_INDX1(res_ptr,s_size,l,4),R(a2)
#else /* not mc68020 */
movel R(s_size),R(d0)
asll #2,R(d0)
lea MEM_INDX(res_ptr,d0,l),R(a2)
#endif
cmpl R(s_ptr),R(a2)
bls L(Rspecial) /* jump if s_ptr >= res_ptr + s_size */
L(Rnormal:)
moveql #32,R(d5)
subl R(cnt),R(d5)
movel MEM_POSTINC(s_ptr),R(d2)
movel R(d2),R(d0)
lsll R(d5),R(d0) /* compute carry limb */
lsrl R(cnt),R(d2)
movel R(d2),R(d1)
subql #1,R(s_size)
beq L(Rend)
lsrl #1,R(s_size)
bcs L(R1)
subql #1,R(s_size)
L(Roop:)
movel MEM_POSTINC(s_ptr),R(d2)
movel R(d2),R(d3)
lsll R(d5),R(d3)
orl R(d3),R(d1)
movel R(d1),MEM_POSTINC(res_ptr)
lsrl R(cnt),R(d2)
L(R1:)
movel MEM_POSTINC(s_ptr),R(d1)
movel R(d1),R(d3)
lsll R(d5),R(d3)
orl R(d3),R(d2)
movel R(d2),MEM_POSTINC(res_ptr)
lsrl R(cnt),R(d1)
dbf R(s_size),L(Roop)
subl #0x10000,R(s_size)
bcc L(Roop)
L(Rend:)
movel R(d1),MEM(res_ptr) /* store most significant limb */
/* Restore used registers from stack frame. */
moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
rts
/* We loop from the most significant end of the arrays, which is only
   permissible if the source and destination don't overlap, since the
   function is documented to work for overlapping source and destination. */
L(Rspecial:)
#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
#else /* not mc68000 */
movel R(s_size),R(d0)
asll #2,R(d0)
addl R(s_size),R(s_ptr)
addl R(s_size),R(res_ptr)
#endif
clrl R(d0) /* initialize carry */
eorw #1,R(s_size)
lsrl #1,R(s_size)
bcc L(LR1)
subql #1,R(s_size)
L(LRoop:)
movel MEM_PREDEC(s_ptr),R(d2)
roxrl #1,R(d2)
movel R(d2),MEM_PREDEC(res_ptr)
L(LR1:)
movel MEM_PREDEC(s_ptr),R(d2)
roxrl #1,R(d2)
movel R(d2),MEM_PREDEC(res_ptr)
dbf R(s_size),L(LRoop)
roxrl #1,R(d0) /* save cy in msb */
subl #0x10000,R(s_size)
bcs L(LRend)
addl R(d0),R(d0) /* restore cy */
bra L(LRoop)
L(LRend:)
/* Restore used registers from stack frame. */
moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
rts
EPILOG(mpihelp_rshift)

93
mpi/m68k/mpih-sub1.S Normal file
View file

@ -0,0 +1,93 @@
/* mc68020 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
* store difference in a third limb vector.
*
* Copyright (C) 1992, 1994, 1996, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*******************
* mpi_limb_t
* mpihelp_sub_n( mpi_ptr_t res_ptr, (sp + 4)
* mpi_ptr_t s1_ptr, (sp + 8)
* mpi_ptr_t s2_ptr, (sp + 12)
* mpi_size_t size) (sp + 16)
*/
TEXT
ALIGN
GLOBL C_SYMBOL_NAME(mpihelp_sub_n)
C_SYMBOL_NAME(mpihelp_sub_n:)
PROLOG(mpihelp_sub_n)
/* Save used registers on the stack. */
movel R(d2),MEM_PREDEC(sp)
movel R(a2),MEM_PREDEC(sp)
/* Copy the arguments to registers. Better use movem? */
movel MEM_DISP(sp,12),R(a2)
movel MEM_DISP(sp,16),R(a0)
movel MEM_DISP(sp,20),R(a1)
movel MEM_DISP(sp,24),R(d2)
eorw #1,R(d2)
lsrl #1,R(d2)
bcc L(L1)
subql #1,R(d2) /* clears cy as side effect */
L(Loop:)
movel MEM_POSTINC(a0),R(d0)
movel MEM_POSTINC(a1),R(d1)
subxl R(d1),R(d0)
movel R(d0),MEM_POSTINC(a2)
L(L1:) movel MEM_POSTINC(a0),R(d0)
movel MEM_POSTINC(a1),R(d1)
subxl R(d1),R(d0)
movel R(d0),MEM_POSTINC(a2)
dbf R(d2),L(Loop) /* loop until 16 lsb of %4 == -1 */
subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */
subl #0x10000,R(d2)
bcs L(L2)
addl R(d0),R(d0) /* restore cy */
bra L(Loop)
L(L2:)
negl R(d0)
/* Restore used registers from stack frame. */
movel MEM_POSTINC(sp),R(a2)
movel MEM_POSTINC(sp),R(d2)
rts
EPILOG(mpihelp_sub_n)

187
mpi/m68k/syntax.h Normal file
View file

@ -0,0 +1,187 @@
/* asm.h -- Definitions for 68k syntax variations.
*
* Copyright (C) 1992, 1994, 1996, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#undef ALIGN
#ifdef MIT_SYNTAX
#define PROLOG(name)
#define EPILOG(name)
#define R(r)r
#define MEM(base)base@
#define MEM_DISP(base,displacement)base@(displacement)
#define MEM_INDX(base,idx,size_suffix)base@(idx:size_suffix)
#define MEM_INDX1(base,idx,size_suffix,scale)base@(idx:size_suffix:scale)
#define MEM_PREDEC(memory_base)memory_base@-
#define MEM_POSTINC(memory_base)memory_base@+
#define L(label) label
#define TEXT .text
#define ALIGN .even
#define GLOBL .globl
#define moveql moveq
/* Use variable sized opcodes. */
#define bcc jcc
#define bcs jcs
#define bls jls
#define beq jeq
#define bne jne
#define bra jra
#endif
#ifdef SONY_SYNTAX
#define PROLOG(name)
#define EPILOG(name)
#define R(r)r
#define MEM(base)(base)
#define MEM_DISP(base,displacement)(displacement,base)
#define MEM_INDX(base,idx,size_suffix)(base,idx.size_suffix)
#define MEM_INDX1(base,idx,size_suffix,scale)(base,idx.size_suffix*scale)
#define MEM_PREDEC(memory_base)-(memory_base)
#define MEM_POSTINC(memory_base)(memory_base)+
#define L(label) label
#define TEXT .text
#define ALIGN .even
#define GLOBL .globl
#endif
#ifdef MOTOROLA_SYNTAX
#define PROLOG(name)
#define EPILOG(name)
#define R(r)r
#define MEM(base)(base)
#define MEM_DISP(base,displacement)(displacement,base)
#define MEM_INDX(base,idx,size_suffix)(base,idx.size_suffix)
#define MEM_INDX1(base,idx,size_suffix,scale)(base,idx.size_suffix*scale)
#define MEM_PREDEC(memory_base)-(memory_base)
#define MEM_POSTINC(memory_base)(memory_base)+
#define L(label) label
#define TEXT
#define ALIGN
#define GLOBL XDEF
#define lea LEA
#define movel MOVE.L
#define moveml MOVEM.L
#define moveql MOVEQ.L
#define cmpl CMP.L
#define orl OR.L
#define clrl CLR.L
#define eorw EOR.W
#define lsrl LSR.L
#define lsll LSL.L
#define roxrl ROXR.L
#define roxll ROXL.L
#define addl ADD.L
#define addxl ADDX.L
#define addql ADDQ.L
#define subl SUB.L
#define subxl SUBX.L
#define subql SUBQ.L
#define negl NEG.L
#define mulul MULU.L
#define bcc BCC
#define bcs BCS
#define bls BLS
#define beq BEQ
#define bne BNE
#define bra BRA
#define dbf DBF
#define rts RTS
#define d0 D0
#define d1 D1
#define d2 D2
#define d3 D3
#define d4 D4
#define d5 D5
#define d6 D6
#define d7 D7
#define a0 A0
#define a1 A1
#define a2 A2
#define a3 A3
#define a4 A4
#define a5 A5
#define a6 A6
#define a7 A7
#define sp SP
#endif
#ifdef ELF_SYNTAX
#define PROLOG(name) .type name,@function
#define EPILOG(name) .size name,.-name
#define MEM(base)(R(base))
#define MEM_DISP(base,displacement)(displacement,R(base))
#define MEM_PREDEC(memory_base)-(R(memory_base))
#define MEM_POSTINC(memory_base)(R(memory_base))+
#ifdef __STDC__
#define R_(r)%##r
#define R(r)R_(r)
#define MEM_INDX_(base,idx,size_suffix)(R(base),R(idx##.##size_suffix))
#define MEM_INDX(base,idx,size_suffix)MEM_INDX_(base,idx,size_suffix)
#define MEM_INDX1_(base,idx,size_suffix,scale)(R(base),R(idx##.##size_suffix*scale))
#define MEM_INDX1(base,idx,size_suffix,scale)MEM_INDX1_(base,idx,size_suffix,scale)
#define L(label) .##label
#else
#define R(r)%/**/r
#define MEM_INDX(base,idx,size_suffix)(R(base),R(idx).size_suffix)
#define MEM_INDX1(base,idx,size_suffix,scale)(R(base),R(idx).size_suffix*scale)
#define L(label) ./**/label
#endif
#define TEXT .text
#define ALIGN .align 2
#define GLOBL .globl
#define bcc jbcc
#define bcs jbcs
#define bls jbls
#define beq jbeq
#define bne jbne
#define bra jbra
#endif
#if defined (SONY_SYNTAX) || defined (ELF_SYNTAX)
#define movel move.l
#define moveml movem.l
#define moveql moveq.l
#define cmpl cmp.l
#define orl or.l
#define clrl clr.l
#define eorw eor.w
#define lsrl lsr.l
#define lsll lsl.l
#define roxrl roxr.l
#define roxll roxl.l
#define addl add.l
#define addxl addx.l
#define addql addq.l
#define subl sub.l
#define subxl subx.l
#define subql subq.l
#define negl neg.l
#define mulul mulu.l
#endif
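
Editor's note: as with the x86 header, these macros let one source file assemble under several 68k assembler dialects. A small illustration of how one memory-to-register move written with the macros comes out under each syntax (expansions approximate):

/* As written in the portable .S files: */
movel MEM_POSTINC(a0),R(d0)

/* MIT_SYNTAX:        movel a0@+,d0      */
/* SONY_SYNTAX:       move.l (a0)+,d0    */
/* MOTOROLA_SYNTAX:   MOVE.L (A0)+,D0    */
/* ELF_SYNTAX:        move.l (%a0)+,%d0  */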

23
mpi/mips3/README Normal file
View file

@ -0,0 +1,23 @@
This directory contains mpn functions optimized for MIPS3. Examples of
processors that implement MIPS3 are the R4000, R4400, R4600, R4700, and R8000.
RELEVANT OPTIMIZATION ISSUES
1. On the R4000 and R4400, branches, both the plain and the "likely" ones,
take 3 cycles to execute. (The fastest possible loop will take 4 cycles,
because of the delay insn.)
On the R4600, branches take a single cycle.
On the R8000, branches often take no noticeable cycles, as they are
executed in a separate functional unit.
2. The R4000 and R4400 have a load latency of 4 cycles.
3. On the R4000 and R4400, multiplies take a data-dependent number of
cycles, contrary to the SGI documentation. There seem to be 3 or 4
possible latencies.
STATUS
Good...

10
mpi/mips3/distfiles Normal file
View file

@ -0,0 +1,10 @@
README
mpih-add1.S
mpih-sub1.S
mpih-mul1.S
mpih-mul2.S
mpih-mul3.S
mpih-lshift.S
mpih-rshift.S
mpi-asm-defs.h

10
mpi/mips3/mpi-asm-defs.h Normal file
View file

@ -0,0 +1,10 @@
/* This file defines some basic constants for the MPI machinery. We
 * need to define the types on a per-CPU basis, so it is done in this
 * per-CPU file. */
#define BYTES_PER_MPI_LIMB 8
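
Editor's note: this constant selects the limb width per CPU; 8 bytes means 64-bit limbs on MIPS3. Below is a hedged sketch of how such a constant is typically consumed; the actual typedef lives in mpi.h and may well be spelled differently.

/* Hypothetical illustration only -- not the GnuPG mpi.h definition. */
#if BYTES_PER_MPI_LIMB == 8
typedef unsigned long long mpi_limb_t;   /* 64-bit limbs, e.g. MIPS3 */
#elif BYTES_PER_MPI_LIMB == 4
typedef unsigned long mpi_limb_t;        /* 32-bit limbs, e.g. i386 */
#else
#error unsupported limb size
#endif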

124
mpi/mips3/mpih-add1.S Normal file
View file

@ -0,0 +1,124 @@
/* mips3 add_n -- Add two limb vectors of the same length > 0 and store
* sum in a third limb vector.
*
* Copyright (C) 1995, 1998, 2000
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*******************
* mpi_limb_t
* mpihelp_add_n( mpi_ptr_t res_ptr, ($4)
* mpi_ptr_t s1_ptr, ($5)
* mpi_ptr_t s2_ptr, ($6)
* mpi_size_t size) ($7)
*/
.text
.align 2
.globl mpihelp_add_n
.ent mpihelp_add_n
mpihelp_add_n:
.set noreorder
.set nomacro
ld $10,0($5)
ld $11,0($6)
daddiu $7,$7,-1
and $9,$7,4-1 # number of limbs in first loop
beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
move $2,$0
dsubu $7,$7,$9
.Loop0: daddiu $9,$9,-1
ld $12,8($5)
daddu $11,$11,$2
ld $13,8($6)
sltu $8,$11,$2
daddu $11,$10,$11
sltu $2,$11,$10
sd $11,0($4)
or $2,$2,$8
daddiu $5,$5,8
daddiu $6,$6,8
move $10,$12
move $11,$13
bne $9,$0,.Loop0
daddiu $4,$4,8
.L0: beq $7,$0,.Lend
nop
.Loop: daddiu $7,$7,-4
ld $12,8($5)
daddu $11,$11,$2
ld $13,8($6)
sltu $8,$11,$2
daddu $11,$10,$11
sltu $2,$11,$10
sd $11,0($4)
or $2,$2,$8
ld $10,16($5)
daddu $13,$13,$2
ld $11,16($6)
sltu $8,$13,$2
daddu $13,$12,$13
sltu $2,$13,$12
sd $13,8($4)
or $2,$2,$8
ld $12,24($5)
daddu $11,$11,$2
ld $13,24($6)
sltu $8,$11,$2
daddu $11,$10,$11
sltu $2,$11,$10
sd $11,16($4)
or $2,$2,$8
ld $10,32($5)
daddu $13,$13,$2
ld $11,32($6)
sltu $8,$13,$2
daddu $13,$12,$13
sltu $2,$13,$12
sd $13,24($4)
or $2,$2,$8
daddiu $5,$5,32
daddiu $6,$6,32
bne $7,$0,.Loop
daddiu $4,$4,32
.Lend: daddu $11,$11,$2
sltu $8,$11,$2
daddu $11,$10,$11
sltu $2,$11,$10
sd $11,0($4)
j $31
or $2,$2,$8
.end mpihelp_add_n
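
Editor's note: MIPS has no carry flag, so the loop above reconstructs the carry with sltu ("set on less than, unsigned") after each addition. In C, the same trick for a single limb looks roughly like the sketch below (64-bit limbs assumed, matching BYTES_PER_MPI_LIMB of 8; the function name is made up for illustration).

#include <stdint.h>

/* Add two limbs plus an incoming carry, producing the sum and the
 * outgoing carry without a carry flag -- the sltu technique above. */
uint64_t
add_limb_with_carry (uint64_t a, uint64_t b, uint64_t cy_in, uint64_t *cy_out)
{
    uint64_t t = b + cy_in;
    uint64_t c1 = (t < cy_in);     /* carry out of b + cy_in  (first sltu)  */
    uint64_t sum = a + t;
    uint64_t c2 = (sum < a);       /* carry out of a + t      (second sltu) */
    *cy_out = c1 | c2;
    return sum;
}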

97
mpi/mips3/mpih-lshift.S Normal file
View file

@ -0,0 +1,97 @@
/* mips3 lshift
*
* Copyright (C) 1995, 1998, 2000,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*******************
* mpi_limb_t
* mpihelp_lshift( mpi_ptr_t wp, ($4)
* mpi_ptr_t up, ($5)
* mpi_size_t usize, ($6)
* unsigned cnt) ($7)
*/
.text
.align 2
.globl mpihelp_lshift
.ent mpihelp_lshift
mpihelp_lshift:
.set noreorder
.set nomacro
dsll $2,$6,3
daddu $5,$5,$2 # make r5 point at end of src
ld $10,-8($5) # load first limb
dsubu $13,$0,$7
daddu $4,$4,$2 # make r4 point at end of res
daddiu $6,$6,-1
and $9,$6,4-1 # number of limbs in first loop
beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
dsrl $2,$10,$13 # compute function result
dsubu $6,$6,$9
.Loop0: ld $3,-16($5)
daddiu $4,$4,-8
daddiu $5,$5,-8
daddiu $9,$9,-1
dsll $11,$10,$7
dsrl $12,$3,$13
move $10,$3
or $8,$11,$12
bne $9,$0,.Loop0
sd $8,0($4)
.L0: beq $6,$0,.Lend
nop
.Loop: ld $3,-16($5)
daddiu $4,$4,-32
daddiu $6,$6,-4
dsll $11,$10,$7
dsrl $12,$3,$13
ld $10,-24($5)
dsll $14,$3,$7
or $8,$11,$12
sd $8,24($4)
dsrl $9,$10,$13
ld $3,-32($5)
dsll $11,$10,$7
or $8,$14,$9
sd $8,16($4)
dsrl $12,$3,$13
ld $10,-40($5)
dsll $14,$3,$7
or $8,$11,$12
sd $8,8($4)
dsrl $9,$10,$13
daddiu $5,$5,-32
or $8,$14,$9
bgtz $6,.Loop
sd $8,0($4)
.Lend: dsll $8,$10,$7
j $31
sd $8,-8($4)
.end mpihelp_lshift
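A hedged C sketch of the shift documented above: shift usize limbs left by cnt bits (1 <= cnt < BITS_PER_MPI_LIMB), store the result at wp and return the bits shifted out of the top limb. Like the assembly it walks from the most significant limb downwards, so in-place use (wp == up) is safe. sketch_lshift is an illustrative name only.
#include "mpi-internal.h"
mpi_limb_t
sketch_lshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt )
{
    unsigned sh_2 = BITS_PER_MPI_LIMB - cnt;
    mpi_limb_t retval, high_limb, low_limb;
    mpi_size_t i;

    low_limb = up[usize-1];
    retval = low_limb >> sh_2;              /* bits pushed out at the top */
    high_limb = low_limb << cnt;
    for( i = usize - 2; i >= 0; i-- ) {
        low_limb = up[i];
        wp[i+1] = high_limb | (low_limb >> sh_2);
        high_limb = low_limb << cnt;
    }
    wp[0] = high_limb;
    return retval;
}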

89
mpi/mips3/mpih-mul1.S Normal file

@ -0,0 +1,89 @@
/* mips3 mpih-mul1.S -- Multiply a limb vector with a limb and store
* the result in a second limb vector.
*
* Copyright (C) 1992, 1994, 1995, 1998, 2000
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*******************
* mpi_limb_t
* mpihelp_mul_1( mpi_ptr_t res_ptr, (r4)
* mpi_ptr_t s1_ptr, (r5)
* mpi_size_t s1_size, (r6)
* mpi_limb_t s2_limb) (r7)
*/
.text
.align 4
.globl mpihelp_mul_1
.ent mpihelp_mul_1
mpihelp_mul_1:
.set noreorder
.set nomacro
/* # warm up phase 0 */
ld $8,0($5)
/* # warm up phase 1 */
daddiu $5,$5,8
dmultu $8,$7
daddiu $6,$6,-1
beq $6,$0,$LC0
move $2,$0 # zero cy2
daddiu $6,$6,-1
beq $6,$0,$LC1
ld $8,0($5) # load new s1 limb as early as possible
Loop: mflo $10
mfhi $9
daddiu $5,$5,8
daddu $10,$10,$2 # add old carry limb to low product limb
dmultu $8,$7
ld $8,0($5) # load new s1 limb as early as possible
daddiu $6,$6,-1 # decrement loop counter
sltu $2,$10,$2 # carry from previous addition -> $2
sd $10,0($4)
daddiu $4,$4,8
bne $6,$0,Loop
daddu $2,$9,$2 # add high product limb and carry from addition
/* # cool down phase 1 */
$LC1: mflo $10
mfhi $9
daddu $10,$10,$2
sltu $2,$10,$2
dmultu $8,$7
sd $10,0($4)
daddiu $4,$4,8
daddu $2,$9,$2 # add high product limb and carry from addition
/* # cool down phase 0 */
$LC0: mflo $10
mfhi $9
daddu $10,$10,$2
sltu $2,$10,$2
sd $10,0($4)
j $31
daddu $2,$9,$2 # add high product limb and carry from addition
.end mpihelp_mul_1
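What the dmultu/mflo/mfhi pipeline above computes, written as a hedged C sketch on top of the umul_ppmm macro from longlong.h: multiply every source limb by s2_limb, fold in the running carry, store the low halves and return the final carry limb. Illustration only, not the shipped generic code.
#include "mpi-internal.h"
#include "longlong.h"
mpi_limb_t
sketch_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
              mpi_size_t s1_size, mpi_limb_t s2_limb )
{
    mpi_limb_t cy = 0, prod_high, prod_low;
    mpi_size_t i;

    for( i = 0; i < s1_size; i++ ) {
        umul_ppmm( prod_high, prod_low, s1_ptr[i], s2_limb );
        prod_low += cy;                     /* add the carry-in to the low half */
        cy = prod_high + (prod_low < cy);   /* next carry: high half, +1 on wrap */
        res_ptr[i] = prod_low;
    }
    return cy;
}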

101
mpi/mips3/mpih-mul2.S Normal file

@ -0,0 +1,101 @@
/* MIPS3 addmul_1 -- Multiply a limb vector with a single limb and
* add the product to a second limb vector.
*
* Copyright (C) 1992, 1994, 1995, 1998, 2000
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*******************
* mpi_limb_t
* mpihelp_addmul_1( mpi_ptr_t res_ptr, (r4)
* mpi_ptr_t s1_ptr, (r5)
* mpi_size_t s1_size, (r6)
* mpi_limb_t s2_limb) (r7)
*/
.text
.align 4
.globl mpihelp_addmul_1
.ent mpihelp_addmul_1
mpihelp_addmul_1:
.set noreorder
.set nomacro
/* # warm up phase 0 */
ld $8,0($5)
/* # warm up phase 1 */
daddiu $5,$5,8
dmultu $8,$7
daddiu $6,$6,-1
beq $6,$0,$LC0
move $2,$0 # zero cy2
daddiu $6,$6,-1
beq $6,$0,$LC1
ld $8,0($5) # load new s1 limb as early as possible
Loop: ld $10,0($4)
mflo $3
mfhi $9
daddiu $5,$5,8
daddu $3,$3,$2 # add old carry limb to low product limb
dmultu $8,$7
ld $8,0($5) # load new s1 limb as early as possible
daddiu $6,$6,-1 # decrement loop counter
sltu $2,$3,$2 # carry from previous addition -> $2
daddu $3,$10,$3
sltu $10,$3,$10
daddu $2,$2,$10
sd $3,0($4)
daddiu $4,$4,8
bne $6,$0,Loop
daddu $2,$9,$2 # add high product limb and carry from addition
/* # cool down phase 1 */
$LC1: ld $10,0($4)
mflo $3
mfhi $9
daddu $3,$3,$2
sltu $2,$3,$2
dmultu $8,$7
daddu $3,$10,$3
sltu $10,$3,$10
daddu $2,$2,$10
sd $3,0($4)
daddiu $4,$4,8
daddu $2,$9,$2 # add high product limb and carry from addition
/* # cool down phase 0 */
$LC0: ld $10,0($4)
mflo $3
mfhi $9
daddu $3,$3,$2
sltu $2,$3,$2
daddu $3,$10,$3
sltu $10,$3,$10
daddu $2,$2,$10
sd $3,0($4)
j $31
daddu $2,$9,$2 # add high product limb and carry from addition
.end mpihelp_addmul_1
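The same idea with the destination folded in, i.e. res_ptr[i] += s1_ptr[i] * s2_limb plus the running carry, as a hedged C sketch (assuming umul_ppmm from longlong.h; sketch_addmul_1 is an illustrative name).
#include "mpi-internal.h"
#include "longlong.h"
mpi_limb_t
sketch_addmul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
                 mpi_size_t s1_size, mpi_limb_t s2_limb )
{
    mpi_limb_t cy = 0, x, prod_high, prod_low;
    mpi_size_t i;

    for( i = 0; i < s1_size; i++ ) {
        umul_ppmm( prod_high, prod_low, s1_ptr[i], s2_limb );
        prod_low += cy;                     /* fold in the previous carry */
        cy = prod_high + (prod_low < cy);
        x = res_ptr[i];
        prod_low = x + prod_low;            /* add into the destination limb */
        cy += prod_low < x;
        res_ptr[i] = prod_low;
    }
    return cy;                              /* carry for the next higher limb */
}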

101
mpi/mips3/mpih-mul3.S Normal file

@ -0,0 +1,101 @@
/* MIPS3 submul_1 -- Multiply a limb vector with a single limb and
* subtract the product from a second limb vector.
*
* Copyright (C) 1992, 1994, 1995, 1998, 2000
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*******************
* mpi_limb_t
* mpihelp_submul_1( mpi_ptr_t res_ptr, (r4)
* mpi_ptr_t s1_ptr, (r5)
* mpi_size_t s1_size, (r6)
* mpi_limb_t s2_limb) (r7)
*/
.text
.align 4
.globl mpihelp_submul_1
.ent mpihelp_submul_1
mpihelp_submul_1:
.set noreorder
.set nomacro
/* # warm up phase 0 */
ld $8,0($5)
/* # warm up phase 1 */
daddiu $5,$5,8
dmultu $8,$7
daddiu $6,$6,-1
beq $6,$0,$LC0
move $2,$0 # zero cy2
daddiu $6,$6,-1
beq $6,$0,$LC1
ld $8,0($5) # load new s1 limb as early as possible
Loop: ld $10,0($4)
mflo $3
mfhi $9
daddiu $5,$5,8
daddu $3,$3,$2 # add old carry limb to low product limb
dmultu $8,$7
ld $8,0($5) # load new s1 limb as early as possible
daddiu $6,$6,-1 # decrement loop counter
sltu $2,$3,$2 # carry from previous addition -> $2
dsubu $3,$10,$3
sgtu $10,$3,$10
daddu $2,$2,$10
sd $3,0($4)
daddiu $4,$4,8
bne $6,$0,Loop
daddu $2,$9,$2 # add high product limb and carry from addition
/* # cool down phase 1 */
$LC1: ld $10,0($4)
mflo $3
mfhi $9
daddu $3,$3,$2
sltu $2,$3,$2
dmultu $8,$7
dsubu $3,$10,$3
sgtu $10,$3,$10
daddu $2,$2,$10
sd $3,0($4)
daddiu $4,$4,8
daddu $2,$9,$2 # add high product limb and carry from addition
/* # cool down phase 0 */
$LC0: ld $10,0($4)
mflo $3
mfhi $9
daddu $3,$3,$2
sltu $2,$3,$2
dsubu $3,$10,$3
sgtu $10,$3,$10
daddu $2,$2,$10
sd $3,0($4)
j $31
daddu $2,$9,$2 # add high product limb and carry from addition
.end mpihelp_submul_1
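And the subtractive counterpart: res_ptr[i] -= s1_ptr[i] * s2_limb with borrow propagation, returning the amount borrowed past the top limb. Hedged sketch assuming umul_ppmm; not the shipped generic code.
#include "mpi-internal.h"
#include "longlong.h"
mpi_limb_t
sketch_submul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
                 mpi_size_t s1_size, mpi_limb_t s2_limb )
{
    mpi_limb_t cy = 0, x, prod_high, prod_low;
    mpi_size_t i;

    for( i = 0; i < s1_size; i++ ) {
        umul_ppmm( prod_high, prod_low, s1_ptr[i], s2_limb );
        prod_low += cy;                     /* fold in the previous borrow */
        cy = prod_high + (prod_low < cy);
        x = res_ptr[i];
        cy += x < prod_low;                 /* borrow if the subtraction wraps */
        res_ptr[i] = x - prod_low;
    }
    return cy;
}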

95
mpi/mips3/mpih-rshift.S Normal file

@ -0,0 +1,95 @@
/* mips3 rshift
*
* Copyright (C) 1995, 1998, 2000
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*******************
* mpi_limb_t
* mpihelp_rshift( mpi_ptr_t wp, ($4)
* mpi_ptr_t up, ($5)
* mpi_size_t usize, ($6)
* unsigned cnt) ($7)
*/
.text
.align 2
.globl mpihelp_rshift
.ent mpihelp_rshift
mpihelp_rshift:
.set noreorder
.set nomacro
ld $10,0($5) # load first limb
dsubu $13,$0,$7
daddiu $6,$6,-1
and $9,$6,4-1 # number of limbs in first loop
beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
dsll $2,$10,$13 # compute function result
dsubu $6,$6,$9
.Loop0: ld $3,8($5)
daddiu $4,$4,8
daddiu $5,$5,8
daddiu $9,$9,-1
dsrl $11,$10,$7
dsll $12,$3,$13
move $10,$3
or $8,$11,$12
bne $9,$0,.Loop0
sd $8,-8($4)
.L0: beq $6,$0,.Lend
nop
.Loop: ld $3,8($5)
daddiu $4,$4,32
daddiu $6,$6,-4
dsrl $11,$10,$7
dsll $12,$3,$13
ld $10,16($5)
dsrl $14,$3,$7
or $8,$11,$12
sd $8,-32($4)
dsll $9,$10,$13
ld $3,24($5)
dsrl $11,$10,$7
or $8,$14,$9
sd $8,-24($4)
dsll $12,$3,$13
ld $10,32($5)
dsrl $14,$3,$7
or $8,$11,$12
sd $8,-16($4)
dsll $9,$10,$13
daddiu $5,$5,32
or $8,$14,$9
bgtz $6,.Loop
sd $8,-8($4)
.Lend: dsrl $8,$10,$7
j $31
sd $8,0($4)
.end mpihelp_rshift
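A hedged C sketch of the right shift: shift usize limbs right by cnt bits (1 <= cnt < BITS_PER_MPI_LIMB) and return the bits shifted out at the bottom, left-aligned in the return value. It walks from the least significant limb upwards, so in-place use (wp == up) is safe. Illustration only.
#include "mpi-internal.h"
mpi_limb_t
sketch_rshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt )
{
    unsigned sh_2 = BITS_PER_MPI_LIMB - cnt;
    mpi_limb_t retval = up[0] << sh_2;      /* bits pushed out at the bottom */
    mpi_size_t i;

    for( i = 0; i < usize - 1; i++ )
        wp[i] = (up[i] >> cnt) | (up[i+1] << sh_2);
    wp[usize-1] = up[usize-1] >> cnt;
    return retval;
}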

125
mpi/mips3/mpih-sub1.S Normal file

@ -0,0 +1,125 @@
/* mips3 sub_n -- Subtract two limb vectors of the same length > 0 and
* store difference in a third limb vector.
*
* Copyright (C) 1995, 1998, 1999, 2000,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*******************
* mpi_limb_t
* mpihelp_sub_n( mpi_ptr_t res_ptr, (r4)
* mpi_ptr_t s1_ptr, (r5)
* mpi_ptr_t s2_ptr, (r6)
* mpi_size_t size) (r7)
*/
.text
.align 2
.globl mpihelp_sub_n
.ent mpihelp_sub_n
mpihelp_sub_n:
.set noreorder
.set nomacro
ld $10,0($5)
ld $11,0($6)
daddiu $7,$7,-1
and $9,$7,4-1 # number of limbs in first loop
beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
move $2,$0
dsubu $7,$7,$9
.Loop0: daddiu $9,$9,-1
ld $12,8($5)
daddu $11,$11,$2
ld $13,8($6)
sltu $8,$11,$2
dsubu $11,$10,$11
sltu $2,$10,$11
sd $11,0($4)
or $2,$2,$8
daddiu $5,$5,8
daddiu $6,$6,8
move $10,$12
move $11,$13
bne $9,$0,.Loop0
daddiu $4,$4,8
.L0: beq $7,$0,.Lend
nop
.Loop: daddiu $7,$7,-4
ld $12,8($5)
daddu $11,$11,$2
ld $13,8($6)
sltu $8,$11,$2
dsubu $11,$10,$11
sltu $2,$10,$11
sd $11,0($4)
or $2,$2,$8
ld $10,16($5)
daddu $13,$13,$2
ld $11,16($6)
sltu $8,$13,$2
dsubu $13,$12,$13
sltu $2,$12,$13
sd $13,8($4)
or $2,$2,$8
ld $12,24($5)
daddu $11,$11,$2
ld $13,24($6)
sltu $8,$11,$2
dsubu $11,$10,$11
sltu $2,$10,$11
sd $11,16($4)
or $2,$2,$8
ld $10,32($5)
daddu $13,$13,$2
ld $11,32($6)
sltu $8,$13,$2
dsubu $13,$12,$13
sltu $2,$12,$13
sd $13,24($4)
or $2,$2,$8
daddiu $5,$5,32
daddiu $6,$6,32
bne $7,$0,.Loop
daddiu $4,$4,32
.Lend: daddu $11,$11,$2
sltu $8,$11,$2
dsubu $11,$10,$11
sltu $2,$10,$11
sd $11,0($4)
j $31
or $2,$2,$8
.end mpihelp_sub_n
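The borrow logic of the dsubu/sltu sequence above, as a hedged C sketch: subtract two equally long limb vectors and return the final borrow (0 or 1). Types from mpi-internal.h; sketch_sub_n is an illustrative name only.
#include "mpi-internal.h"
mpi_limb_t
sketch_sub_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
              mpi_ptr_t s2_ptr, mpi_size_t size )
{
    mpi_limb_t x, y, cy = 0;
    mpi_size_t i;

    for( i = 0; i < size; i++ ) {
        x = s1_ptr[i];
        y = s2_ptr[i] + cy;     /* add the borrow from the previous limb */
        cy = y < cy;            /* borrow generated by that addition */
        cy += x < y;            /* borrow if the subtraction underflows */
        res_ptr[i] = x - y;
    }
    return cy;
}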

247
mpi/mpi-add.c Normal file

@ -0,0 +1,247 @@
/* mpi-add.c - MPI functions
* Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
* Copyright (C) 1994, 1996 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"
/****************
* Add the unsigned integer V to the mpi-integer U and store the
* result in W. U and V may be the same.
*/
void
mpi_add_ui(MPI w, MPI u, unsigned long v )
{
mpi_ptr_t wp, up;
mpi_size_t usize, wsize;
int usign, wsign;
usize = u->nlimbs;
usign = u->sign;
wsign = 0;
/* If not space for W (and possible carry), increase space. */
wsize = usize + 1;
if( w->alloced < wsize )
mpi_resize(w, wsize);
/* These must be after realloc (U may be the same as W). */
up = u->d;
wp = w->d;
if( !usize ) { /* simple */
wp[0] = v;
wsize = v? 1:0;
}
else if( !usign ) { /* mpi is not negative */
mpi_limb_t cy;
cy = mpihelp_add_1(wp, up, usize, v);
wp[usize] = cy;
wsize = usize + cy;
}
else { /* The signs are different. Need exact comparison to determine
* which operand to subtract from which. */
if( usize == 1 && up[0] < v ) {
wp[0] = v - up[0];
wsize = 1;
}
else {
mpihelp_sub_1(wp, up, usize, v);
/* Size can decrease with at most one limb. */
wsize = usize - (wp[usize-1]==0);
wsign = 1;
}
}
w->nlimbs = wsize;
w->sign = wsign;
}
void
mpi_add(MPI w, MPI u, MPI v)
{
mpi_ptr_t wp, up, vp;
mpi_size_t usize, vsize, wsize;
int usign, vsign, wsign;
if( u->nlimbs < v->nlimbs ) { /* Swap U and V. */
usize = v->nlimbs;
usign = v->sign;
vsize = u->nlimbs;
vsign = u->sign;
wsize = usize + 1;
RESIZE_IF_NEEDED(w, wsize);
/* These must be after realloc (u or v may be the same as w). */
up = v->d;
vp = u->d;
}
else {
usize = u->nlimbs;
usign = u->sign;
vsize = v->nlimbs;
vsign = v->sign;
wsize = usize + 1;
RESIZE_IF_NEEDED(w, wsize);
/* These must be after realloc (u or v may be the same as w). */
up = u->d;
vp = v->d;
}
wp = w->d;
wsign = 0;
if( !vsize ) { /* simple */
MPN_COPY(wp, up, usize );
wsize = usize;
wsign = usign;
}
else if( usign != vsign ) { /* different sign */
/* This test is right since USIZE >= VSIZE */
if( usize != vsize ) {
mpihelp_sub(wp, up, usize, vp, vsize);
wsize = usize;
MPN_NORMALIZE(wp, wsize);
wsign = usign;
}
else if( mpihelp_cmp(up, vp, usize) < 0 ) {
mpihelp_sub_n(wp, vp, up, usize);
wsize = usize;
MPN_NORMALIZE(wp, wsize);
if( !usign )
wsign = 1;
}
else {
mpihelp_sub_n(wp, up, vp, usize);
wsize = usize;
MPN_NORMALIZE(wp, wsize);
if( usign )
wsign = 1;
}
}
else { /* U and V have same sign. Add them. */
mpi_limb_t cy = mpihelp_add(wp, up, usize, vp, vsize);
wp[usize] = cy;
wsize = usize + cy;
if( usign )
wsign = 1;
}
w->nlimbs = wsize;
w->sign = wsign;
}
/****************
* Subtract the unsigned integer V from the mpi-integer U and store the
* result in W.
*/
void
mpi_sub_ui(MPI w, MPI u, unsigned long v )
{
mpi_ptr_t wp, up;
mpi_size_t usize, wsize;
int usign, wsign;
usize = u->nlimbs;
usign = u->sign;
wsign = 0;
/* If not space for W (and possible carry), increase space. */
wsize = usize + 1;
if( w->alloced < wsize )
mpi_resize(w, wsize);
/* These must be after realloc (U may be the same as W). */
up = u->d;
wp = w->d;
if( !usize ) { /* simple */
wp[0] = v;
wsize = v? 1:0;
wsign = 1;
}
else if( usign ) { /* mpi and v are negative */
mpi_limb_t cy;
cy = mpihelp_add_1(wp, up, usize, v);
wp[usize] = cy;
wsize = usize + cy;
wsign = 1;
}
else { /* The signs are different. Need exact comparison to determine
* which operand to subtract from which. */
if( usize == 1 && up[0] < v ) {
wp[0] = v - up[0];
wsize = 1;
wsign = 1;
}
else {
mpihelp_sub_1(wp, up, usize, v);
/* Size can decrease with at most one limb. */
wsize = usize - (wp[usize-1]==0);
}
}
w->nlimbs = wsize;
w->sign = wsign;
}
void
mpi_sub(MPI w, MPI u, MPI v)
{
if( w == v ) {
MPI vv = mpi_copy(v);
vv->sign = !vv->sign;
mpi_add( w, u, vv );
mpi_free(vv);
}
else {
/* fixme: this is not thread-save (we temp. modify v) */
v->sign = !v->sign;
mpi_add( w, u, v );
v->sign = !v->sign;
}
}
void
mpi_addm( MPI w, MPI u, MPI v, MPI m)
{
mpi_add(w, u, v);
mpi_fdiv_r( w, w, m );
}
void
mpi_subm( MPI w, MPI u, MPI v, MPI m)
{
mpi_sub(w, u, v);
mpi_fdiv_r( w, w, m );
}
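A small hedged usage sketch of the signed add/sub entry points defined above. check_add_sub() is a hypothetical test helper written only for this illustration; the mpi_* calls are the library functions declared in mpi.h.
#include <stdlib.h>
#include "mpi.h"
static void
check_add_sub(void)
{
    MPI a = mpi_alloc_set_ui( 5 );
    MPI b = mpi_alloc_set_ui( 7 );
    MPI r = mpi_alloc( 0 );

    mpi_sub( r, a, b );          /* r = 5 - 7 = -2 (sign flag set) */
    mpi_add_ui( r, r, 2 );       /* r = -2 + 2 = 0 */
    if( mpi_cmp_ui( r, 0 ) )
        abort();                 /* would indicate a sign-handling problem */

    mpi_free( a );
    mpi_free( b );
    mpi_free( r );
}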

262
mpi/mpi-bit.c Normal file

@ -0,0 +1,262 @@
/* mpi-bit.c - MPI bit level functions
* Copyright (C) 1998, 1999 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "mpi-internal.h"
#include "longlong.h"
#ifdef MPI_INTERNAL_NEED_CLZ_TAB
#ifdef __STDC__
const
#endif
unsigned char
__clz_tab[] =
{
0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
};
#endif
#define A_LIMB_1 ((mpi_limb_t)1)
/****************
* Sometimes we have MSLs (most significant limbs) which are 0;
* this is not good for some operations, so this function removes them.
*/
void
mpi_normalize( MPI a )
{
if( mpi_is_protected(a) )
return;
for( ; a->nlimbs && !a->d[a->nlimbs-1]; a->nlimbs-- )
;
}
/****************
* Return the number of bits in A.
*/
unsigned
mpi_get_nbits( MPI a )
{
unsigned n;
if( mpi_is_protected(a) ) {
n = mpi_get_nbit_info(a);
if( !n )
n = a->nlimbs * BITS_PER_MPI_LIMB;
return n;
}
mpi_normalize( a );
if( a->nlimbs ) {
mpi_limb_t alimb = a->d[a->nlimbs-1];
if( alimb )
count_leading_zeros( n, alimb );
else
n = BITS_PER_MPI_LIMB;
n = BITS_PER_MPI_LIMB - n + (a->nlimbs-1) * BITS_PER_MPI_LIMB;
}
else
n = 0;
return n;
}
/****************
* Test whether bit N is set.
*/
int
mpi_test_bit( MPI a, unsigned n )
{
unsigned limbno, bitno;
mpi_limb_t limb;
limbno = n / BITS_PER_MPI_LIMB;
bitno = n % BITS_PER_MPI_LIMB;
if( limbno >= a->nlimbs )
return 0; /* too far left: this is a 0 */
limb = a->d[limbno];
return (limb & (A_LIMB_1 << bitno))? 1: 0;
}
/****************
* Set bit N of A.
*/
void
mpi_set_bit( MPI a, unsigned n )
{
unsigned limbno, bitno;
limbno = n / BITS_PER_MPI_LIMB;
bitno = n % BITS_PER_MPI_LIMB;
if( limbno >= a->nlimbs ) { /* resize */
if( a->alloced < limbno+1 )
mpi_resize(a, limbno+1 );
a->nlimbs = limbno+1;
}
a->d[limbno] |= (A_LIMB_1<<bitno);
}
/****************
* Set bit N of A. and clear all bits above
*/
void
mpi_set_highbit( MPI a, unsigned n )
{
unsigned limbno, bitno;
limbno = n / BITS_PER_MPI_LIMB;
bitno = n % BITS_PER_MPI_LIMB;
if( limbno >= a->nlimbs ) { /* resize */
if( a->alloced < limbno+1 )
mpi_resize(a, limbno+1 );
a->nlimbs = limbno+1;
}
a->d[limbno] |= (A_LIMB_1<<bitno);
for( bitno++; bitno < BITS_PER_MPI_LIMB; bitno++ )
a->d[limbno] &= ~(A_LIMB_1 << bitno);
a->nlimbs = limbno+1;
}
/****************
* clear bit N of A and all bits above
*/
void
mpi_clear_highbit( MPI a, unsigned n )
{
unsigned limbno, bitno;
limbno = n / BITS_PER_MPI_LIMB;
bitno = n % BITS_PER_MPI_LIMB;
if( limbno >= a->nlimbs )
return; /* not allocated, so no need to clear bits :-) */
for( ; bitno < BITS_PER_MPI_LIMB; bitno++ )
a->d[limbno] &= ~(A_LIMB_1 << bitno);
a->nlimbs = limbno+1;
}
/****************
* Clear bit N of A.
*/
void
mpi_clear_bit( MPI a, unsigned n )
{
unsigned limbno, bitno;
limbno = n / BITS_PER_MPI_LIMB;
bitno = n % BITS_PER_MPI_LIMB;
if( limbno >= a->nlimbs )
return; /* don't need to clear this bit, it's too far to the left */
a->d[limbno] &= ~(A_LIMB_1 << bitno);
}
/****************
* Shift A by N bits to the right
* FIXME: should use alloc_limb if X and A are same.
*/
void
mpi_rshift( MPI x, MPI a, unsigned n )
{
mpi_ptr_t xp;
mpi_size_t xsize;
xsize = a->nlimbs;
x->sign = a->sign;
RESIZE_IF_NEEDED(x, xsize);
xp = x->d;
if( xsize ) {
mpihelp_rshift( xp, a->d, xsize, n);
MPN_NORMALIZE( xp, xsize);
}
x->nlimbs = xsize;
}
/****************
* Shift A by COUNT limbs to the left
* This is used only within the MPI library
*/
void
mpi_lshift_limbs( MPI a, unsigned int count )
{
mpi_ptr_t ap = a->d;
int n = a->nlimbs;
int i;
if( !count || !n )
return;
RESIZE_IF_NEEDED( a, n+count );
for( i = n-1; i >= 0; i-- )
ap[i+count] = ap[i];
for(i=0; i < count; i++ )
ap[i] = 0;
a->nlimbs += count;
}
/****************
* Shift A by COUNT limbs to the right
* This is used only within the MPI library
*/
void
mpi_rshift_limbs( MPI a, unsigned int count )
{
mpi_ptr_t ap = a->d;
mpi_size_t n = a->nlimbs;
unsigned int i;
if( count >= n ) {
a->nlimbs = 0;
return;
}
for( i = 0; i < n - count; i++ )
ap[i] = ap[i+count];
ap[i] = 0;
a->nlimbs -= count;
}
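A short hedged sketch exercising the bit-level helpers above; bit_demo() is a hypothetical helper written only for this illustration.
#include <stdlib.h>
#include "mpi.h"
static void
bit_demo(void)
{
    MPI a = mpi_alloc_set_ui( 1 );

    mpi_set_bit( a, 10 );                   /* a = 2^10 + 1 = 1025 */
    if( !mpi_test_bit( a, 10 ) || mpi_get_nbits( a ) != 11 )
        abort();

    mpi_rshift( a, a, 1 );                  /* a = 512 */
    if( mpi_cmp_ui( a, 512 ) )
        abort();

    mpi_free( a );
}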

74
mpi/mpi-cmp.c Normal file

@ -0,0 +1,74 @@
/* mpi-cmp.c - MPI functions
* Copyright (C) 1998, 1999 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"
int
mpi_cmp_ui( MPI u, unsigned long v )
{
mpi_limb_t limb = v;
mpi_normalize( u );
if( !u->nlimbs && !limb )
return 0;
if( u->sign )
return -1;
if( u->nlimbs > 1 )
return 1;
if( u->d[0] == limb )
return 0;
else if( u->d[0] > limb )
return 1;
else
return -1;
}
int
mpi_cmp( MPI u, MPI v )
{
mpi_size_t usize, vsize;
int cmp;
mpi_normalize( u );
mpi_normalize( v );
usize = u->nlimbs;
vsize = v->nlimbs;
if( !u->sign && v->sign )
return 1;
if( u->sign && !v->sign )
return -1;
if( usize != vsize && !u->sign && !v->sign )
return usize - vsize;
if( usize != vsize && u->sign && v->sign )
return vsize - usize;
if( !usize )
return 0;
if( !(cmp=mpihelp_cmp( u->d, v->d, usize )) )
return 0;
if( (cmp < 0?1:0) == (u->sign?1:0))
return 1;
return -1;
}

322
mpi/mpi-div.c Normal file

@ -0,0 +1,322 @@
/* mpi-div.c - MPI functions
* Copyright (C) 1994, 1996 Free Software Foundation, Inc.
* Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"
#include "longlong.h"
void
mpi_fdiv_r( MPI rem, MPI dividend, MPI divisor )
{
int divisor_sign = divisor->sign;
MPI temp_divisor = NULL;
/* We need the original value of the divisor after the remainder has been
* preliminarily calculated. We have to copy it to temporary space if it's
* the same variable as REM. */
if( rem == divisor ) {
temp_divisor = mpi_copy( divisor );
divisor = temp_divisor;
}
mpi_tdiv_r( rem, dividend, divisor );
if( ((divisor_sign?1:0) ^ (dividend->sign?1:0)) && rem->nlimbs )
mpi_add( rem, rem, divisor);
if( temp_divisor )
mpi_free(temp_divisor);
}
/****************
* Division rounding the quotient towards -infinity.
* The remainder gets the same sign as the denominator.
* rem is optional
*/
ulong
mpi_fdiv_r_ui( MPI rem, MPI dividend, ulong divisor )
{
mpi_limb_t rlimb;
rlimb = mpihelp_mod_1( dividend->d, dividend->nlimbs, divisor );
if( rlimb && dividend->sign )
rlimb = divisor - rlimb;
if( rem ) {
rem->d[0] = rlimb;
rem->nlimbs = rlimb? 1:0;
}
return rlimb;
}
void
mpi_fdiv_q( MPI quot, MPI dividend, MPI divisor )
{
MPI tmp = mpi_alloc( mpi_get_nlimbs(quot) );
mpi_fdiv_qr( quot, tmp, dividend, divisor);
mpi_free(tmp);
}
void
mpi_fdiv_qr( MPI quot, MPI rem, MPI dividend, MPI divisor )
{
int divisor_sign = divisor->sign;
MPI temp_divisor = NULL;
if( quot == divisor || rem == divisor ) {
temp_divisor = mpi_copy( divisor );
divisor = temp_divisor;
}
mpi_tdiv_qr( quot, rem, dividend, divisor );
if( (divisor_sign ^ dividend->sign) && rem->nlimbs ) {
mpi_sub_ui( quot, quot, 1 );
mpi_add( rem, rem, divisor);
}
if( temp_divisor )
mpi_free(temp_divisor);
}
/* If den == quot, den needs temporary storage.
* If den == rem, den needs temporary storage.
* If num == quot, num needs temporary storage.
* If den has temporary storage, it can be normalized while being copied,
* i.e no extra storage should be allocated.
*/
void
mpi_tdiv_r( MPI rem, MPI num, MPI den)
{
mpi_tdiv_qr(NULL, rem, num, den );
}
void
mpi_tdiv_qr( MPI quot, MPI rem, MPI num, MPI den)
{
mpi_ptr_t np, dp;
mpi_ptr_t qp, rp;
mpi_size_t nsize = num->nlimbs;
mpi_size_t dsize = den->nlimbs;
mpi_size_t qsize, rsize;
mpi_size_t sign_remainder = num->sign;
mpi_size_t sign_quotient = num->sign ^ den->sign;
unsigned normalization_steps;
mpi_limb_t q_limb;
mpi_ptr_t marker[5];
int markidx=0;
/* Ensure space is enough for quotient and remainder.
* We need space for an extra limb in the remainder, because it's
* up-shifted (normalized) below. */
rsize = nsize + 1;
mpi_resize( rem, rsize);
qsize = rsize - dsize; /* qsize cannot be bigger than this. */
if( qsize <= 0 ) {
if( num != rem ) {
rem->nlimbs = num->nlimbs;
rem->sign = num->sign;
MPN_COPY(rem->d, num->d, nsize);
}
if( quot ) {
/* This needs to follow the assignment to rem, in case the
* numerator and quotient are the same. */
quot->nlimbs = 0;
quot->sign = 0;
}
return;
}
if( quot )
mpi_resize( quot, qsize);
/* Read pointers here, when reallocation is finished. */
np = num->d;
dp = den->d;
rp = rem->d;
/* Optimize division by a single-limb divisor. */
if( dsize == 1 ) {
mpi_limb_t rlimb;
if( quot ) {
qp = quot->d;
rlimb = mpihelp_divmod_1( qp, np, nsize, dp[0] );
qsize -= qp[qsize - 1] == 0;
quot->nlimbs = qsize;
quot->sign = sign_quotient;
}
else
rlimb = mpihelp_mod_1( np, nsize, dp[0] );
rp[0] = rlimb;
rsize = rlimb != 0?1:0;
rem->nlimbs = rsize;
rem->sign = sign_remainder;
return;
}
if( quot ) {
qp = quot->d;
/* Make sure QP and NP point to different objects. Otherwise the
* numerator would be gradually overwritten by the quotient limbs. */
if(qp == np) { /* Copy NP object to temporary space. */
np = marker[markidx++] = mpi_alloc_limb_space(nsize,
mpi_is_secure(quot));
MPN_COPY(np, qp, nsize);
}
}
else /* Put quotient at top of remainder. */
qp = rp + dsize;
count_leading_zeros( normalization_steps, dp[dsize - 1] );
/* Normalize the denominator, i.e. make its most significant bit set by
* shifting it NORMALIZATION_STEPS bits to the left. Also shift the
* numerator the same number of steps (to keep the quotient the same!).
*/
if( normalization_steps ) {
mpi_ptr_t tp;
mpi_limb_t nlimb;
/* Shift up the denominator setting the most significant bit of
* the most significant word. Use temporary storage not to clobber
* the original contents of the denominator. */
tp = marker[markidx++] = mpi_alloc_limb_space(dsize,mpi_is_secure(den));
mpihelp_lshift( tp, dp, dsize, normalization_steps );
dp = tp;
/* Shift up the numerator, possibly introducing a new most
* significant word. Move the shifted numerator in the remainder
* meanwhile. */
nlimb = mpihelp_lshift(rp, np, nsize, normalization_steps);
if( nlimb ) {
rp[nsize] = nlimb;
rsize = nsize + 1;
}
else
rsize = nsize;
}
else {
/* The denominator is already normalized, as required. Copy it to
* temporary space if it overlaps with the quotient or remainder. */
if( dp == rp || (quot && (dp == qp))) {
mpi_ptr_t tp;
tp = marker[markidx++] = mpi_alloc_limb_space(dsize, mpi_is_secure(den));
MPN_COPY( tp, dp, dsize );
dp = tp;
}
/* Move the numerator to the remainder. */
if( rp != np )
MPN_COPY(rp, np, nsize);
rsize = nsize;
}
q_limb = mpihelp_divrem( qp, 0, rp, rsize, dp, dsize );
if( quot ) {
qsize = rsize - dsize;
if(q_limb) {
qp[qsize] = q_limb;
qsize += 1;
}
quot->nlimbs = qsize;
quot->sign = sign_quotient;
}
rsize = dsize;
MPN_NORMALIZE (rp, rsize);
if( normalization_steps && rsize ) {
mpihelp_rshift(rp, rp, rsize, normalization_steps);
rsize -= rp[rsize - 1] == 0?1:0;
}
rem->nlimbs = rsize;
rem->sign = sign_remainder;
while( markidx )
mpi_free_limb_space(marker[--markidx]);
}
void
mpi_tdiv_q_2exp( MPI w, MPI u, unsigned count )
{
mpi_size_t usize, wsize;
mpi_size_t limb_cnt;
usize = u->nlimbs;
limb_cnt = count / BITS_PER_MPI_LIMB;
wsize = usize - limb_cnt;
if( limb_cnt >= usize )
w->nlimbs = 0;
else {
mpi_ptr_t wp;
mpi_ptr_t up;
RESIZE_IF_NEEDED( w, wsize );
wp = w->d;
up = u->d;
count %= BITS_PER_MPI_LIMB;
if( count ) {
mpihelp_rshift( wp, up + limb_cnt, wsize, count );
wsize -= !wp[wsize - 1];
}
else {
MPN_COPY_INCR( wp, up + limb_cnt, wsize);
}
w->nlimbs = wsize;
}
}
/****************
* Check whether dividend is divisible by divisor
* (note: divisor must fit into a limb)
*/
int
mpi_divisible_ui(MPI dividend, ulong divisor )
{
return !mpihelp_mod_1( dividend->d, dividend->nlimbs, divisor );
}
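A worked example of the floor-division convention documented above (quotient rounded towards minus infinity, remainder taking the sign of the divisor): for dividend -7 and divisor 3, truncating division gives q = -2, r = -1, while the floor variants give q = -3, r = 2. demo_fdiv() is a hypothetical helper for illustration; it relies on the public sign field of the MPI struct, as used throughout this file.
#include <stdlib.h>
#include "mpi.h"
static void
demo_fdiv(void)
{
    MPI d = mpi_alloc_set_ui( 7 );
    MPI m = mpi_alloc_set_ui( 3 );
    MPI q = mpi_alloc( 0 );
    MPI r = mpi_alloc( 0 );

    d->sign = 1;                 /* d = -7 */
    mpi_fdiv_qr( q, r, d, m );   /* q = -3, r = 2, so d == q*m + r */
    if( mpi_cmp_ui( r, 2 ) )
        abort();

    mpi_free( d ); mpi_free( m );
    mpi_free( q ); mpi_free( r );
}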

54
mpi/mpi-gcd.c Normal file

@ -0,0 +1,54 @@
/* mpi-gcd.c - MPI functions
* Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"
/****************
* Find the greatest common divisor G of A and B.
* Return: true if the GCD is 1, false in all other cases
*/
int
mpi_gcd( MPI g, MPI xa, MPI xb )
{
MPI a, b;
a = mpi_copy(xa);
b = mpi_copy(xb);
/* TAOCP Vol II, 4.5.2, Algorithm A */
a->sign = 0;
b->sign = 0;
while( mpi_cmp_ui( b, 0 ) ) {
mpi_fdiv_r( g, a, b ); /* g is used as a temporary variable */
mpi_set(a,b);
mpi_set(b,g);
}
mpi_set(g, a);
mpi_free(a);
mpi_free(b);
return !mpi_cmp_ui( g, 1);
}
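A hedged usage sketch of mpi_gcd(); gcd_demo() is a hypothetical helper. Note that the return value only signals coprimality, while the GCD itself is left in g.
#include <stdlib.h>
#include "mpi.h"
static void
gcd_demo(void)
{
    MPI a = mpi_alloc_set_ui( 12 );
    MPI b = mpi_alloc_set_ui( 18 );
    MPI g = mpi_alloc( 0 );

    if( mpi_gcd( g, a, b ) )     /* true only for coprime inputs */
        abort();
    if( mpi_cmp_ui( g, 6 ) )     /* gcd(12,18) == 6 */
        abort();

    mpi_free( a ); mpi_free( b ); mpi_free( g );
}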

36
mpi/mpi-inline.c Normal file

@ -0,0 +1,36 @@
/* mpi-inline.c
* Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
/* put the inline functions as real functions into the lib */
#define G10_MPI_INLINE_DECL
#include "mpi-internal.h"
/* always include the header because it is only
* included by mpi-internal if __GNUC__ is defined but we
* need it here in all cases and the above definition
* of the macro allows us to do so
*/
#include "mpi-inline.h"

128
mpi/mpi-inline.h Normal file

@ -0,0 +1,128 @@
/* mpi-inline.h - Internal to the Multi Precision Integers
* Copyright (C) 1994, 1996, 1998, 1999 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#ifndef G10_MPI_INLINE_H
#define G10_MPI_INLINE_H
#ifndef G10_MPI_INLINE_DECL
#define G10_MPI_INLINE_DECL extern __inline__
#endif
G10_MPI_INLINE_DECL mpi_limb_t
mpihelp_add_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
mpi_size_t s1_size, mpi_limb_t s2_limb)
{
mpi_limb_t x;
x = *s1_ptr++;
s2_limb += x;
*res_ptr++ = s2_limb;
if( s2_limb < x ) { /* sum is less than the left operand: handle carry */
while( --s1_size ) {
x = *s1_ptr++ + 1; /* add carry */
*res_ptr++ = x; /* and store */
if( x ) /* not 0 (no overflow): we can stop */
goto leave;
}
return 1; /* return carry (size of s1 too small) */
}
leave:
if( res_ptr != s1_ptr ) { /* not the same variable */
mpi_size_t i; /* copy the rest */
for( i=0; i < s1_size-1; i++ )
res_ptr[i] = s1_ptr[i];
}
return 0; /* no carry */
}
G10_MPI_INLINE_DECL mpi_limb_t
mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
mpi_ptr_t s2_ptr, mpi_size_t s2_size)
{
mpi_limb_t cy = 0;
if( s2_size )
cy = mpihelp_add_n( res_ptr, s1_ptr, s2_ptr, s2_size );
if( s1_size - s2_size )
cy = mpihelp_add_1( res_ptr + s2_size, s1_ptr + s2_size,
s1_size - s2_size, cy);
return cy;
}
G10_MPI_INLINE_DECL mpi_limb_t
mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
mpi_size_t s1_size, mpi_limb_t s2_limb )
{
mpi_limb_t x;
x = *s1_ptr++;
s2_limb = x - s2_limb;
*res_ptr++ = s2_limb;
if( s2_limb > x ) {
while( --s1_size ) {
x = *s1_ptr++;
*res_ptr++ = x - 1;
if( x )
goto leave;
}
return 1;
}
leave:
if( res_ptr != s1_ptr ) {
mpi_size_t i;
for( i=0; i < s1_size-1; i++ )
res_ptr[i] = s1_ptr[i];
}
return 0;
}
G10_MPI_INLINE_DECL mpi_limb_t
mpihelp_sub( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
mpi_ptr_t s2_ptr, mpi_size_t s2_size)
{
mpi_limb_t cy = 0;
if( s2_size )
cy = mpihelp_sub_n(res_ptr, s1_ptr, s2_ptr, s2_size);
if( s1_size - s2_size )
cy = mpihelp_sub_1(res_ptr + s2_size, s1_ptr + s2_size,
s1_size - s2_size, cy);
return cy;
}
#endif /*G10_MPI_INLINE_H*/

259
mpi/mpi-internal.h Normal file

@ -0,0 +1,259 @@
/* mpi-internal.h - Internal to the Multi Precision Integers
* Copyright (C) 1994, 1996 Free Software Foundation, Inc.
* Copyright (C) 1998, 2000 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#ifndef G10_MPI_INTERNAL_H
#define G10_MPI_INTERNAL_H
#include "mpi.h"
/* If KARATSUBA_THRESHOLD is not already defined, define it to a
* value which is good on most machines. */
/* tested 4, 16, 32 and 64, where 16 gave the best performance when
* checking a 768 and a 1024 bit ElGamal signature.
* (wk 22.12.97) */
#ifndef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 16
#endif
/* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */
#if KARATSUBA_THRESHOLD < 2
#undef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 2
#endif
typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */
typedef int mpi_size_t; /* (must be a signed type) */
#define ABS(x) (x >= 0 ? x : -x)
#define MIN(l,o) ((l) < (o) ? (l) : (o))
#define MAX(h,i) ((h) > (i) ? (h) : (i))
#define RESIZE_IF_NEEDED(a,b) \
do { \
if( (a)->alloced < (b) ) \
mpi_resize((a), (b)); \
} while(0)
/* Copy N limbs from S to D. */
#define MPN_COPY( d, s, n) \
do { \
mpi_size_t _i; \
for( _i = 0; _i < (n); _i++ ) \
(d)[_i] = (s)[_i]; \
} while(0)
#define MPN_COPY_INCR( d, s, n) \
do { \
mpi_size_t _i; \
for( _i = 0; _i < (n); _i++ ) \
(d)[_i] = (s)[_i]; \
} while (0)
#define MPN_COPY_DECR( d, s, n ) \
do { \
mpi_size_t _i; \
for( _i = (n)-1; _i >= 0; _i--) \
(d)[_i] = (s)[_i]; \
} while(0)
/* Zero N limbs at D */
#define MPN_ZERO(d, n) \
do { \
int _i; \
for( _i = 0; _i < (n); _i++ ) \
(d)[_i] = 0; \
} while (0)
#define MPN_NORMALIZE(d, n) \
do { \
while( (n) > 0 ) { \
if( (d)[(n)-1] ) \
break; \
(n)--; \
} \
} while(0)
#define MPN_NORMALIZE_NOT_ZERO(d, n) \
do { \
for(;;) { \
if( (d)[(n)-1] ) \
break; \
(n)--; \
} \
} while(0)
#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
do { \
if( (size) < KARATSUBA_THRESHOLD ) \
mul_n_basecase (prodp, up, vp, size); \
else \
mul_n (prodp, up, vp, size, tspace); \
} while (0);
/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
* limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
* If this would yield overflow, DI should be the largest possible number
* (i.e., only ones). For correct operation, the most significant bit of D
* has to be set. Put the quotient in Q and the remainder in R.
*/
#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \
do { \
mpi_limb_t _q, _ql, _r; \
mpi_limb_t _xh, _xl; \
umul_ppmm (_q, _ql, (nh), (di)); \
_q += (nh); /* DI is 2**BITS_PER_MPI_LIMB too small */ \
umul_ppmm (_xh, _xl, _q, (d)); \
sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \
if( _xh ) { \
sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
_q++; \
if( _xh) { \
sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
_q++; \
} \
} \
if( _r >= (d) ) { \
_r -= (d); \
_q++; \
} \
(r) = _r; \
(q) = _q; \
} while (0)
/*-- mpiutil.c --*/
#ifdef M_DEBUG
#define mpi_alloc_limb_space(n,f) mpi_debug_alloc_limb_space((n),(f), M_DBGINFO( __LINE__ ) )
#define mpi_free_limb_space(n) mpi_debug_free_limb_space((n), M_DBGINFO( __LINE__ ) )
mpi_ptr_t mpi_debug_alloc_limb_space( unsigned nlimbs, int sec, const char *info );
void mpi_debug_free_limb_space( mpi_ptr_t a, const char *info );
#else
mpi_ptr_t mpi_alloc_limb_space( unsigned nlimbs, int sec );
void mpi_free_limb_space( mpi_ptr_t a );
#endif
void mpi_assign_limb_space( MPI a, mpi_ptr_t ap, unsigned nlimbs );
/*-- mpi-bit.c --*/
void mpi_rshift_limbs( MPI a, unsigned int count );
void mpi_lshift_limbs( MPI a, unsigned int count );
/*-- mpihelp-add.c --*/
mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
mpi_size_t s1_size, mpi_limb_t s2_limb );
mpi_limb_t mpihelp_add_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
mpi_ptr_t s2_ptr, mpi_size_t size);
mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
mpi_ptr_t s2_ptr, mpi_size_t s2_size);
/*-- mpihelp-sub.c --*/
mpi_limb_t mpihelp_sub_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
mpi_size_t s1_size, mpi_limb_t s2_limb );
mpi_limb_t mpihelp_sub_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
mpi_ptr_t s2_ptr, mpi_size_t size);
mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
mpi_ptr_t s2_ptr, mpi_size_t s2_size);
/*-- mpihelp-cmp.c --*/
int mpihelp_cmp( mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size );
/*-- mpihelp-mul.c --*/
struct karatsuba_ctx {
struct karatsuba_ctx *next;
mpi_ptr_t tspace;
mpi_size_t tspace_size;
mpi_ptr_t tp;
mpi_size_t tp_size;
};
void mpihelp_release_karatsuba_ctx( struct karatsuba_ctx *ctx );
mpi_limb_t mpihelp_addmul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
mpi_size_t s1_size, mpi_limb_t s2_limb);
mpi_limb_t mpihelp_submul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
mpi_size_t s1_size, mpi_limb_t s2_limb);
void mpihelp_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
mpi_size_t size);
mpi_limb_t mpihelp_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
mpi_ptr_t vp, mpi_size_t vsize);
void mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size );
void mpih_sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size,
mpi_ptr_t tspace);
void mpihelp_mul_karatsuba_case( mpi_ptr_t prodp,
mpi_ptr_t up, mpi_size_t usize,
mpi_ptr_t vp, mpi_size_t vsize,
struct karatsuba_ctx *ctx );
/*-- mpihelp-mul_1.c (or xxx/cpu/ *.S) --*/
mpi_limb_t mpihelp_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
mpi_size_t s1_size, mpi_limb_t s2_limb);
/*-- mpihelp-div.c --*/
mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
mpi_limb_t divisor_limb);
mpi_limb_t mpihelp_divrem( mpi_ptr_t qp, mpi_size_t qextra_limbs,
mpi_ptr_t np, mpi_size_t nsize,
mpi_ptr_t dp, mpi_size_t dsize);
mpi_limb_t mpihelp_divmod_1( mpi_ptr_t quot_ptr,
mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
mpi_limb_t divisor_limb);
/*-- mpihelp-shift.c --*/
mpi_limb_t mpihelp_lshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
unsigned cnt);
mpi_limb_t mpihelp_rshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
unsigned cnt);
/* Define stuff for longlong.h. */
#define W_TYPE_SIZE BITS_PER_MPI_LIMB
typedef mpi_limb_t UWtype;
typedef unsigned int UHWtype;
#if defined (__GNUC__)
typedef unsigned int UQItype __attribute__ ((mode (QI)));
typedef int SItype __attribute__ ((mode (SI)));
typedef unsigned int USItype __attribute__ ((mode (SI)));
typedef int DItype __attribute__ ((mode (DI)));
typedef unsigned int UDItype __attribute__ ((mode (DI)));
#else
typedef unsigned char UQItype;
typedef long SItype;
typedef unsigned long USItype;
#endif
#ifdef __GNUC__
#include "mpi-inline.h"
#endif
#endif /*G10_MPI_INTERNAL_H*/

270
mpi/mpi-inv.c Normal file

@ -0,0 +1,270 @@
/* mpi-inv.c - MPI functions
* Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"
/****************
* Calculate the multiplicative inverse X of A mod N
* That is: Find the solution x for
* 1 = (a*x) mod n
*/
void
mpi_invm( MPI x, MPI a, MPI n )
{
#if 0
MPI u, v, u1, u2, u3, v1, v2, v3, q, t1, t2, t3;
MPI ta, tb, tc;
u = mpi_copy(a);
v = mpi_copy(n);
u1 = mpi_alloc_set_ui(1);
u2 = mpi_alloc_set_ui(0);
u3 = mpi_copy(u);
v1 = mpi_alloc_set_ui(0);
v2 = mpi_alloc_set_ui(1);
v3 = mpi_copy(v);
q = mpi_alloc( mpi_get_nlimbs(u)+1 );
t1 = mpi_alloc( mpi_get_nlimbs(u)+1 );
t2 = mpi_alloc( mpi_get_nlimbs(u)+1 );
t3 = mpi_alloc( mpi_get_nlimbs(u)+1 );
while( mpi_cmp_ui( v3, 0 ) ) {
mpi_fdiv_q( q, u3, v3 );
mpi_mul(t1, v1, q); mpi_mul(t2, v2, q); mpi_mul(t3, v3, q);
mpi_sub(t1, u1, t1); mpi_sub(t2, u2, t2); mpi_sub(t3, u3, t3);
mpi_set(u1, v1); mpi_set(u2, v2); mpi_set(u3, v3);
mpi_set(v1, t1); mpi_set(v2, t2); mpi_set(v3, t3);
}
/* log_debug("result:\n");
log_mpidump("q =", q );
log_mpidump("u1=", u1);
log_mpidump("u2=", u2);
log_mpidump("u3=", u3);
log_mpidump("v1=", v1);
log_mpidump("v2=", v2); */
mpi_set(x, u1);
mpi_free(u1);
mpi_free(u2);
mpi_free(u3);
mpi_free(v1);
mpi_free(v2);
mpi_free(v3);
mpi_free(q);
mpi_free(t1);
mpi_free(t2);
mpi_free(t3);
mpi_free(u);
mpi_free(v);
#elif 0
/* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
* modified according to Michael Penk's solution for Exercise 35 */
/* FIXME: we can simplify this in most cases (see Knuth) */
MPI u, v, u1, u2, u3, v1, v2, v3, t1, t2, t3;
unsigned k;
int sign;
u = mpi_copy(a);
v = mpi_copy(n);
for(k=0; !mpi_test_bit(u,0) && !mpi_test_bit(v,0); k++ ) {
mpi_rshift(u, u, 1);
mpi_rshift(v, v, 1);
}
u1 = mpi_alloc_set_ui(1);
u2 = mpi_alloc_set_ui(0);
u3 = mpi_copy(u);
v1 = mpi_copy(v); /* !-- used as const 1 */
v2 = mpi_alloc( mpi_get_nlimbs(u) ); mpi_sub( v2, u1, u );
v3 = mpi_copy(v);
if( mpi_test_bit(u, 0) ) { /* u is odd */
t1 = mpi_alloc_set_ui(0);
t2 = mpi_alloc_set_ui(1); t2->sign = 1;
t3 = mpi_copy(v); t3->sign = !t3->sign;
goto Y4;
}
else {
t1 = mpi_alloc_set_ui(1);
t2 = mpi_alloc_set_ui(0);
t3 = mpi_copy(u);
}
do {
do {
if( mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0) ) { /* one is odd */
mpi_add(t1, t1, v);
mpi_sub(t2, t2, u);
}
mpi_rshift(t1, t1, 1);
mpi_rshift(t2, t2, 1);
mpi_rshift(t3, t3, 1);
Y4:
;
} while( !mpi_test_bit( t3, 0 ) ); /* while t3 is even */
if( !t3->sign ) {
mpi_set(u1, t1);
mpi_set(u2, t2);
mpi_set(u3, t3);
}
else {
mpi_sub(v1, v, t1);
sign = u->sign; u->sign = !u->sign;
mpi_sub(v2, u, t2);
u->sign = sign;
sign = t3->sign; t3->sign = !t3->sign;
mpi_set(v3, t3);
t3->sign = sign;
}
mpi_sub(t1, u1, v1);
mpi_sub(t2, u2, v2);
mpi_sub(t3, u3, v3);
if( t1->sign ) {
mpi_add(t1, t1, v);
mpi_sub(t2, t2, u);
}
} while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */
/* mpi_lshift( u3, k ); */
mpi_set(x, u1);
mpi_free(u1);
mpi_free(u2);
mpi_free(u3);
mpi_free(v1);
mpi_free(v2);
mpi_free(v3);
mpi_free(t1);
mpi_free(t2);
mpi_free(t3);
#else
/* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
* modified according to Michael Penk's solution for Exercise 35
* with further enhancement */
MPI u, v, u1, u2=NULL, u3, v1, v2=NULL, v3, t1, t2=NULL, t3;
unsigned k;
int sign;
int odd ;
u = mpi_copy(a);
v = mpi_copy(n);
for(k=0; !mpi_test_bit(u,0) && !mpi_test_bit(v,0); k++ ) {
mpi_rshift(u, u, 1);
mpi_rshift(v, v, 1);
}
odd = mpi_test_bit(v,0);
u1 = mpi_alloc_set_ui(1);
if( !odd )
u2 = mpi_alloc_set_ui(0);
u3 = mpi_copy(u);
v1 = mpi_copy(v);
if( !odd ) {
v2 = mpi_alloc( mpi_get_nlimbs(u) );
mpi_sub( v2, u1, u ); /* U is used as const 1 */
}
v3 = mpi_copy(v);
if( mpi_test_bit(u, 0) ) { /* u is odd */
t1 = mpi_alloc_set_ui(0);
if( !odd ) {
t2 = mpi_alloc_set_ui(1); t2->sign = 1;
}
t3 = mpi_copy(v); t3->sign = !t3->sign;
goto Y4;
}
else {
t1 = mpi_alloc_set_ui(1);
if( !odd )
t2 = mpi_alloc_set_ui(0);
t3 = mpi_copy(u);
}
do {
do {
if( !odd ) {
if( mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0) ) { /* one is odd */
mpi_add(t1, t1, v);
mpi_sub(t2, t2, u);
}
mpi_rshift(t1, t1, 1);
mpi_rshift(t2, t2, 1);
mpi_rshift(t3, t3, 1);
}
else {
if( mpi_test_bit(t1, 0) )
mpi_add(t1, t1, v);
mpi_rshift(t1, t1, 1);
mpi_rshift(t3, t3, 1);
}
Y4:
;
} while( !mpi_test_bit( t3, 0 ) ); /* while t3 is even */
if( !t3->sign ) {
mpi_set(u1, t1);
if( !odd )
mpi_set(u2, t2);
mpi_set(u3, t3);
}
else {
mpi_sub(v1, v, t1);
sign = u->sign; u->sign = !u->sign;
if( !odd )
mpi_sub(v2, u, t2);
u->sign = sign;
sign = t3->sign; t3->sign = !t3->sign;
mpi_set(v3, t3);
t3->sign = sign;
}
mpi_sub(t1, u1, v1);
if( !odd )
mpi_sub(t2, u2, v2);
mpi_sub(t3, u3, v3);
if( t1->sign ) {
mpi_add(t1, t1, v);
if( !odd )
mpi_sub(t2, t2, u);
}
} while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */
/* mpi_lshift( u3, k ); */
mpi_set(x, u1);
mpi_free(u1);
mpi_free(v1);
mpi_free(t1);
if( !odd ) {
mpi_free(u2);
mpi_free(v2);
mpi_free(t2);
}
mpi_free(u3);
mpi_free(v3);
mpi_free(t3);
mpi_free(u);
mpi_free(v);
#endif
}
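A hedged usage sketch checking the defining property 1 = (a*x) mod n stated above; invm_demo() is a hypothetical helper, while mpi_mulm() is the modular multiplication from mpi-mul.c.
#include <stdlib.h>
#include "mpi.h"
static void
invm_demo(void)
{
    MPI a = mpi_alloc_set_ui( 3 );
    MPI n = mpi_alloc_set_ui( 7 );
    MPI x = mpi_alloc( 0 );
    MPI t = mpi_alloc( 0 );

    mpi_invm( x, a, n );         /* x is an inverse of 3 mod 7 (i.e. 5, up to the modulus) */
    mpi_mulm( t, a, x, n );      /* t = (a*x) mod n */
    if( mpi_cmp_ui( t, 1 ) )
        abort();

    mpi_free( a ); mpi_free( n );
    mpi_free( x ); mpi_free( t );
}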

101
mpi/mpi-mpow.c Normal file

@ -0,0 +1,101 @@
/* mpi-mpow.c - MPI functions
* Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"
#include "longlong.h"
#include <assert.h>
static int
build_index( MPI *exparray, int k, int i, int t )
{
int j, bitno;
int index = 0;
bitno = t-i;
for(j=k-1; j >= 0; j-- ) {
index <<= 1;
if( mpi_test_bit( exparray[j], bitno ) )
index |= 1;
}
return index;
}
/****************
* RES = ((BASE[0] ^ EXP[0]) * (BASE[1] ^ EXP[1]) * ...) mod M
*/
void
mpi_mulpowm( MPI res, MPI *basearray, MPI *exparray, MPI m)
{
int k; /* number of elements */
int t; /* bit size of largest exponent */
int i, j, idx;
MPI *G; /* table with precomputed values of size 2^k */
MPI tmp;
for(k=0; basearray[k]; k++ )
;
assert(k);
for(t=0, i=0; (tmp=exparray[i]); i++ ) {
j = mpi_get_nbits(tmp);
if( j > t )
t = j;
}
assert(i==k);
assert(t);
assert( k < 10 );
G = m_alloc_clear( (1<<k) * sizeof *G );
/* and calculate */
tmp = mpi_alloc( mpi_get_nlimbs(m)+1 );
mpi_set_ui( res, 1 );
for(i = 1; i <= t; i++ ) {
mpi_mulm(tmp, res, res, m );
idx = build_index( exparray, k, i, t );
assert( idx >= 0 && idx < (1<<k) );
if( !G[idx] ) {
if( !idx )
G[0] = mpi_alloc_set_ui( 1 );
else {
for(j=0; j < k; j++ ) {
if( (idx & (1<<j) ) ) {
if( !G[idx] )
G[idx] = mpi_copy( basearray[j] );
else
mpi_mulm( G[idx], G[idx], basearray[j], m );
}
}
if( !G[idx] )
G[idx] = mpi_alloc(0);
}
}
mpi_mulm(res, tmp, G[idx], m );
}
/* cleanup */
mpi_free(tmp);
for(i=0; i < (1<<k); i++ )
mpi_free(G[i]);
m_free(G);
}

215
mpi/mpi-mul.c Normal file

@ -0,0 +1,215 @@
/* mpi-mul.c - MPI functions
* Copyright (C) 1994, 1996 Free Software Foundation, Inc.
* Copyright (C) 1998, 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"
void
mpi_mul_ui( MPI prod, MPI mult, unsigned long small_mult )
{
mpi_size_t size, prod_size;
mpi_ptr_t prod_ptr;
mpi_limb_t cy;
int sign;
size = mult->nlimbs;
sign = mult->sign;
if( !size || !small_mult ) {
prod->nlimbs = 0;
prod->sign = 0;
return;
}
prod_size = size + 1;
if( prod->alloced < prod_size )
mpi_resize( prod, prod_size );
prod_ptr = prod->d;
cy = mpihelp_mul_1( prod_ptr, mult->d, size, (mpi_limb_t)small_mult );
if( cy )
prod_ptr[size++] = cy;
prod->nlimbs = size;
prod->sign = sign;
}
void
mpi_mul_2exp( MPI w, MPI u, unsigned long cnt)
{
mpi_size_t usize, wsize, limb_cnt;
mpi_ptr_t wp;
mpi_limb_t wlimb;
int usign, wsign;
usize = u->nlimbs;
usign = u->sign;
if( !usize ) {
w->nlimbs = 0;
w->sign = 0;
return;
}
limb_cnt = cnt / BITS_PER_MPI_LIMB;
wsize = usize + limb_cnt + 1;
if( w->alloced < wsize )
mpi_resize(w, wsize );
wp = w->d;
wsize = usize + limb_cnt;
wsign = usign;
cnt %= BITS_PER_MPI_LIMB;
if( cnt ) {
wlimb = mpihelp_lshift( wp + limb_cnt, u->d, usize, cnt );
if( wlimb ) {
wp[wsize] = wlimb;
wsize++;
}
}
else {
MPN_COPY_DECR( wp + limb_cnt, u->d, usize );
}
/* Zero all whole limbs at low end. Do it here and not before calling
* mpn_lshift, not to lose for U == W. */
MPN_ZERO( wp, limb_cnt );
w->nlimbs = wsize;
w->sign = wsign;
}
void
mpi_mul( MPI w, MPI u, MPI v)
{
mpi_size_t usize, vsize, wsize;
mpi_ptr_t up, vp, wp;
mpi_limb_t cy;
int usign, vsign, usecure, vsecure, sign_product;
int assign_wp=0;
mpi_ptr_t tmp_limb=NULL;
if( u->nlimbs < v->nlimbs ) { /* Swap U and V. */
usize = v->nlimbs;
usign = v->sign;
usecure = mpi_is_secure(v);
up = v->d;
vsize = u->nlimbs;
vsign = u->sign;
vsecure = mpi_is_secure(u);
vp = u->d;
}
else {
usize = u->nlimbs;
usign = u->sign;
usecure = mpi_is_secure(u);
up = u->d;
vsize = v->nlimbs;
vsign = v->sign;
vsecure = mpi_is_secure(v);
vp = v->d;
}
sign_product = usign ^ vsign;
wp = w->d;
/* Ensure W has space enough to store the result. */
wsize = usize + vsize;
if ( !mpi_is_secure (w) && (mpi_is_secure (u) || mpi_is_secure (v)) ) {
/* w is not allocated in secure space but u or v is. To make sure
 * that no temporary results are stored in w, we temporarily use
 * a newly allocated limb space for w */
wp = mpi_alloc_limb_space( wsize, 1 );
assign_wp = 2; /* mark it as 2 so that we can later copy it back to
 * normal memory */
}
else if( w->alloced < wsize ) {
if( wp == up || wp == vp ) {
wp = mpi_alloc_limb_space( wsize, mpi_is_secure(w) );
assign_wp = 1;
}
else {
mpi_resize(w, wsize );
wp = w->d;
}
}
else { /* Make U and V not overlap with W. */
if( wp == up ) {
/* W and U are identical. Allocate temporary space for U. */
up = tmp_limb = mpi_alloc_limb_space( usize, usecure );
/* Is V identical too? Keep it identical with U. */
if( wp == vp )
vp = up;
/* Copy to the temporary space. */
MPN_COPY( up, wp, usize );
}
else if( wp == vp ) {
/* W and V are identical. Allocate temporary space for V. */
vp = tmp_limb = mpi_alloc_limb_space( vsize, vsecure );
/* Copy to the temporary space. */
MPN_COPY( vp, wp, vsize );
}
}
if( !vsize )
wsize = 0;
else {
cy = mpihelp_mul( wp, up, usize, vp, vsize );
wsize -= cy? 0:1;
}
if( assign_wp ) {
if (assign_wp == 2) {
/* copy the temp wp from secure memory back to normal memory */
mpi_ptr_t tmp_wp = mpi_alloc_limb_space (wsize, 0);
MPN_COPY (tmp_wp, wp, wsize);
mpi_free_limb_space (wp);
wp = tmp_wp;
}
mpi_assign_limb_space( w, wp, wsize );
}
w->nlimbs = wsize;
w->sign = sign_product;
if( tmp_limb )
mpi_free_limb_space( tmp_limb );
}
void
mpi_mulm( MPI w, MPI u, MPI v, MPI m)
{
mpi_mul(w, u, v);
mpi_fdiv_r( w, w, m );
}
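/* Illustrative usage sketch (guarded by #if 0, so it is not compiled):
 * a minimal example of how a caller might combine the routines above,
 * assuming the declarations from mpi.h and the hex parser mpi_fromstr()
 * from mpicoder.c. */
#if 0
static void
example_mul (void)
{
    MPI u = mpi_alloc(0), m = mpi_alloc(0), w = mpi_alloc(0);

    mpi_fromstr( u, "0x0f" );          /* u = 15 */
    mpi_fromstr( m, "0x0d" );          /* m = 13 */
    mpi_mul_ui( w, u, 4 );             /* w = 60 */
    mpi_mulm( w, w, u, m );            /* w = (60*15) mod 13 = 3 */
    mpi_free(u); mpi_free(m); mpi_free(w);
}
#endif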

294
mpi/mpi-pow.c Normal file
View file

@ -0,0 +1,294 @@
/* mpi-pow.c - MPI functions
* Copyright (C) 1994, 1996, 1998, 2000 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mpi-internal.h"
#include "longlong.h"
#include <assert.h>
/****************
* RES = BASE ^ EXP mod MOD
*/
void
mpi_powm( MPI res, MPI base, MPI exp, MPI mod)
{
mpi_ptr_t rp, ep, mp, bp;
mpi_size_t esize, msize, bsize, rsize;
int esign, msign, bsign, rsign;
int esec, msec, bsec, rsec;
mpi_size_t size;
int mod_shift_cnt;
int negative_result;
mpi_ptr_t mp_marker=NULL, bp_marker=NULL, ep_marker=NULL;
mpi_ptr_t xp_marker=NULL;
int assign_rp=0;
mpi_ptr_t tspace = NULL;
mpi_size_t tsize=0; /* to avoid compiler warning */
/* fixme: we should check that the warning is void*/
esize = exp->nlimbs;
msize = mod->nlimbs;
size = 2 * msize;
esign = exp->sign;
msign = mod->sign;
esec = mpi_is_secure(exp);
msec = mpi_is_secure(mod);
bsec = mpi_is_secure(base);
rsec = mpi_is_secure(res);
rp = res->d;
ep = exp->d;
if( !msize )
msize = 1 / msize; /* provoke a signal */
if( !esize ) {
/* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
* depending on whether MOD equals 1. */
rp[0] = 1;
res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
res->sign = 0;
goto leave;
}
/* Normalize MOD (i.e. make its most significant bit set) as required by
* mpn_divrem. This will make the intermediate values in the calculation
* slightly larger, but the correct result is obtained after a final
* reduction using the original MOD value. */
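/* Illustrative example (assuming 8-bit limbs for brevity): MOD = 0x19
 * (binary 00011001) has mod_shift_cnt = 3, so MP becomes 0xC8 (11001000);
 * all intermediate reductions are then done modulo MOD << 3, and the
 * final reduction below undoes the shift. */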
mp = mp_marker = mpi_alloc_limb_space(msize, msec);
count_leading_zeros( mod_shift_cnt, mod->d[msize-1] );
if( mod_shift_cnt )
mpihelp_lshift( mp, mod->d, msize, mod_shift_cnt );
else
MPN_COPY( mp, mod->d, msize );
bsize = base->nlimbs;
bsign = base->sign;
if( bsize > msize ) { /* The base is larger than the module. Reduce it. */
/* Allocate (BSIZE + 1) with space for remainder and quotient.
* (The quotient is (bsize - msize + 1) limbs.) */
bp = bp_marker = mpi_alloc_limb_space( bsize + 1, bsec );
MPN_COPY( bp, base->d, bsize );
/* We don't care about the quotient, store it above the remainder,
* at BP + MSIZE. */
mpihelp_divrem( bp + msize, 0, bp, bsize, mp, msize );
bsize = msize;
/* Canonicalize the base, since we are going to multiply with it
* quite a few times. */
MPN_NORMALIZE( bp, bsize );
}
else
bp = base->d;
if( !bsize ) {
res->nlimbs = 0;
res->sign = 0;
goto leave;
}
if( res->alloced < size ) {
/* We have to allocate more space for RES. If any of the input
* parameters are identical to RES, defer deallocation of the old
* space. */
if( rp == ep || rp == mp || rp == bp ) {
rp = mpi_alloc_limb_space( size, rsec );
assign_rp = 1;
}
else {
mpi_resize( res, size );
rp = res->d;
}
}
else { /* Make BASE, EXP and MOD not overlap with RES. */
if( rp == bp ) {
/* RES and BASE are identical. Allocate temp. space for BASE. */
assert( !bp_marker );
bp = bp_marker = mpi_alloc_limb_space( bsize, bsec );
MPN_COPY(bp, rp, bsize);
}
if( rp == ep ) {
/* RES and EXP are identical. Allocate temp. space for EXP. */
ep = ep_marker = mpi_alloc_limb_space( esize, esec );
MPN_COPY(ep, rp, esize);
}
if( rp == mp ) {
/* RES and MOD are identical. Allocate temporary space for MOD.*/
assert( !mp_marker );
mp = mp_marker = mpi_alloc_limb_space( msize, msec );
MPN_COPY(mp, rp, msize);
}
}
MPN_COPY( rp, bp, bsize );
rsize = bsize;
rsign = bsign;
{
mpi_size_t i;
mpi_ptr_t xp = xp_marker = mpi_alloc_limb_space( 2 * (msize + 1), msec );
int c;
mpi_limb_t e;
mpi_limb_t carry_limb;
struct karatsuba_ctx karactx;
memset( &karactx, 0, sizeof karactx );
negative_result = (ep[0] & 1) && base->sign;
i = esize - 1;
e = ep[i];
count_leading_zeros (c, e);
e = (e << c) << 1; /* shift the exp bits to the left, lose msb */
c = BITS_PER_MPI_LIMB - 1 - c;
/* Main loop.
*
* Make the result be pointed to alternately by XP and RP. This
* helps us avoid block copying, which would otherwise be necessary
* with the overlap restrictions of mpihelp_divmod. With 50% probability
* the result after this loop will be in the area originally pointed
* by RP (==RES->d), and with 50% probability in the area originally
* pointed to by XP.
*/
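/* Illustrative example of the left-to-right binary method used below:
 * with EXP = 13 (binary 1101), RES starts as BASE (the leading 1 bit),
 * and the remaining bits 1,0,1 give: square+multiply, square,
 * square+multiply, i.e. BASE^13, with a reduction mod MP after each step. */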
for(;;) {
while( c ) {
mpi_ptr_t tp;
mpi_size_t xsize;
/*mpihelp_mul_n(xp, rp, rp, rsize);*/
if( rsize < KARATSUBA_THRESHOLD )
mpih_sqr_n_basecase( xp, rp, rsize );
else {
if( !tspace ) {
tsize = 2 * rsize;
tspace = mpi_alloc_limb_space( tsize, 0 );
}
else if( tsize < (2*rsize) ) {
mpi_free_limb_space( tspace );
tsize = 2 * rsize;
tspace = mpi_alloc_limb_space( tsize, 0 );
}
mpih_sqr_n( xp, rp, rsize, tspace );
}
xsize = 2 * rsize;
if( xsize > msize ) {
mpihelp_divrem(xp + msize, 0, xp, xsize, mp, msize);
xsize = msize;
}
tp = rp; rp = xp; xp = tp;
rsize = xsize;
if( (mpi_limb_signed_t)e < 0 ) {
/*mpihelp_mul( xp, rp, rsize, bp, bsize );*/
if( bsize < KARATSUBA_THRESHOLD ) {
mpihelp_mul( xp, rp, rsize, bp, bsize );
}
else {
mpihelp_mul_karatsuba_case(
xp, rp, rsize, bp, bsize, &karactx );
}
xsize = rsize + bsize;
if( xsize > msize ) {
mpihelp_divrem(xp + msize, 0, xp, xsize, mp, msize);
xsize = msize;
}
tp = rp; rp = xp; xp = tp;
rsize = xsize;
}
e <<= 1;
c--;
}
i--;
if( i < 0 )
break;
e = ep[i];
c = BITS_PER_MPI_LIMB;
}
/* We shifted MOD, the modulo reduction argument, left MOD_SHIFT_CNT
* steps. Adjust the result by reducing it with the original MOD.
*
* Also make sure the result is put in RES->d (where it already
* might be, see above).
*/
if( mod_shift_cnt ) {
carry_limb = mpihelp_lshift( res->d, rp, rsize, mod_shift_cnt);
rp = res->d;
if( carry_limb ) {
rp[rsize] = carry_limb;
rsize++;
}
}
else {
MPN_COPY( res->d, rp, rsize);
rp = res->d;
}
if( rsize >= msize ) {
mpihelp_divrem(rp + msize, 0, rp, rsize, mp, msize);
rsize = msize;
}
/* Remove any leading zero words from the result. */
if( mod_shift_cnt )
mpihelp_rshift( rp, rp, rsize, mod_shift_cnt);
MPN_NORMALIZE (rp, rsize);
mpihelp_release_karatsuba_ctx( &karactx );
}
if( negative_result && rsize ) {
if( mod_shift_cnt )
mpihelp_rshift( mp, mp, msize, mod_shift_cnt);
mpihelp_sub( rp, mp, msize, rp, rsize);
rsize = msize;
rsign = msign;
MPN_NORMALIZE(rp, rsize);
}
res->nlimbs = rsize;
res->sign = rsign;
leave:
if( assign_rp ) mpi_assign_limb_space( res, rp, size );
if( mp_marker ) mpi_free_limb_space( mp_marker );
if( bp_marker ) mpi_free_limb_space( bp_marker );
if( ep_marker ) mpi_free_limb_space( ep_marker );
if( xp_marker ) mpi_free_limb_space( xp_marker );
if( tspace ) mpi_free_limb_space( tspace );
}
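/* Illustrative usage sketch (guarded by #if 0, so it is not compiled):
 * a minimal caller of mpi_powm(), assuming the declarations from mpi.h
 * and the hex parser mpi_fromstr() from mpicoder.c. */
#if 0
static void
example_powm (void)
{
    MPI base = mpi_alloc(0), exp = mpi_alloc(0), mod = mpi_alloc(0);
    MPI res  = mpi_alloc(0);

    mpi_fromstr( base, "0x04" );       /* base = 4  */
    mpi_fromstr( exp,  "0x0d" );       /* exp  = 13 */
    mpi_fromstr( mod,  "0x1f" );       /* mod  = 31 */
    mpi_powm( res, base, exp, mod );   /* res  = 4^13 mod 31 = 2 */
    mpi_free(base); mpi_free(exp); mpi_free(mod); mpi_free(res);
}
#endif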

132
mpi/mpi-scan.c Normal file
View file

@ -0,0 +1,132 @@
/* mpi-scan.c - MPI functions
* Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"
#include "longlong.h"
/****************
* Scan through an mpi and return it byte by byte. A -1 is returned to indicate
* the end of the mpi. Scanning is done from the lsb to the msb; returned
* values are in the range 0 .. 255.
*
* FIXME: This code is VERY ugly!
*/
int
mpi_getbyte( MPI a, unsigned idx )
{
int i, j;
unsigned n;
mpi_ptr_t ap;
mpi_limb_t limb;
ap = a->d;
for(n=0,i=0; i < a->nlimbs; i++ ) {
limb = ap[i];
for( j=0; j < BYTES_PER_MPI_LIMB; j++, n++ )
if( n == idx )
return (limb >> j*8) & 0xff;
}
return -1;
}
/****************
* Put a value at position IDX into A. idx counts from lsb to msb
*/
void
mpi_putbyte( MPI a, unsigned idx, int xc )
{
int i, j;
unsigned n;
mpi_ptr_t ap;
mpi_limb_t limb, c;
c = xc & 0xff;
ap = a->d;
for(n=0,i=0; i < a->alloced; i++ ) {
limb = ap[i];
for( j=0; j < BYTES_PER_MPI_LIMB; j++, n++ )
if( n == idx ) {
#if BYTES_PER_MPI_LIMB == 4
if( j == 0 )
limb = (limb & 0xffffff00) | c;
else if( j == 1 )
limb = (limb & 0xffff00ff) | (c<<8);
else if( j == 2 )
limb = (limb & 0xff00ffff) | (c<<16);
else
limb = (limb & 0x00ffffff) | (c<<24);
#elif BYTES_PER_MPI_LIMB == 8
if( j == 0 )
limb = (limb & 0xffffffffffffff00) | c;
else if( j == 1 )
limb = (limb & 0xffffffffffff00ff) | (c<<8);
else if( j == 2 )
limb = (limb & 0xffffffffff00ffff) | (c<<16);
else if( j == 3 )
limb = (limb & 0xffffffff00ffffff) | (c<<24);
else if( j == 4 )
limb = (limb & 0xffffff00ffffffff) | (c<<32);
else if( j == 5 )
limb = (limb & 0xffff00ffffffffff) | (c<<40);
else if( j == 6 )
limb = (limb & 0xff00ffffffffffff) | (c<<48);
else
limb = (limb & 0x00ffffffffffffff) | (c<<56);
#else
#error please enhance this function, its ugly - i know.
#endif
if( a->nlimbs <= i )
a->nlimbs = i+1;
ap[i] = limb;
return;
}
}
abort(); /* index out of range */
}
/****************
* Count the number of zero bits at the low end of A
*/
unsigned
mpi_trailing_zeros( MPI a )
{
unsigned n, count = 0;
for(n=0; n < a->nlimbs; n++ ) {
if( a->d[n] ) {
unsigned nn;
mpi_limb_t alimb = a->d[n];
count_trailing_zeros( nn, alimb );
count += nn;
break;
}
count += BITS_PER_MPI_LIMB;
}
return count;
}
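/* Example: for A = 40 (binary 101000) mpi_trailing_zeros(A) returns 3;
 * for A = 0 the loop never runs and 0 is returned. */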

446
mpi/mpicoder.c Normal file
View file

@ -0,0 +1,446 @@
/* mpicoder.c - Coder for the external representation of MPIs
* Copyright (C) 1998, 1999 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include <config.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "mpi.h"
#include "mpi-internal.h"
#include "iobuf.h"
#include "memory.h"
#include "util.h"
#ifdef M_DEBUG
#undef mpi_read
#endif
#define MAX_EXTERN_MPI_BITS 16384
/****************
* write an mpi to out.
*/
int
mpi_write( IOBUF out, MPI a )
{
int rc;
unsigned nbits = mpi_get_nbits(a);
byte *p, *buf;
unsigned n;
if( nbits > MAX_EXTERN_MPI_BITS )
log_bug("mpi_encode: mpi too large (%u bits)\n", nbits);
iobuf_put(out, (nbits >>8) );
iobuf_put(out, (nbits) );
p = buf = mpi_get_buffer( a, &n, NULL );
rc = iobuf_write( out, p, n );
m_free(buf);
return rc;
}
/****************
* Read an external representation of an mpi and return the MPI
* The external format is a 16 bit unsigned value stored in network byte order,
* giving the number of bits for the following integer. The integer is stored
* with MSB first (left padded with zeroes to align on a byte boundary).
*/
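/* Example of the external format: the value 0x1FF has 9 significant bits,
 * so it is encoded as the four bytes 00 09 01 FF (2-byte bit count in
 * network byte order, followed by the value, MSB first). */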
MPI
#ifdef M_DEBUG
mpi_debug_read(IOBUF inp, unsigned *ret_nread, int secure, const char *info)
#else
mpi_read(IOBUF inp, unsigned *ret_nread, int secure)
#endif
{
int c, i, j;
unsigned nbits, nbytes, nlimbs, nread=0;
mpi_limb_t a;
MPI val = MPI_NULL;
if( (c = iobuf_get(inp)) == -1 )
goto leave;
nbits = c << 8;
if( (c = iobuf_get(inp)) == -1 )
goto leave;
nbits |= c;
if( nbits > MAX_EXTERN_MPI_BITS ) {
log_error("mpi too large (%u bits)\n", nbits);
goto leave;
}
nread = 2;
nbytes = (nbits+7) / 8;
nlimbs = (nbytes+BYTES_PER_MPI_LIMB-1) / BYTES_PER_MPI_LIMB;
#ifdef M_DEBUG
val = secure? mpi_debug_alloc_secure( nlimbs, info )
: mpi_debug_alloc( nlimbs, info );
#else
val = secure? mpi_alloc_secure( nlimbs )
: mpi_alloc( nlimbs );
#endif
i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
i %= BYTES_PER_MPI_LIMB;
val->nbits = nbits;
j= val->nlimbs = nlimbs;
val->sign = 0;
for( ; j > 0; j-- ) {
a = 0;
for(; i < BYTES_PER_MPI_LIMB; i++ ) {
a <<= 8;
a |= iobuf_get(inp) & 0xff; nread++;
}
i = 0;
val->d[j-1] = a;
}
leave:
if( nread > *ret_nread )
log_bug("mpi crosses packet border");
else
*ret_nread = nread;
return val;
}
MPI
mpi_read_from_buffer(byte *buffer, unsigned *ret_nread, int secure)
{
int i, j;
unsigned nbits, nbytes, nlimbs, nread=0;
mpi_limb_t a;
MPI val = MPI_NULL;
if( *ret_nread < 2 )
goto leave;
nbits = buffer[0] << 8 | buffer[1];
if( nbits > MAX_EXTERN_MPI_BITS ) {
log_error("mpi too large (%u bits)\n", nbits);
goto leave;
}
buffer += 2;
nread = 2;
nbytes = (nbits+7) / 8;
nlimbs = (nbytes+BYTES_PER_MPI_LIMB-1) / BYTES_PER_MPI_LIMB;
val = secure? mpi_alloc_secure( nlimbs )
: mpi_alloc( nlimbs );
i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
i %= BYTES_PER_MPI_LIMB;
val->nbits = nbits;
j= val->nlimbs = nlimbs;
val->sign = 0;
for( ; j > 0; j-- ) {
a = 0;
for(; i < BYTES_PER_MPI_LIMB; i++ ) {
if( ++nread > *ret_nread )
log_bug("mpi larger than buffer");
a <<= 8;
a |= *buffer++;
}
i = 0;
val->d[j-1] = a;
}
leave:
*ret_nread = nread;
return val;
}
/****************
* Make an mpi from a character string.
*/
int
mpi_fromstr(MPI val, const char *str)
{
int hexmode=0, sign=0, prepend_zero=0, i, j, c, c1, c2;
unsigned nbits, nbytes, nlimbs;
mpi_limb_t a;
if( *str == '-' ) {
sign = 1;
str++;
}
if( *str == '0' && str[1] == 'x' )
hexmode = 1;
else
return 1; /* other bases are not yet supported */
str += 2;
nbits = strlen(str)*4;
if( nbits % 8 )
prepend_zero = 1;
nbytes = (nbits+7) / 8;
nlimbs = (nbytes+BYTES_PER_MPI_LIMB-1) / BYTES_PER_MPI_LIMB;
if( val->alloced < nlimbs )
mpi_resize(val, nlimbs );
i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
i %= BYTES_PER_MPI_LIMB;
j= val->nlimbs = nlimbs;
val->sign = sign;
for( ; j > 0; j-- ) {
a = 0;
for(; i < BYTES_PER_MPI_LIMB; i++ ) {
if( prepend_zero ) {
c1 = '0';
prepend_zero = 0;
}
else
c1 = *str++;
assert(c1);
c2 = *str++;
assert(c2);
if( c1 >= '0' && c1 <= '9' )
c = c1 - '0';
else if( c1 >= 'a' && c1 <= 'f' )
c = c1 - 'a' + 10;
else if( c1 >= 'A' && c1 <= 'F' )
c = c1 - 'A' + 10;
else {
mpi_clear(val);
return 1;
}
c <<= 4;
if( c2 >= '0' && c2 <= '9' )
c |= c2 - '0';
else if( c2 >= 'a' && c2 <= 'f' )
c |= c2 - 'a' + 10;
else if( c2 >= 'A' && c2 <= 'F' )
c |= c2 - 'A' + 10;
else {
mpi_clear(val);
return 1;
}
a <<= 8;
a |= c;
}
i = 0;
val->d[j-1] = a;
}
return 0;
}
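/* Example: mpi_fromstr(val, "0x1A2B") sets VAL to 6699 and returns 0;
 * a string without the "0x" prefix (or containing a non-hex digit) makes
 * the function return 1. */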
/****************
* print an MPI to the given stream and return the number of characters
* printed.
*/
int
mpi_print( FILE *fp, MPI a, int mode )
{
int i, n=0;
if( a == MPI_NULL )
return fprintf(fp, "[MPI_NULL]");
if( !mode ) {
unsigned n1, n2;
n1 = mpi_get_nbits(a);
n2 = mpi_get_nbit_info(a);
if( n2 && n2 != n1 )
n += fprintf(fp, "[%u bits (%u)]", n1, n2 );
else
n += fprintf(fp, "[%u bits]", n1);
}
else {
if( a->sign )
putc('-', fp);
#if BYTES_PER_MPI_LIMB == 2
#define X "4"
#elif BYTES_PER_MPI_LIMB == 4
#define X "8"
#elif BYTES_PER_MPI_LIMB == 8
#define X "16"
#else
#error please define the format here
#endif
for(i=a->nlimbs; i > 0 ; i-- ) {
n += fprintf(fp, i!=a->nlimbs? "%0" X "lX":"%lX", (ulong)a->d[i-1]);
#undef X
}
if( !a->nlimbs )
putc('0', fp );
}
return n;
}
void
g10_log_mpidump( const char *text, MPI a )
{
FILE *fp = log_stream();
g10_log_print_prefix(text);
mpi_print(fp, a, 1 );
fputc('\n', fp);
}
/****************
* Special function to get the low 8 bytes from an mpi.
* This can be used as a keyid; KEYID is a 2 element array.
* Return the low 4 bytes.
*/
u32
mpi_get_keyid( MPI a, u32 *keyid )
{
#if BYTES_PER_MPI_LIMB == 4
if( keyid ) {
keyid[0] = a->nlimbs >= 2? a->d[1] : 0;
keyid[1] = a->nlimbs >= 1? a->d[0] : 0;
}
return a->nlimbs >= 1? a->d[0] : 0;
#elif BYTES_PER_MPI_LIMB == 8
if( keyid ) {
keyid[0] = a->nlimbs? (u32)(a->d[0] >> 32) : 0;
keyid[1] = a->nlimbs? (u32)(a->d[0] & 0xffffffff) : 0;
}
return a->nlimbs? (u32)(a->d[0] & 0xffffffff) : 0;
#else
#error Make this function work with other LIMB sizes
#endif
}
/****************
* Return an m_alloced buffer with the MPI (msb first).
* NBYTES receives the length of this buffer. The caller must free the
* returned buffer. (This function does return a 0-byte buffer with NBYTES
* set to zero if the value of A is zero.) If SIGN is not NULL, it will
* be set to the sign of A.
*/
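/* Example: for A = 0x012345 the returned buffer holds the three bytes
 * 01 23 45 with NBYTES = 3; leading zero bytes of the top limb are
 * stripped, and A = 0 yields NBYTES = 0. */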
static byte *
do_get_buffer( MPI a, unsigned *nbytes, int *sign, int force_secure )
{
byte *p, *buffer;
mpi_limb_t alimb;
int i;
if( sign )
*sign = a->sign;
*nbytes = a->nlimbs * BYTES_PER_MPI_LIMB;
p = buffer = force_secure || mpi_is_secure(a) ? m_alloc_secure( *nbytes)
: m_alloc( *nbytes );
for(i=a->nlimbs-1; i >= 0; i-- ) {
alimb = a->d[i];
#if BYTES_PER_MPI_LIMB == 4
*p++ = alimb >> 24;
*p++ = alimb >> 16;
*p++ = alimb >> 8;
*p++ = alimb ;
#elif BYTES_PER_MPI_LIMB == 8
*p++ = alimb >> 56;
*p++ = alimb >> 48;
*p++ = alimb >> 40;
*p++ = alimb >> 32;
*p++ = alimb >> 24;
*p++ = alimb >> 16;
*p++ = alimb >> 8;
*p++ = alimb ;
#else
#error please implement for this limb size.
#endif
}
/* this is sub-optimal but we need to do the shift operation because
* the caller has to free the returned buffer */
for(p=buffer; !*p && *nbytes; p++, --*nbytes )
;
if( p != buffer )
memmove(buffer,p, *nbytes);
return buffer;
}
byte *
mpi_get_buffer( MPI a, unsigned *nbytes, int *sign )
{
return do_get_buffer( a, nbytes, sign, 0 );
}
byte *
mpi_get_secure_buffer( MPI a, unsigned *nbytes, int *sign )
{
return do_get_buffer( a, nbytes, sign, 1 );
}
/****************
* Use BUFFER to update MPI.
*/
void
mpi_set_buffer( MPI a, const byte *buffer, unsigned nbytes, int sign )
{
const byte *p;
mpi_limb_t alimb;
int nlimbs;
int i;
nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB;
RESIZE_IF_NEEDED(a, nlimbs);
a->sign = sign;
for(i=0, p = buffer+nbytes-1; p >= buffer+BYTES_PER_MPI_LIMB; ) {
#if BYTES_PER_MPI_LIMB == 4
alimb = *p-- ;
alimb |= *p-- << 8 ;
alimb |= *p-- << 16 ;
alimb |= *p-- << 24 ;
#elif BYTES_PER_MPI_LIMB == 8
alimb = (mpi_limb_t)*p-- ;
alimb |= (mpi_limb_t)*p-- << 8 ;
alimb |= (mpi_limb_t)*p-- << 16 ;
alimb |= (mpi_limb_t)*p-- << 24 ;
alimb |= (mpi_limb_t)*p-- << 32 ;
alimb |= (mpi_limb_t)*p-- << 40 ;
alimb |= (mpi_limb_t)*p-- << 48 ;
alimb |= (mpi_limb_t)*p-- << 56 ;
#else
#error please implement for this limb size.
#endif
a->d[i++] = alimb;
}
if( p >= buffer ) {
#if BYTES_PER_MPI_LIMB == 4
alimb = *p-- ;
if( p >= buffer ) alimb |= *p-- << 8 ;
if( p >= buffer ) alimb |= *p-- << 16 ;
if( p >= buffer ) alimb |= *p-- << 24 ;
#elif BYTES_PER_MPI_LIMB == 8
alimb = (mpi_limb_t)*p-- ;
if( p >= buffer ) alimb |= (mpi_limb_t)*p-- << 8 ;
if( p >= buffer ) alimb |= (mpi_limb_t)*p-- << 16 ;
if( p >= buffer ) alimb |= (mpi_limb_t)*p-- << 24 ;
if( p >= buffer ) alimb |= (mpi_limb_t)*p-- << 32 ;
if( p >= buffer ) alimb |= (mpi_limb_t)*p-- << 40 ;
if( p >= buffer ) alimb |= (mpi_limb_t)*p-- << 48 ;
if( p >= buffer ) alimb |= (mpi_limb_t)*p-- << 56 ;
#else
#error please implement for this limb size.
#endif
a->d[i++] = alimb;
}
a->nlimbs = i;
assert( i == nlimbs );
}
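/* Illustrative note: mpi_set_buffer() rebuilds an MPI from the byte string
 * produced by mpi_get_buffer(); e.g. the three bytes 01 23 45 with sign 0
 * give an MPI with value 0x012345. */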

62
mpi/mpih-cmp.c Normal file
View file

@ -0,0 +1,62 @@
/* mpihelp-cmp.c - MPI helper functions
* Copyright (C) 1994, 1996 Free Software Foundation, Inc.
* Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"
/****************
* Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE.
* There are no restrictions on the relative sizes of
* the two arguments.
* Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2.
*/
int
mpihelp_cmp( mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size )
{
mpi_size_t i;
mpi_limb_t op1_word, op2_word;
for( i = size - 1; i >= 0 ; i--) {
op1_word = op1_ptr[i];
op2_word = op2_ptr[i];
if( op1_word != op2_word )
goto diff;
}
return 0;
diff:
/* This can *not* be simplified to
* op1_word - op2_word
* since that expression might give signed overflow. */
return (op1_word > op2_word) ? 1 : -1;
}

537
mpi/mpih-div.c Normal file
View file

@ -0,0 +1,537 @@
/* mpihelp-div.c - MPI helper functions
* Copyright (C) 1994, 1996 Free Software Foundation, Inc.
* Copyright (C) 1998, 1999 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"
#include "longlong.h"
#ifndef UMUL_TIME
#define UMUL_TIME 1
#endif
#ifndef UDIV_TIME
#define UDIV_TIME UMUL_TIME
#endif
/* FIXME: We should be using invert_limb (or invert_normalized_limb)
* here (not udiv_qrnnd).
*/
mpi_limb_t
mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
mpi_limb_t divisor_limb)
{
mpi_size_t i;
mpi_limb_t n1, n0, r;
int dummy;
/* Botch: Should this be handled at all? Rely on callers? */
if( !dividend_size )
return 0;
/* If multiplication is much faster than division, and the
* dividend is large, pre-invert the divisor, and use
* only multiplications in the inner loop.
*
* This test should be read:
* Does it ever help to use udiv_qrnnd_preinv?
* && Does what we save compensate for the inversion overhead?
*/
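/* Illustrative example (with 4-bit limbs, B = 16, for brevity): for the
 * normalized divisor d = 9 the code below computes
 * divisor_limb_inverted = floor(B*B/d) - B = 28 - 16 = 12, so
 * (B + 12)/(B*B) = 28/256 approximates 1/9 and each per-limb division in
 * the loop can be replaced by the multiply-based UDIV_QRNND_PREINV. */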
if( UDIV_TIME > (2 * UMUL_TIME + 6)
&& (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME ) {
int normalization_steps;
count_leading_zeros( normalization_steps, divisor_limb );
if( normalization_steps ) {
mpi_limb_t divisor_limb_inverted;
divisor_limb <<= normalization_steps;
/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
* result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
* most significant bit (with weight 2**N) implicit.
*
* Special case for DIVISOR_LIMB == 100...000.
*/
if( !(divisor_limb << 1) )
divisor_limb_inverted = ~(mpi_limb_t)0;
else
udiv_qrnnd(divisor_limb_inverted, dummy,
-divisor_limb, 0, divisor_limb);
n1 = dividend_ptr[dividend_size - 1];
r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
/* Possible optimization:
* if (r == 0
* && divisor_limb > ((n1 << normalization_steps)
* | (dividend_ptr[dividend_size - 2] >> ...)))
* ...one division less...
*/
for( i = dividend_size - 2; i >= 0; i--) {
n0 = dividend_ptr[i];
UDIV_QRNND_PREINV(dummy, r, r,
((n1 << normalization_steps)
| (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
divisor_limb, divisor_limb_inverted);
n1 = n0;
}
UDIV_QRNND_PREINV(dummy, r, r,
n1 << normalization_steps,
divisor_limb, divisor_limb_inverted);
return r >> normalization_steps;
}
else {
mpi_limb_t divisor_limb_inverted;
/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
* result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
* most significant bit (with weight 2**N) implicit.
*
* Special case for DIVISOR_LIMB == 100...000.
*/
if( !(divisor_limb << 1) )
divisor_limb_inverted = ~(mpi_limb_t)0;
else
udiv_qrnnd(divisor_limb_inverted, dummy,
-divisor_limb, 0, divisor_limb);
i = dividend_size - 1;
r = dividend_ptr[i];
if( r >= divisor_limb )
r = 0;
else
i--;
for( ; i >= 0; i--) {
n0 = dividend_ptr[i];
UDIV_QRNND_PREINV(dummy, r, r,
n0, divisor_limb, divisor_limb_inverted);
}
return r;
}
}
else {
if( UDIV_NEEDS_NORMALIZATION ) {
int normalization_steps;
count_leading_zeros(normalization_steps, divisor_limb);
if( normalization_steps ) {
divisor_limb <<= normalization_steps;
n1 = dividend_ptr[dividend_size - 1];
r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
/* Possible optimization:
* if (r == 0
* && divisor_limb > ((n1 << normalization_steps)
* | (dividend_ptr[dividend_size - 2] >> ...)))
* ...one division less...
*/
for(i = dividend_size - 2; i >= 0; i--) {
n0 = dividend_ptr[i];
udiv_qrnnd (dummy, r, r,
((n1 << normalization_steps)
| (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
divisor_limb);
n1 = n0;
}
udiv_qrnnd (dummy, r, r,
n1 << normalization_steps,
divisor_limb);
return r >> normalization_steps;
}
}
/* No normalization needed, either because udiv_qrnnd doesn't require
* it, or because DIVISOR_LIMB is already normalized. */
i = dividend_size - 1;
r = dividend_ptr[i];
if(r >= divisor_limb)
r = 0;
else
i--;
for(; i >= 0; i--) {
n0 = dividend_ptr[i];
udiv_qrnnd (dummy, r, r, n0, divisor_limb);
}
return r;
}
}
/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
* the NSIZE-DSIZE least significant quotient limbs at QP
* and the DSIZE long remainder at NP. If QEXTRA_LIMBS is
* non-zero, generate that many fraction bits and append them after the
* other quotient limbs.
* Return the most significant limb of the quotient, this is always 0 or 1.
*
* Preconditions:
* 0. NSIZE >= DSIZE.
* 1. The most significant bit of the divisor must be set.
* 2. QP must either not overlap with the input operands at all, or
* QP + DSIZE >= NP must hold true. (This means that it's
* possible to put the quotient in the high part of NUM, right after the
* remainder in NUM.)
* 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
*/
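/* Example: dividing a 5-limb numerator by a 2-limb divisor (with
 * QEXTRA_LIMBS = 0) writes the 3 least significant quotient limbs at QP,
 * leaves the 2-limb remainder at NP, and returns the most significant
 * quotient limb, which is always 0 or 1. */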
mpi_limb_t
mpihelp_divrem( mpi_ptr_t qp, mpi_size_t qextra_limbs,
mpi_ptr_t np, mpi_size_t nsize,
mpi_ptr_t dp, mpi_size_t dsize)
{
mpi_limb_t most_significant_q_limb = 0;
switch(dsize) {
case 0:
/* We are asked to divide by zero, so go ahead and do it! (To make
the compiler not remove this statement, return the value.) */
return 1 / dsize;
case 1:
{
mpi_size_t i;
mpi_limb_t n1;
mpi_limb_t d;
d = dp[0];
n1 = np[nsize - 1];
if( n1 >= d ) {
n1 -= d;
most_significant_q_limb = 1;
}
qp += qextra_limbs;
for( i = nsize - 2; i >= 0; i--)
udiv_qrnnd( qp[i], n1, n1, np[i], d );
qp -= qextra_limbs;
for( i = qextra_limbs - 1; i >= 0; i-- )
udiv_qrnnd (qp[i], n1, n1, 0, d);
np[0] = n1;
}
break;
case 2:
{
mpi_size_t i;
mpi_limb_t n1, n0, n2;
mpi_limb_t d1, d0;
np += nsize - 2;
d1 = dp[1];
d0 = dp[0];
n1 = np[1];
n0 = np[0];
if( n1 >= d1 && (n1 > d1 || n0 >= d0) ) {
sub_ddmmss (n1, n0, n1, n0, d1, d0);
most_significant_q_limb = 1;
}
for( i = qextra_limbs + nsize - 2 - 1; i >= 0; i-- ) {
mpi_limb_t q;
mpi_limb_t r;
if( i >= qextra_limbs )
np--;
else
np[0] = 0;
if( n1 == d1 ) {
/* Q should be either 111..111 or 111..110. Need special
* treatment of this rare case as normal division would
* give overflow. */
q = ~(mpi_limb_t)0;
r = n0 + d1;
if( r < d1 ) { /* Carry in the addition? */
add_ssaaaa( n1, n0, r - d0, np[0], 0, d0 );
qp[i] = q;
continue;
}
n1 = d0 - (d0 != 0?1:0);
n0 = -d0;
}
else {
udiv_qrnnd (q, r, n1, n0, d1);
umul_ppmm (n1, n0, d0, q);
}
n2 = np[0];
q_test:
if( n1 > r || (n1 == r && n0 > n2) ) {
/* The estimated Q was too large. */
q--;
sub_ddmmss (n1, n0, n1, n0, 0, d0);
r += d1;
if( r >= d1 ) /* If not carry, test Q again. */
goto q_test;
}
qp[i] = q;
sub_ddmmss (n1, n0, r, n2, n1, n0);
}
np[1] = n1;
np[0] = n0;
}
break;
default:
{
mpi_size_t i;
mpi_limb_t dX, d1, n0;
np += nsize - dsize;
dX = dp[dsize - 1];
d1 = dp[dsize - 2];
n0 = np[dsize - 1];
if( n0 >= dX ) {
if(n0 > dX || mpihelp_cmp(np, dp, dsize - 1) >= 0 ) {
mpihelp_sub_n(np, np, dp, dsize);
n0 = np[dsize - 1];
most_significant_q_limb = 1;
}
}
for( i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) {
mpi_limb_t q;
mpi_limb_t n1, n2;
mpi_limb_t cy_limb;
if( i >= qextra_limbs ) {
np--;
n2 = np[dsize];
}
else {
n2 = np[dsize - 1];
MPN_COPY_DECR (np + 1, np, dsize - 1);
np[0] = 0;
}
if( n0 == dX ) {
/* This might over-estimate q, but it's probably not worth
* the extra code here to find out. */
q = ~(mpi_limb_t)0;
}
else {
mpi_limb_t r;
udiv_qrnnd(q, r, n0, np[dsize - 1], dX);
umul_ppmm(n1, n0, d1, q);
while( n1 > r || (n1 == r && n0 > np[dsize - 2])) {
q--;
r += dX;
if( r < dX ) /* I.e. "carry in previous addition?" */
break;
n1 -= n0 < d1;
n0 -= d1;
}
}
/* Possible optimization: We already have (q * n0) and (1 * n1)
* after the calculation of q. Taking advantage of that, we
* could save two iterations of this loop. */
cy_limb = mpihelp_submul_1(np, dp, dsize, q);
if( n2 != cy_limb ) {
mpihelp_add_n(np, np, dp, dsize);
q--;
}
qp[i] = q;
n0 = np[dsize - 1];
}
}
}
return most_significant_q_limb;
}
/****************
* Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
* Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
* Return the single-limb remainder.
* There are no constraints on the value of the divisor.
*
* QUOT_PTR and DIVIDEND_PTR might point to the same limb.
*/
mpi_limb_t
mpihelp_divmod_1( mpi_ptr_t quot_ptr,
mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
mpi_limb_t divisor_limb)
{
mpi_size_t i;
mpi_limb_t n1, n0, r;
int dummy;
if( !dividend_size )
return 0;
/* If multiplication is much faster than division, and the
* dividend is large, pre-invert the divisor, and use
* only multiplications in the inner loop.
*
* This test should be read:
* Does it ever help to use udiv_qrnnd_preinv?
* && Does what we save compensate for the inversion overhead?
*/
if( UDIV_TIME > (2 * UMUL_TIME + 6)
&& (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME ) {
int normalization_steps;
count_leading_zeros( normalization_steps, divisor_limb );
if( normalization_steps ) {
mpi_limb_t divisor_limb_inverted;
divisor_limb <<= normalization_steps;
/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
* result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
* most significant bit (with weight 2**N) implicit.
*/
/* Special case for DIVISOR_LIMB == 100...000. */
if( !(divisor_limb << 1) )
divisor_limb_inverted = ~(mpi_limb_t)0;
else
udiv_qrnnd(divisor_limb_inverted, dummy,
-divisor_limb, 0, divisor_limb);
n1 = dividend_ptr[dividend_size - 1];
r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
/* Possible optimization:
* if (r == 0
* && divisor_limb > ((n1 << normalization_steps)
* | (dividend_ptr[dividend_size - 2] >> ...)))
* ...one division less...
*/
for( i = dividend_size - 2; i >= 0; i--) {
n0 = dividend_ptr[i];
UDIV_QRNND_PREINV( quot_ptr[i + 1], r, r,
((n1 << normalization_steps)
| (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
divisor_limb, divisor_limb_inverted);
n1 = n0;
}
UDIV_QRNND_PREINV( quot_ptr[0], r, r,
n1 << normalization_steps,
divisor_limb, divisor_limb_inverted);
return r >> normalization_steps;
}
else {
mpi_limb_t divisor_limb_inverted;
/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
* result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
* most significant bit (with weight 2**N) implicit.
*/
/* Special case for DIVISOR_LIMB == 100...000. */
if( !(divisor_limb << 1) )
divisor_limb_inverted = ~(mpi_limb_t) 0;
else
udiv_qrnnd(divisor_limb_inverted, dummy,
-divisor_limb, 0, divisor_limb);
i = dividend_size - 1;
r = dividend_ptr[i];
if( r >= divisor_limb )
r = 0;
else
quot_ptr[i--] = 0;
for( ; i >= 0; i-- ) {
n0 = dividend_ptr[i];
UDIV_QRNND_PREINV( quot_ptr[i], r, r,
n0, divisor_limb, divisor_limb_inverted);
}
return r;
}
}
else {
if(UDIV_NEEDS_NORMALIZATION) {
int normalization_steps;
count_leading_zeros (normalization_steps, divisor_limb);
if( normalization_steps ) {
divisor_limb <<= normalization_steps;
n1 = dividend_ptr[dividend_size - 1];
r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
/* Possible optimization:
* if (r == 0
* && divisor_limb > ((n1 << normalization_steps)
* | (dividend_ptr[dividend_size - 2] >> ...)))
* ...one division less...
*/
for( i = dividend_size - 2; i >= 0; i--) {
n0 = dividend_ptr[i];
udiv_qrnnd (quot_ptr[i + 1], r, r,
((n1 << normalization_steps)
| (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
divisor_limb);
n1 = n0;
}
udiv_qrnnd (quot_ptr[0], r, r,
n1 << normalization_steps,
divisor_limb);
return r >> normalization_steps;
}
}
/* No normalization needed, either because udiv_qrnnd doesn't require
* it, or because DIVISOR_LIMB is already normalized. */
i = dividend_size - 1;
r = dividend_ptr[i];
if(r >= divisor_limb)
r = 0;
else
quot_ptr[i--] = 0;
for(; i >= 0; i--) {
n0 = dividend_ptr[i];
udiv_qrnnd( quot_ptr[i], r, r, n0, divisor_limb );
}
return r;
}
}

528
mpi/mpih-mul.c Normal file
View file

@ -0,0 +1,528 @@
/* mpihelp-mul.c - MPI helper functions
* Copyright (C) 1994, 1996, 1998, 1999,
* 2000 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
* Note: This code is heavily based on the GNU MP Library.
* Actually it's the same code with only minor changes in the
* way the data is stored; this is to support the abstraction
* of an optional secure memory allocation which may be used
* to avoid revealing of sensitive data due to paging etc.
* The GNU MP Library itself is published under the LGPL;
* however I decided to publish this code under the plain GPL.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mpi-internal.h"
#include "longlong.h"
#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
do { \
if( (size) < KARATSUBA_THRESHOLD ) \
mul_n_basecase (prodp, up, vp, size); \
else \
mul_n (prodp, up, vp, size, tspace); \
} while (0);
#define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \
do { \
if ((size) < KARATSUBA_THRESHOLD) \
mpih_sqr_n_basecase (prodp, up, size); \
else \
mpih_sqr_n (prodp, up, size, tspace); \
} while (0);
/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
* both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are
* always stored. Return the most significant limb.
*
* Argument constraints:
* 1. PRODP != UP and PRODP != VP, i.e. the destination
* must be distinct from the multiplier and the multiplicand.
*
*
* Handle simple cases with traditional multiplication.
*
* This is the most critical code of multiplication. All multiplies rely
* on this, both small and huge. Small ones arrive here immediately. Huge
* ones arrive here as this is the base case for Karatsuba's recursive
* algorithm below.
*/
static mpi_limb_t
mul_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up,
mpi_ptr_t vp, mpi_size_t size)
{
mpi_size_t i;
mpi_limb_t cy;
mpi_limb_t v_limb;
/* Multiply by the first limb in V separately, as the result can be
* stored (not added) to PROD. We also avoid a loop for zeroing. */
v_limb = vp[0];
if( v_limb <= 1 ) {
if( v_limb == 1 )
MPN_COPY( prodp, up, size );
else
MPN_ZERO( prodp, size );
cy = 0;
}
else
cy = mpihelp_mul_1( prodp, up, size, v_limb );
prodp[size] = cy;
prodp++;
/* For each iteration in the outer loop, multiply one limb from
* U with one limb from V, and add it to PROD. */
for( i = 1; i < size; i++ ) {
v_limb = vp[i];
if( v_limb <= 1 ) {
cy = 0;
if( v_limb == 1 )
cy = mpihelp_add_n(prodp, prodp, up, size);
}
else
cy = mpihelp_addmul_1(prodp, up, size, v_limb);
prodp[size] = cy;
prodp++;
}
return cy;
}
static void
mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
mpi_size_t size, mpi_ptr_t tspace )
{
if( size & 1 ) {
/* The size is odd, and the code below doesn't handle that.
* Multiply the least significant (size - 1) limbs with a recursive
* call, and handle the most significant limb of S1 and S2
* separately.
* A slightly faster way to do this would be to make the Karatsuba
* code below behave as if the size were even, and let it check for
* odd size in the end. I.e., in essence move this code to the end.
* Doing so would save us a recursive call, and potentially make the
* stack grow a lot less.
*/
mpi_size_t esize = size - 1; /* even size */
mpi_limb_t cy_limb;
MPN_MUL_N_RECURSE( prodp, up, vp, esize, tspace );
cy_limb = mpihelp_addmul_1( prodp + esize, up, esize, vp[esize] );
prodp[esize + esize] = cy_limb;
cy_limb = mpihelp_addmul_1( prodp + esize, vp, size, up[esize] );
prodp[esize + size] = cy_limb;
}
else {
/* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
*
* Split U in two pieces, U1 and U0, such that
* U = U0 + U1*(B**n),
* and V in V1 and V0, such that
* V = V0 + V1*(B**n).
*
* UV is then computed recursively using the identity
*
* UV = (B^(2n) + B^n)*U1*V1  +  B^n*(U1-U0)*(V0-V1)  +  (B^n + 1)*U0*V0
*
* Where B = 2**BITS_PER_MP_LIMB.
*/
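/* Worked example of the identity (illustrative, with B = 10, n = 1):
 * U = 34 (U1=3, U0=4), V = 56 (V1=5, V0=6):
 * (100+10)*15 + 10*(3-4)*(6-5) + 11*24 = 1650 - 10 + 264 = 1904 = 34*56. */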
mpi_size_t hsize = size >> 1;
mpi_limb_t cy;
int negflg;
/* Product H. ________________ ________________
* |_____U1 x V1____||____U0 x V0_____|
* Put result in upper part of PROD and pass low part of TSPACE
* as new TSPACE.
*/
MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize, tspace);
/* Product M. ________________
* |_(U1-U0)(V0-V1)_|
*/
if( mpihelp_cmp(up + hsize, up, hsize) >= 0 ) {
mpihelp_sub_n(prodp, up + hsize, up, hsize);
negflg = 0;
}
else {
mpihelp_sub_n(prodp, up, up + hsize, hsize);
negflg = 1;
}
if( mpihelp_cmp(vp + hsize, vp, hsize) >= 0 ) {
mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
negflg ^= 1;
}
else {
mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
/* No change of NEGFLG. */
}
/* Read temporary operands from low part of PROD.
* Put result in low part of TSPACE using upper part of TSPACE
* as new TSPACE.
*/
MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize, tspace + size);
/* Add/copy product H. */
MPN_COPY (prodp + hsize, prodp + size, hsize);
cy = mpihelp_add_n( prodp + size, prodp + size,
prodp + size + hsize, hsize);
/* Add product M (if NEGFLG M is a negative number) */
if(negflg)
cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
else
cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
/* Product L. ________________ ________________
* |________________||____U0 x V0_____|
* Read temporary operands from low part of PROD.
* Put result in low part of TSPACE using upper part of TSPACE
* as new TSPACE.
*/
MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);
/* Add/copy Product L (twice) */
cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
if( cy )
mpihelp_add_1(prodp + hsize + size, prodp + hsize + size, hsize, cy);
MPN_COPY(prodp, tspace, hsize);
cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, hsize);
if( cy )
mpihelp_add_1(prodp + size, prodp + size, size, 1);
}
}
void
mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size )
{
mpi_size_t i;
mpi_limb_t cy_limb;
mpi_limb_t v_limb;
/* Multiply by the first limb in V separately, as the result can be
* stored (not added) to PROD. We also avoid a loop for zeroing. */
v_limb = up[0];
if( v_limb <= 1 ) {
if( v_limb == 1 )
MPN_COPY( prodp, up, size );
else
MPN_ZERO(prodp, size);
cy_limb = 0;
}
else
cy_limb = mpihelp_mul_1( prodp, up, size, v_limb );
prodp[size] = cy_limb;
prodp++;
/* For each iteration in the outer loop, multiply one limb from
* U with one limb from V, and add it to PROD. */
for( i=1; i < size; i++) {
v_limb = up[i];
if( v_limb <= 1 ) {
cy_limb = 0;
if( v_limb == 1 )
cy_limb = mpihelp_add_n(prodp, prodp, up, size);
}
else
cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);
prodp[size] = cy_limb;
prodp++;
}
}
void
mpih_sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
{
if( size & 1 ) {
/* The size is odd, and the code below doesn't handle that.
* Multiply the least significant (size - 1) limbs with a recursive
* call, and handle the most significant limb of S1 and S2
* separately.
* A slightly faster way to do this would be to make the Karatsuba
* code below behave as if the size were even, and let it check for
* odd size in the end. I.e., in essence move this code to the end.
* Doing so would save us a recursive call, and potentially make the
* stack grow a lot less.
*/
mpi_size_t esize = size - 1; /* even size */
mpi_limb_t cy_limb;
MPN_SQR_N_RECURSE( prodp, up, esize, tspace );
cy_limb = mpihelp_addmul_1( prodp + esize, up, esize, up[esize] );
prodp[esize + esize] = cy_limb;
cy_limb = mpihelp_addmul_1( prodp + esize, up, size, up[esize] );
prodp[esize + size] = cy_limb;
}
else {
mpi_size_t hsize = size >> 1;
mpi_limb_t cy;
/* Product H. ________________ ________________
* |_____U1 x U1____||____U0 x U0_____|
* Put result in upper part of PROD and pass low part of TSPACE
* as new TSPACE.
*/
MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);
/* Product M. ________________
* |_(U1-U0)(U0-U1)_|
*/
if( mpihelp_cmp( up + hsize, up, hsize) >= 0 )
mpihelp_sub_n( prodp, up + hsize, up, hsize);
else
mpihelp_sub_n (prodp, up, up + hsize, hsize);
/* Read temporary operands from low part of PROD.
* Put result in low part of TSPACE using upper part of TSPACE
* as new TSPACE. */
MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);
/* Add/copy product H */
MPN_COPY(prodp + hsize, prodp + size, hsize);
cy = mpihelp_add_n(prodp + size, prodp + size,
prodp + size + hsize, hsize);
/* Add product M (if NEGFLG M is a negative number). */
cy -= mpihelp_sub_n (prodp + hsize, prodp + hsize, tspace, size);
/* Product L. ________________ ________________
* |________________||____U0 x U0_____|
* Read temporary operands from low part of PROD.
* Put result in low part of TSPACE using upper part of TSPACE
* as new TSPACE. */
MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);
/* Add/copy Product L (twice). */
cy += mpihelp_add_n (prodp + hsize, prodp + hsize, tspace, size);
if( cy )
mpihelp_add_1(prodp + hsize + size, prodp + hsize + size,
hsize, cy);
MPN_COPY(prodp, tspace, hsize);
cy = mpihelp_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
if( cy )
mpihelp_add_1 (prodp + size, prodp + size, size, 1);
}
}
/* This should be made into an inline function in gmp.h. */
void
mpihelp_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
{
int secure;
if( up == vp ) {
if( size < KARATSUBA_THRESHOLD )
mpih_sqr_n_basecase( prodp, up, size );
else {
mpi_ptr_t tspace;
secure = m_is_secure( up );
tspace = mpi_alloc_limb_space( 2 * size, secure );
mpih_sqr_n( prodp, up, size, tspace );
mpi_free_limb_space( tspace );
}
}
else {
if( size < KARATSUBA_THRESHOLD )
mul_n_basecase( prodp, up, vp, size );
else {
mpi_ptr_t tspace;
secure = m_is_secure( up ) || m_is_secure( vp );
tspace = mpi_alloc_limb_space( 2 * size, secure );
mul_n (prodp, up, vp, size, tspace);
mpi_free_limb_space( tspace );
}
}
}
void
mpihelp_mul_karatsuba_case( mpi_ptr_t prodp,
mpi_ptr_t up, mpi_size_t usize,
mpi_ptr_t vp, mpi_size_t vsize,
struct karatsuba_ctx *ctx )
{
mpi_limb_t cy;
if( !ctx->tspace || ctx->tspace_size < vsize ) {
if( ctx->tspace )
mpi_free_limb_space( ctx->tspace );
ctx->tspace = mpi_alloc_limb_space( 2 * vsize,
m_is_secure( up ) || m_is_secure( vp ) );
ctx->tspace_size = vsize;
}
MPN_MUL_N_RECURSE( prodp, up, vp, vsize, ctx->tspace );
prodp += vsize;
up += vsize;
usize -= vsize;
if( usize >= vsize ) {
if( !ctx->tp || ctx->tp_size < vsize ) {
if( ctx->tp )
mpi_free_limb_space( ctx->tp );
ctx->tp = mpi_alloc_limb_space( 2 * vsize, m_is_secure( up )
|| m_is_secure( vp ) );
ctx->tp_size = vsize;
}
do {
MPN_MUL_N_RECURSE( ctx->tp, up, vp, vsize, ctx->tspace );
cy = mpihelp_add_n( prodp, prodp, ctx->tp, vsize );
mpihelp_add_1( prodp + vsize, ctx->tp + vsize, vsize, cy );
prodp += vsize;
up += vsize;
usize -= vsize;
} while( usize >= vsize );
}
if( usize ) {
if( usize < KARATSUBA_THRESHOLD ) {
mpihelp_mul( ctx->tspace, vp, vsize, up, usize );
}
else {
if( !ctx->next ) {
ctx->next = m_alloc_clear( sizeof *ctx );
}
mpihelp_mul_karatsuba_case( ctx->tspace,
vp, vsize,
up, usize,
ctx->next );
}
cy = mpihelp_add_n( prodp, prodp, ctx->tspace, vsize);
mpihelp_add_1( prodp + vsize, ctx->tspace + vsize, usize, cy );
}
}
void
mpihelp_release_karatsuba_ctx( struct karatsuba_ctx *ctx )
{
struct karatsuba_ctx *ctx2;
if( ctx->tp )
mpi_free_limb_space( ctx->tp );
if( ctx->tspace )
mpi_free_limb_space( ctx->tspace );
for( ctx=ctx->next; ctx; ctx = ctx2 ) {
ctx2 = ctx->next;
if( ctx->tp )
mpi_free_limb_space( ctx->tp );
if( ctx->tspace )
mpi_free_limb_space( ctx->tspace );
m_free( ctx );
}
}
/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
* and v (pointed to by VP, with VSIZE limbs), and store the result at
* PRODP. USIZE + VSIZE limbs are always stored, even if the most significant
* limb of the result turns out to be zero. Return the most significant limb of the
* result.
*
* NOTE: The space pointed to by PRODP is overwritten before finished
* with U and V, so overlap is an error.
*
* Argument constraints:
* 1. USIZE >= VSIZE.
* 2. PRODP != UP and PRODP != VP, i.e. the destination
* must be distinct from the multiplier and the multiplicand.
*/
mpi_limb_t
mpihelp_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
mpi_ptr_t vp, mpi_size_t vsize)
{
mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
mpi_limb_t cy;
struct karatsuba_ctx ctx;
if( vsize < KARATSUBA_THRESHOLD ) {
mpi_size_t i;
mpi_limb_t v_limb;
if( !vsize )
return 0;
/* Multiply by the first limb in V separately, as the result can be
* stored (not added) to PROD. We also avoid a loop for zeroing. */
v_limb = vp[0];
if( v_limb <= 1 ) {
if( v_limb == 1 )
MPN_COPY( prodp, up, usize );
else
MPN_ZERO( prodp, usize );
cy = 0;
}
else
cy = mpihelp_mul_1( prodp, up, usize, v_limb );
prodp[usize] = cy;
prodp++;
/* For each iteration in the outer loop, multiply one limb from
* U with one limb from V, and add it to PROD. */
for( i = 1; i < vsize; i++ ) {
v_limb = vp[i];
if( v_limb <= 1 ) {
cy = 0;
if( v_limb == 1 )
cy = mpihelp_add_n(prodp, prodp, up, usize);
}
else
cy = mpihelp_addmul_1(prodp, up, usize, v_limb);
prodp[usize] = cy;
prodp++;
}
return cy;
}
memset( &ctx, 0, sizeof ctx );
mpihelp_mul_karatsuba_case( prodp, up, usize, vp, vsize, &ctx );
mpihelp_release_karatsuba_ctx( &ctx );
return *prod_endp;
}

471
mpi/mpiutil.c Normal file
View file

@ -0,0 +1,471 @@
/* mpiutil.c - Utility functions for MPI
* Copyright (C) 1998, 1999 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "mpi.h"
#include "mpi-internal.h"
#include "memory.h"
#include "util.h"
#ifdef M_DEBUG
#undef mpi_alloc
#undef mpi_alloc_secure
#undef mpi_free
#endif
/****************
* Note: It was a bad idea to use the number of limbs to allocate
* because on an Alpha the limbs are large but we normally need
* integers of n bits - so we should change this to bits (or bytes).
*
* But mpi_alloc is used in a lot of places :-)
*/
MPI
#ifdef M_DEBUG
mpi_debug_alloc( unsigned nlimbs, const char *info )
#else
mpi_alloc( unsigned nlimbs )
#endif
{
MPI a;
if( DBG_MEMORY )
log_debug("mpi_alloc(%u)\n", nlimbs*BITS_PER_MPI_LIMB );
#ifdef M_DEBUG
a = m_debug_alloc( sizeof *a, info );
a->d = nlimbs? mpi_debug_alloc_limb_space( nlimbs, 0, info ) : NULL;
#else
a = m_alloc( sizeof *a );
a->d = nlimbs? mpi_alloc_limb_space( nlimbs, 0 ) : NULL;
#endif
a->alloced = nlimbs;
a->nlimbs = 0;
a->sign = 0;
a->flags = 0;
a->nbits = 0;
return a;
}
void
mpi_m_check( MPI a )
{
m_check(a);
m_check(a->d);
}
MPI
#ifdef M_DEBUG
mpi_debug_alloc_secure( unsigned nlimbs, const char *info )
#else
mpi_alloc_secure( unsigned nlimbs )
#endif
{
MPI a;
if( DBG_MEMORY )
log_debug("mpi_alloc_secure(%u)\n", nlimbs*BITS_PER_MPI_LIMB );
#ifdef M_DEBUG
a = m_debug_alloc( sizeof *a, info );
a->d = nlimbs? mpi_debug_alloc_limb_space( nlimbs, 1, info ) : NULL;
#else
a = m_alloc( sizeof *a );
a->d = nlimbs? mpi_alloc_limb_space( nlimbs, 1 ) : NULL;
#endif
a->alloced = nlimbs;
a->flags = 1;
a->nlimbs = 0;
a->sign = 0;
a->nbits = 0;
return a;
}
#if 0
static void *unused_limbs_5;
static void *unused_limbs_32;
static void *unused_limbs_64;
#endif
mpi_ptr_t
#ifdef M_DEBUG
mpi_debug_alloc_limb_space( unsigned nlimbs, int secure, const char *info )
#else
mpi_alloc_limb_space( unsigned nlimbs, int secure )
#endif
{
size_t len = nlimbs * sizeof(mpi_limb_t);
mpi_ptr_t p;
if( DBG_MEMORY )
log_debug("mpi_alloc_limb_space(%u)\n", (unsigned)len*8 );
#if 0
if( !secure ) {
if( nlimbs == 5 && unused_limbs_5 ) { /* DSA 160 bits */
p = unused_limbs_5;
unused_limbs_5 = *p;
return p;
}
else if( nlimbs == 32 && unused_limbs_32 ) { /* DSA 1024 bits */
p = unused_limbs_32;
unused_limbs_32 = *p;
return p;
}
else if( nlimbs == 64 && unused_limbs_64 ) { /* DSA 2*1024 bits */
p = unused_limbs_64;
unused_limbs_64 = *p;
return p;
}
}
#endif
#ifdef M_DEBUG
p = secure? m_debug_alloc_secure(len, info):m_debug_alloc( len, info );
#else
p = secure? m_alloc_secure( len ):m_alloc( len );
#endif
return p;
}
void
#ifdef M_DEBUG
mpi_debug_free_limb_space( mpi_ptr_t a, const char *info )
#else
mpi_free_limb_space( mpi_ptr_t a )
#endif
{
if( !a )
return;
if( DBG_MEMORY )
log_debug("mpi_free_limb_space of size %lu\n", (ulong)m_size(a)*8 );
#if 0
if( !m_is_secure(a) ) {
size_t nlimbs = m_size(a) / 4 ;
void *p = a;
if( nlimbs == 5 ) { /* DSA 160 bits */
*a = unused_limbs_5;
unused_limbs_5 = a;
return;
}
else if( nlimbs == 32 ) { /* DSA 1024 bits */
*a = unused_limbs_32;
unused_limbs_32 = a;
return;
}
else if( nlimbs == 64 ) { /* DSA 2*1024 bits */
*a = unused_limbs_64;
unused_limbs_64 = a;
return;
}
}
#endif
m_free(a);
}
void
mpi_assign_limb_space( MPI a, mpi_ptr_t ap, unsigned nlimbs )
{
mpi_free_limb_space(a->d);
a->d = ap;
a->alloced = nlimbs;
}
/****************
* Resize the limb array of A to NLIMBS. The additional space is cleared
* (set to 0) [done by m_realloc()].
*/
void
#ifdef M_DEBUG
mpi_debug_resize( MPI a, unsigned nlimbs, const char *info )
#else
mpi_resize( MPI a, unsigned nlimbs )
#endif
{
if( nlimbs <= a->alloced )
return; /* no need to do it */
/* Note: a->secure is not used - instead the realloc functions
* take care of it. Maybe we should drop a->secure completely
* and rely on an mpi_is_secure function, which would be
* a wrapper around m_is_secure
*/
#ifdef M_DEBUG
if( a->d )
a->d = m_debug_realloc(a->d, nlimbs * sizeof(mpi_limb_t), info );
else
a->d = m_debug_alloc_clear( nlimbs * sizeof(mpi_limb_t), info );
#else
if( a->d )
a->d = m_realloc(a->d, nlimbs * sizeof(mpi_limb_t) );
else
a->d = m_alloc_clear( nlimbs * sizeof(mpi_limb_t) );
#endif
a->alloced = nlimbs;
}
void
mpi_clear( MPI a )
{
a->nlimbs = 0;
a->nbits = 0;
a->flags = 0;
}
void
#ifdef M_DEBUG
mpi_debug_free( MPI a, const char *info )
#else
mpi_free( MPI a )
#endif
{
if( !a )
return;
if( DBG_MEMORY )
log_debug("mpi_free\n" );
if( a->flags & 4 )
m_free( a->d );
else {
#ifdef M_DEBUG
mpi_debug_free_limb_space(a->d, info);
#else
mpi_free_limb_space(a->d);
#endif
}
if( a->flags & ~7 )
log_bug("invalid flag value in mpi\n");
m_free(a);
}
void
mpi_set_secure( MPI a )
{
mpi_ptr_t ap, bp;
if( (a->flags & 1) )
return;
a->flags |= 1;
ap = a->d;
if( !a->nlimbs ) {
assert(!ap);
return;
}
#ifdef M_DEBUG
bp = mpi_debug_alloc_limb_space( a->nlimbs, 1, "set_secure" );
#else
bp = mpi_alloc_limb_space( a->nlimbs, 1 );
#endif
MPN_COPY( bp, ap, a->nlimbs );
a->d = bp;
#ifdef M_DEBUG
mpi_debug_free_limb_space(ap, "set_secure");
#else
mpi_free_limb_space(ap);
#endif
}
MPI
mpi_set_opaque( MPI a, void *p, int len )
{
if( !a ) {
#ifdef M_DEBUG
a = mpi_debug_alloc(0,"alloc_opaque");
#else
a = mpi_alloc(0);
#endif
}
if( a->flags & 4 )
m_free( a->d );
else {
#ifdef M_DEBUG
mpi_debug_free_limb_space(a->d, "alloc_opaque");
#else
mpi_free_limb_space(a->d);
#endif
}
a->d = p;
a->alloced = 0;
a->nlimbs = 0;
a->nbits = len;
a->flags = 4;
return a;
}
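/****************
 * Return the buffer of an opaque MPI and, if LEN is not NULL, its
 * length in bytes.
 */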
void *
mpi_get_opaque( MPI a, int *len )
{
if( !(a->flags & 4) )
log_bug("mpi_get_opaque on normal mpi\n");
if( len )
*len = a->nbits;
return a->d;
}
/****************
* Note: This copy function should not interpret the MPI
* but copy it transparently.
*/
MPI
#ifdef M_DEBUG
mpi_debug_copy( MPI a, const char *info )
#else
mpi_copy( MPI a )
#endif
{
int i;
MPI b;
if( a && (a->flags & 4) ) {
void *p = m_is_secure(a->d)? m_alloc_secure( a->nbits )
: m_alloc( a->nbits );
memcpy( p, a->d, a->nbits );
b = mpi_set_opaque( NULL, p, a->nbits );
}
else if( a ) {
#ifdef M_DEBUG
b = mpi_is_secure(a)? mpi_debug_alloc_secure( a->nlimbs, info )
: mpi_debug_alloc( a->nlimbs, info );
#else
b = mpi_is_secure(a)? mpi_alloc_secure( a->nlimbs )
: mpi_alloc( a->nlimbs );
#endif
b->nlimbs = a->nlimbs;
b->sign = a->sign;
b->flags = a->flags;
b->nbits = a->nbits;
for(i=0; i < b->nlimbs; i++ )
b->d[i] = a->d[i];
}
else
b = NULL;
return b;
}
/****************
* This function allocates an MPI which is optimized to hold
 * a value as large as the one given in the argument and allocates it
* with the same flags as A.
*/
MPI
#ifdef M_DEBUG
mpi_debug_alloc_like( MPI a, const char *info )
#else
mpi_alloc_like( MPI a )
#endif
{
MPI b;
if( a && (a->flags & 4) ) {
void *p = m_is_secure(a->d)? m_alloc_secure( a->nbits )
: m_alloc( a->nbits );
memcpy( p, a->d, a->nbits );
b = mpi_set_opaque( NULL, p, a->nbits );
}
else if( a ) {
#ifdef M_DEBUG
b = mpi_is_secure(a)? mpi_debug_alloc_secure( a->nlimbs, info )
: mpi_debug_alloc( a->nlimbs, info );
#else
b = mpi_is_secure(a)? mpi_alloc_secure( a->nlimbs )
: mpi_alloc( a->nlimbs );
#endif
b->nlimbs = 0;
b->sign = 0;
b->flags = a->flags;
b->nbits = 0;
}
else
b = NULL;
return b;
}
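/****************
 * Set W to the value of U by copying its limbs, sign, nbits and flags.
 */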
void
mpi_set( MPI w, MPI u)
{
mpi_ptr_t wp, up;
mpi_size_t usize = u->nlimbs;
int usign = u->sign;
RESIZE_IF_NEEDED(w, usize);
wp = w->d;
up = u->d;
MPN_COPY( wp, up, usize );
w->nlimbs = usize;
w->nbits = u->nbits;
w->flags = u->flags;
w->sign = usign;
}
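/****************
 * Set W to the unsigned value U; a value of 0 is stored with zero limbs.
 */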
void
mpi_set_ui( MPI w, unsigned long u)
{
RESIZE_IF_NEEDED(w, 1);
w->d[0] = u;
w->nlimbs = u? 1:0;
w->sign = 0;
w->nbits = 0;
w->flags = 0;
}
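/****************
 * Allocate a new MPI and initialize it to the unsigned value U.
 */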
MPI
mpi_alloc_set_ui( unsigned long u)
{
#ifdef M_DEBUG
MPI w = mpi_debug_alloc(1,"alloc_set_ui");
#else
MPI w = mpi_alloc(1);
#endif
w->d[0] = u;
w->nlimbs = u? 1:0;
w->sign = 0;
return w;
}
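/****************
 * Exchange the values of A and B by swapping the whole structures.
 */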
void
mpi_swap( MPI a, MPI b)
{
struct gcry_mpi tmp;
tmp = *a; *a = *b; *b = tmp;
}

3
mpi/pa7100/distfiles Normal file

@ -0,0 +1,3 @@
mpih-lshift.S
mpih-rshift.S

96
mpi/pa7100/mpih-lshift.S Normal file

@ -0,0 +1,96 @@
/* hppa lshift
* optimized for the PA7100, where it runs at 3.25 cycles/limb
*
* Copyright (C) 1992, 1994, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*******************
* mpi_limb_t
* mpihelp_lshift( mpi_ptr_t wp, (gr26)
* mpi_ptr_t up, (gr25)
* mpi_size_t usize, (gr24)
* unsigned cnt) (gr23)
*/
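/* Shift UP[0..USIZE-1] left by CNT bits, store the result at WP and
 * return the bits shifted out of the most significant limb. */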
.code
.export mpihelp_lshift
.label mpihelp_lshift
.proc
.callinfo frame=64,no_calls
.entry
sh2add %r24,%r25,%r25
sh2add %r24,%r26,%r26
ldws,mb -4(0,%r25),%r22
subi 32,%r23,%r1
mtsar %r1
addib,= -1,%r24,L$0004
vshd %r0,%r22,%r28 ; compute carry out limb
ldws,mb -4(0,%r25),%r29
addib,<= -5,%r24,L$rest
vshd %r22,%r29,%r20
.label L$loop
ldws,mb -4(0,%r25),%r22
stws,mb %r20,-4(0,%r26)
vshd %r29,%r22,%r20
ldws,mb -4(0,%r25),%r29
stws,mb %r20,-4(0,%r26)
vshd %r22,%r29,%r20
ldws,mb -4(0,%r25),%r22
stws,mb %r20,-4(0,%r26)
vshd %r29,%r22,%r20
ldws,mb -4(0,%r25),%r29
stws,mb %r20,-4(0,%r26)
addib,> -4,%r24,L$loop
vshd %r22,%r29,%r20
.label L$rest
addib,= 4,%r24,L$end1
nop
.label L$eloop
ldws,mb -4(0,%r25),%r22
stws,mb %r20,-4(0,%r26)
addib,<= -1,%r24,L$end2
vshd %r29,%r22,%r20
ldws,mb -4(0,%r25),%r29
stws,mb %r20,-4(0,%r26)
addib,> -1,%r24,L$eloop
vshd %r22,%r29,%r20
.label L$end1
stws,mb %r20,-4(0,%r26)
vshd %r29,%r0,%r20
bv 0(%r2)
stw %r20,-4(0,%r26)
.label L$end2
stws,mb %r20,-4(0,%r26)
.label L$0004
vshd %r22,%r0,%r20
bv 0(%r2)
stw %r20,-4(0,%r26)
.exit
.procend

92
mpi/pa7100/mpih-rshift.S Normal file

@ -0,0 +1,92 @@
/* hppa rshift
* optimized for the PA7100, where it runs at 3.25 cycles/limb
*
* Copyright (C) 1992, 1994, 1998,
* 2001 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*******************
* mpi_limb_t
* mpihelp_rshift( mpi_ptr_t wp, (gr26)
* mpi_ptr_t up, (gr25)
* mpi_size_t usize, (gr24)
* unsigned cnt) (gr23)
*/
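/* Shift UP[0..USIZE-1] right by CNT bits, store the result at WP and
 * return the bits shifted out of the least significant limb. */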
.code
.export mpihelp_rshift
.label mpihelp_rshift
.proc
.callinfo frame=64,no_calls
.entry
ldws,ma 4(0,%r25),%r22
mtsar %r23
addib,= -1,%r24,L$r004
vshd %r22,%r0,%r28 ; compute carry out limb
ldws,ma 4(0,%r25),%r29
addib,<= -5,%r24,L$rrest
vshd %r29,%r22,%r20
.label L$roop
ldws,ma 4(0,%r25),%r22
stws,ma %r20,4(0,%r26)
vshd %r22,%r29,%r20
ldws,ma 4(0,%r25),%r29
stws,ma %r20,4(0,%r26)
vshd %r29,%r22,%r20
ldws,ma 4(0,%r25),%r22
stws,ma %r20,4(0,%r26)
vshd %r22,%r29,%r20
ldws,ma 4(0,%r25),%r29
stws,ma %r20,4(0,%r26)
addib,> -4,%r24,L$roop
vshd %r29,%r22,%r20
.label L$rrest
addib,= 4,%r24,L$rend1
nop
.label L$eroop
ldws,ma 4(0,%r25),%r22
stws,ma %r20,4(0,%r26)
addib,<= -1,%r24,L$rend2
vshd %r22,%r29,%r20
ldws,ma 4(0,%r25),%r29
stws,ma %r20,4(0,%r26)
addib,> -1,%r24,L$eroop
vshd %r29,%r22,%r20
.label L$rend1
stws,ma %r20,4(0,%r26)
vshd %r0,%r29,%r20
bv 0(%r2)
stw %r20,0(0,%r26)
.label L$rend2
stws,ma %r20,4(0,%r26)
.label L$r004
vshd %r0,%r22,%r20
bv 0(%r2)
stw %r20,0(0,%r26)
.exit
.procend

7
mpi/power/distfiles Normal file

@ -0,0 +1,7 @@
mpih-add1.S
mpih-lshift.S
mpih-mul1.S
mpih-mul2.S
mpih-mul3.S
mpih-rshift.S
mpih-sub1.S

86
mpi/power/mpih-add1.S Normal file

@ -0,0 +1,86 @@
/* IBM POWER add_n -- Add two limb vectors of equal, non-zero length.
*
* Copyright (C) 1992, 1994, 1996, 1999 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*
# INPUT PARAMETERS
# res_ptr r3
# s1_ptr r4
# s2_ptr r5
# size r6
*/
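/*
# Add the SIZE-limb vectors at s1_ptr and s2_ptr, store the sum at
# res_ptr and return the carry out of the most significant limb.
*/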
.toc
.extern mpihelp_add_n[DS]
.extern .mpihelp_add_n
.csect [PR]
.align 2
.globl mpihelp_add_n
.globl .mpihelp_add_n
.csect mpihelp_add_n[DS]
mpihelp_add_n:
.long .mpihelp_add_n, TOC[tc0], 0
.csect [PR]
.mpihelp_add_n:
andil. 10,6,1 # odd or even number of limbs?
l 8,0(4) # load least significant s1 limb
l 0,0(5) # load least significant s2 limb
cal 3,-4(3) # offset res_ptr, it's updated before it's used
sri 10,6,1 # count for unrolled loop
a 7,0,8 # add least significant limbs, set cy
mtctr 10 # copy count into CTR
beq 0,Leven # branch if even # of limbs (# of limbs >= 2)
# We have an odd # of limbs. Add the first limbs separately.
cmpi 1,10,0 # is count for unrolled loop zero?
bne 1,L1 # branch if not
st 7,4(3)
aze 3,10 # use the fact that r10 is zero...
br # return
# We added least significant limbs. Now reload the next limbs to enter loop.
L1: lu 8,4(4) # load s1 limb and update s1_ptr
lu 0,4(5) # load s2 limb and update s2_ptr
stu 7,4(3)
ae 7,0,8 # add limbs, set cy
Leven: lu 9,4(4) # load s1 limb and update s1_ptr
lu 10,4(5) # load s2 limb and update s2_ptr
bdz Lend # If done, skip loop
Loop: lu 8,4(4) # load s1 limb and update s1_ptr
lu 0,4(5) # load s2 limb and update s2_ptr
ae 11,9,10 # add previous limbs with cy, set cy
stu 7,4(3) #
lu 9,4(4) # load s1 limb and update s1_ptr
lu 10,4(5) # load s2 limb and update s2_ptr
ae 7,0,8 # add previous limbs with cy, set cy
stu 11,4(3) #
bdn Loop # decrement CTR and loop back
Lend: ae 11,9,10 # add limbs with cy, set cy
st 7,4(3) #
st 11,8(3) #
lil 3,0 # load cy into ...
aze 3,3 # ... return value register
br

64
mpi/power/mpih-lshift.S Normal file

@ -0,0 +1,64 @@
/* IBM POWER lshift
*
* Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*
# INPUT PARAMETERS
# res_ptr r3
# s_ptr r4
# size r5
# cnt r6
*/
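/*
# Shift the SIZE-limb vector at s_ptr left by cnt bits, store the
# result at res_ptr and return the bits shifted out at the top.
*/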
.toc
.extern mpihelp_lshift[DS]
.extern .mpihelp_lshift
.csect [PR]
.align 2
.globl mpihelp_lshift
.globl .mpihelp_lshift
.csect mpihelp_lshift[DS]
mpihelp_lshift:
.long .mpihelp_lshift, TOC[tc0], 0
.csect [PR]
.mpihelp_lshift:
sli 0,5,2
cax 9,3,0
cax 4,4,0
sfi 8,6,32
mtctr 5 # put limb count in CTR loop register
lu 0,-4(4) # read most significant limb
sre 3,0,8 # compute carry out limb, and init MQ register
bdz Lend2 # if just one limb, skip loop
	lu	0,-4(4)		# read 2nd most significant limb
sreq 7,0,8 # compute most significant limb of result
	bdz	Lend		# if just two limbs, skip loop
Loop: lu 0,-4(4) # load next lower limb
stu 7,-4(9) # store previous result during read latency
sreq 7,0,8 # compute result limb
bdn Loop # loop back until CTR is zero
Lend:	stu	7,-4(9) 	# store 2nd least significant limb
Lend2: sle 7,0,6 # compute least significant limb
st 7,-4(9) # store it
br

115
mpi/power/mpih-mul1.S Normal file

@ -0,0 +1,115 @@
/* IBM POWER mul_1 -- Multiply a limb vector with a limb and store
* the result in a second limb vector.
*
* Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
* GnuPG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* GnuPG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include "sysdep.h"
#include "asm-syntax.h"
/*
# INPUT PARAMETERS
# res_ptr r3
# s1_ptr r4
# size r5
# s2_limb r6
# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To
# obtain that operation, we have to use the 32x32->64 signed multiplication
# instruction, and add the appropriate compensation to the high limb of the
# result. We add the multiplicand if the multiplier has its most significant
# bit set, and we add the multiplier if the multiplicand has its most
# significant bit set. We need to preserve the carry flag between each
# iteration, so we have to compute the compensation carefully (the natural
# way, srai+and, doesn't work).  Since the POWER architecture has a branch unit
# we can branch in zero cycles, so that's how we perform the additions.
*/
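/*
# The low limbs of the product go to res_ptr; the most significant limb
# is returned.  With msb(x) the top bit of x and s(x) = x - 2^32*msb(x)
# the signed reading of x, the compensation works because (mod 2^64)
#    u*v = s(u)*s(v) + 2^32*( msb(u)*v + msb(v)*u )
# so only the high limb of each signed product needs the conditional adds.
*/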
.toc
.csect .mpihelp_mul_1[PR]
.align 2
.globl mpihelp_mul_1
.globl .mpihelp_mul_1
.csect mpihelp_mul_1[DS]
mpihelp_mul_1:
.long .mpihelp_mul_1[PR], TOC[tc0], 0
.csect .mpihelp_mul_1[PR]
.mpihelp_mul_1:
cal 3,-4(3)
l 0,0(4)
cmpi 0,6,0
mtctr 5
mul 9,0,6
srai 7,0,31
and 7,7,6
mfmq 8
ai 0,0,0 # reset carry
cax 9,9,7
blt Lneg
Lpos: bdz Lend
Lploop: lu 0,4(4)
stu 8,4(3)
cmpi 0,0,0
mul 10,0,6
mfmq 0
ae 8,0,9
bge Lp0
cax 10,10,6 # adjust high limb for negative limb from s1
Lp0: bdz Lend0
lu 0,4(4)
stu 8,4(3)
cmpi 0,0,0
mul 9,0,6
mfmq 0
ae 8,0,10
bge Lp1
cax 9,9,6 # adjust high limb for negative limb from s1
Lp1: bdn Lploop
b Lend
Lneg: cax 9,9,0
bdz Lend
Lnloop: lu 0,4(4)
stu 8,4(3)
cmpi 0,0,0
mul 10,0,6
cax 10,10,0 # adjust high limb for negative s2_limb
mfmq 0
ae 8,0,9
bge Ln0
cax 10,10,6 # adjust high limb for negative limb from s1
Ln0: bdz Lend0
lu 0,4(4)
stu 8,4(3)
cmpi 0,0,0
mul 9,0,6
cax 9,9,0 # adjust high limb for negative s2_limb
mfmq 0
ae 8,0,10
bge Ln1
cax 9,9,6 # adjust high limb for negative limb from s1
Ln1: bdn Lnloop
b Lend
Lend0: cal 9,0(10)
Lend: st 8,4(3)
aze 3,9
br

Some files were not shown because too many files have changed in this diff.