mirror of
git://git.gnupg.org/gnupg.git
synced 2025-07-02 22:46:30 +02:00
a couple of changes; but some parts are now broken
This commit is contained in:
parent
d6fa02add6
commit
4c0c155922
46 changed files with 1879 additions and 433 deletions
|
@ -24,6 +24,7 @@ libmpi_a_SOURCES = longlong.h \
|
|||
mpi-inv.c \
|
||||
mpi-mul.c \
|
||||
mpi-pow.c \
|
||||
mpi-mpow.c \
|
||||
mpi-scan.c \
|
||||
mpicoder.c \
|
||||
mpih-cmp.c \
|
||||
|
|
|
@ -106,6 +106,7 @@ libmpi_a_SOURCES = longlong.h \
|
|||
mpi-inv.c \
|
||||
mpi-mul.c \
|
||||
mpi-pow.c \
|
||||
mpi-mpow.c \
|
||||
mpi-scan.c \
|
||||
mpicoder.c \
|
||||
mpih-cmp.c \
|
||||
|
@ -138,13 +139,13 @@ LIBS = @LIBS@
|
|||
libmpi_a_DEPENDENCIES = mpih-mul1.o mpih-mul2.o mpih-mul3.o mpih-add1.o \
|
||||
mpih-sub1.o mpih-shift.o
|
||||
libmpi_a_OBJECTS = mpi-add.o mpi-bit.o mpi-cmp.o mpi-div.o mpi-gcd.o \
|
||||
mpi-inv.o mpi-mul.o mpi-pow.o mpi-scan.o mpicoder.o mpih-cmp.o \
|
||||
mpih-add.o mpih-sub.o mpih-div.o mpih-mul.o mpiutil.o
|
||||
mpi-inv.o mpi-mul.o mpi-pow.o mpi-mpow.o mpi-scan.o mpicoder.o \
|
||||
mpih-cmp.o mpih-add.o mpih-sub.o mpih-div.o mpih-mul.o mpiutil.o
|
||||
AR = ar
|
||||
CFLAGS = @CFLAGS@
|
||||
COMPILE = $(CC) $(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS)
|
||||
LINK = $(CC) $(CFLAGS) $(LDFLAGS) -o $@
|
||||
DIST_COMMON = Makefile.am Makefile.in
|
||||
DIST_COMMON = ChangeLog Makefile.am Makefile.in
|
||||
|
||||
|
||||
DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST)
|
||||
|
@ -152,10 +153,10 @@ DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST)
|
|||
TAR = tar
|
||||
GZIP = --best
|
||||
DEP_FILES = .deps/mpi-add.P .deps/mpi-bit.P .deps/mpi-cmp.P \
|
||||
.deps/mpi-div.P .deps/mpi-gcd.P .deps/mpi-inv.P .deps/mpi-mul.P \
|
||||
.deps/mpi-pow.P .deps/mpi-scan.P .deps/mpicoder.P .deps/mpih-add.P \
|
||||
.deps/mpih-cmp.P .deps/mpih-div.P .deps/mpih-mul.P .deps/mpih-sub.P \
|
||||
.deps/mpiutil.P
|
||||
.deps/mpi-div.P .deps/mpi-gcd.P .deps/mpi-inv.P .deps/mpi-mpow.P \
|
||||
.deps/mpi-mul.P .deps/mpi-pow.P .deps/mpi-scan.P .deps/mpicoder.P \
|
||||
.deps/mpih-add.P .deps/mpih-cmp.P .deps/mpih-div.P .deps/mpih-mul.P \
|
||||
.deps/mpih-sub.P .deps/mpiutil.P
|
||||
SOURCES = $(libmpi_a_SOURCES)
|
||||
OBJECTS = $(libmpi_a_OBJECTS)
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ test -d ./mpi || mkdir ./mpi
|
|||
echo '/* created by config.links - do not edit */' >./mpi/asm-syntax.h
|
||||
|
||||
case "${target}" in
|
||||
i[345]86*-*-linuxaout* | i[345]86*-*-linuxoldld* | i[345]86*-*-*bsd*)
|
||||
i[34]86*-*-linuxaout* | i[34]86*-*-linuxoldld* | i[34]86*-*-*bsd*)
|
||||
echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h
|
||||
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
|
||||
path="i386"
|
||||
|
@ -20,14 +20,14 @@ case "${target}" in
|
|||
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
|
||||
path="i586 i386"
|
||||
;;
|
||||
i[3456]86*-*-*)
|
||||
i[34]86*-*-*)
|
||||
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
|
||||
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
|
||||
path="i386"
|
||||
;;
|
||||
i[56]86*-*-* | pentium-*-* | pentiumpro-*-*)
|
||||
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
|
||||
cat $srcdir/mpi/i586/syntax.h >>./mpi/asm-syntax.h
|
||||
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
|
||||
path="i586 i386"
|
||||
;;
|
||||
alpha*-*-*)
|
||||
|
|
26
mpi/i586/README
Normal file
26
mpi/i586/README
Normal file
|
@ -0,0 +1,26 @@
|
|||
This directory contains mpn functions optimized for Intel Pentium
|
||||
processors.
|
||||
|
||||
RELEVANT OPTIMIZATION ISSUES
|
||||
|
||||
1. Pentium doesn't allocate cache lines on writes, unlike most other modern
|
||||
processors. Since the functions in the mpn class do array writes, we have to
|
||||
handle allocating the destination cache lines by reading a word from it in the
|
||||
loops, to achieve the best performance.
|
||||
|
||||
2. Pairing of memory operations requires that the two issued operations refer
|
||||
to different cache banks. The simplest way to ensure this is to read/write
|
||||
two words from the same object. If we make operations on different objects,
|
||||
they might or might not be to the same cache bank.
|
||||
|
||||
STATUS
|
||||
|
||||
1. mpn_lshift and mpn_rshift run at about 6 cycles/limb, but the Pentium
|
||||
documentation indicates that they should take only 43/8 = 5.375 cycles/limb,
|
||||
or 5 cycles/limb asymptotically.
|
||||
|
||||
2. mpn_add_n and mpn_sub_n run at asymptotically 2 cycles/limb. Due to loop
|
||||
overhead and other delays (cache refill?), they run at or near 2.5 cycles/limb.
|
||||
|
||||
3. mpn_mul_1, mpn_addmul_1, mpn_submul_1 all run 1 cycle faster than they
|
||||
should...
|
8
mpi/i586/distfiles
Normal file
8
mpi/i586/distfiles
Normal file
|
@ -0,0 +1,8 @@
|
|||
mpih-add1.S
|
||||
mpih-mul1.S
|
||||
mpih-mul2.S
|
||||
mpih-mul3.S
|
||||
mpih-shift.S
|
||||
mpih-sub1.S
|
||||
README
|
||||
|
134
mpi/i586/mpih-add1.S
Normal file
134
mpi/i586/mpih-add1.S
Normal file
|
@ -0,0 +1,134 @@
|
|||
/* i80586 add_n -- Add two limb vectors of the same length > 0 and store
|
||||
* sum in a third limb vector.
|
||||
*
|
||||
* Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of G10.
|
||||
*
|
||||
* G10 is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* G10 is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
|
||||
*/
|
||||
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
|
||||
/*******************
|
||||
* mpi_limb_t
|
||||
* mpihelp_add_n( mpi_ptr_t res_ptr, (sp + 4)
|
||||
* mpi_ptr_t s1_ptr, (sp + 8)
|
||||
* mpi_ptr_t s2_ptr, (sp + 12)
|
||||
* mpi_size_t size) (sp + 16)
|
||||
*/
|
||||
|
||||
/* mpihelp_add_n: add the limb vectors at s1_ptr and s2_ptr (size limbs,
 * size > 0), store the sum at res_ptr and return the carry-out (0 or 1)
 * in %eax.  size-1 limbs go through the 8-way unrolled Loop and the
 * one-limb Loop2; the final limb is added at Lend2.  The carry flag is
 * live across the whole routine, so only flag-preserving instructions
 * (movl, leal, decl, incl, pushl, popl) sit between the adcl steps.
 */
.text
|
||||
ALIGN (3)
|
||||
.globl C_SYMBOL_NAME(mpihelp_add_n)
|
||||
C_SYMBOL_NAME(mpihelp_add_n:)
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
pushl %ebp
|
||||
|
||||
movl 20(%esp),%edi /* res_ptr */
|
||||
movl 24(%esp),%esi /* s1_ptr */
|
||||
movl 28(%esp),%ebp /* s2_ptr */
|
||||
movl 32(%esp),%ecx /* size */
|
||||
|
||||
movl (%ebp),%ebx /* preload first s2 limb; every step preloads the next one */
|
||||
|
||||
decl %ecx /* size-1: the last limb is handled at Lend2 */
|
||||
movl %ecx,%edx
|
||||
shrl $3,%ecx /* %ecx = number of 8-limb passes through Loop */
|
||||
andl $7,%edx /* %edx = leftover limbs for Loop2 */
|
||||
testl %ecx,%ecx /* zero carry flag */
|
||||
jz Lend
|
||||
pushl %edx /* %edx is used as scratch inside Loop */
|
||||
|
||||
ALIGN (3)
|
||||
Loop: movl 28(%edi),%eax /* fetch destination cache line */
|
||||
leal 32(%edi),%edi
|
||||
|
||||
L1: movl (%esi),%eax
|
||||
movl 4(%esi),%edx
|
||||
adcl %ebx,%eax
|
||||
movl 4(%ebp),%ebx
|
||||
adcl %ebx,%edx
|
||||
movl 8(%ebp),%ebx
|
||||
movl %eax,-32(%edi)
|
||||
movl %edx,-28(%edi)
|
||||
|
||||
L2: movl 8(%esi),%eax
|
||||
movl 12(%esi),%edx
|
||||
adcl %ebx,%eax
|
||||
movl 12(%ebp),%ebx
|
||||
adcl %ebx,%edx
|
||||
movl 16(%ebp),%ebx
|
||||
movl %eax,-24(%edi)
|
||||
movl %edx,-20(%edi)
|
||||
|
||||
L3: movl 16(%esi),%eax
|
||||
movl 20(%esi),%edx
|
||||
adcl %ebx,%eax
|
||||
movl 20(%ebp),%ebx
|
||||
adcl %ebx,%edx
|
||||
movl 24(%ebp),%ebx
|
||||
movl %eax,-16(%edi)
|
||||
movl %edx,-12(%edi)
|
||||
|
||||
L4: movl 24(%esi),%eax
|
||||
movl 28(%esi),%edx
|
||||
adcl %ebx,%eax
|
||||
movl 28(%ebp),%ebx
|
||||
adcl %ebx,%edx
|
||||
movl 32(%ebp),%ebx /* preload the s2 limb for the next pass or for Lend */
|
||||
movl %eax,-8(%edi)
|
||||
movl %edx,-4(%edi)
|
||||
|
||||
leal 32(%esi),%esi
|
||||
leal 32(%ebp),%ebp
|
||||
decl %ecx /* decl sets ZF for jnz but leaves the carry flag alone */
|
||||
jnz Loop
|
||||
|
||||
popl %edx /* restore the leftover limb count */
|
||||
Lend:
|
||||
decl %edx /* test %edx w/o clobbering carry */
|
||||
js Lend2
|
||||
incl %edx
|
||||
Loop2:
|
||||
leal 4(%edi),%edi
|
||||
movl (%esi),%eax
|
||||
adcl %ebx,%eax
|
||||
movl 4(%ebp),%ebx
|
||||
movl %eax,-4(%edi)
|
||||
leal 4(%esi),%esi
|
||||
leal 4(%ebp),%ebp
|
||||
decl %edx
|
||||
jnz Loop2
|
||||
Lend2:
|
||||
movl (%esi),%eax /* final limb */
|
||||
adcl %ebx,%eax
|
||||
movl %eax,(%edi)
|
||||
|
||||
sbbl %eax,%eax /* %eax = -carry */
|
||||
negl %eax /* return the carry-out as 0 or 1 */
|
||||
|
||||
popl %ebp
|
||||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
ret
|
||||
|
||||
|
89
mpi/i586/mpih-mul1.S
Normal file
89
mpi/i586/mpih-mul1.S
Normal file
|
@ -0,0 +1,89 @@
|
|||
/* i80586 mul_1 -- Multiply a limb vector with a limb and store
|
||||
* the result in a second limb vector.
|
||||
* Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of G10.
|
||||
*
|
||||
* G10 is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* G10 is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
|
||||
*
|
||||
* Note: This code is heavily based on the GNU MP Library.
|
||||
* Actually it's the same code with only minor changes in the
|
||||
* way the data is stored; this is to support the abstraction
|
||||
* of an optional secure memory allocation which may be used
|
||||
* to avoid revealing of sensitive data due to paging etc.
|
||||
* The GNU MP Library itself is published under the LGPL;
|
||||
* however I decided to publish this code under the plain GPL.
|
||||
*/
|
||||
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
|
||||
/*******************
|
||||
* mpi_limb_t
|
||||
* mpihelp_mul_1( mpi_ptr_t res_ptr, (sp + 4)
|
||||
* mpi_ptr_t s1_ptr, (sp + 8)
|
||||
* mpi_size_t s1_size, (sp + 12)
|
||||
* mpi_limb_t s2_limb) (sp + 16)
|
||||
*/
|
||||
|
||||
#define res_ptr edi
|
||||
#define s1_ptr esi
|
||||
#define size ecx
|
||||
#define s2_limb ebp
|
||||
|
||||
/* mpihelp_mul_1: multiply the s1_size limbs at s1_ptr by s2_limb, store the
 * low product limbs at res_ptr and return the final carry limb in eax.
 * NOTE(review): INSN1/INSN2/R/MEM_DISP/MEM_INDEX are defined in
 * asm-syntax.h, which is not visible here; the comments below assume
 * destination-first operand order -- confirm against that header.
 */
TEXT
|
||||
ALIGN (3)
|
||||
GLOBL C_SYMBOL_NAME(mpihelp_mul_1)
|
||||
C_SYMBOL_NAME(mpihelp_mul_1:)
|
||||
|
||||
INSN1(push,l ,R(edi))
|
||||
INSN1(push,l ,R(esi))
|
||||
INSN1(push,l ,R(ebx))
|
||||
INSN1(push,l ,R(ebp))
|
||||
|
||||
INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) /* arguments sit 16 bytes higher after the four pushes */
|
||||
INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
|
||||
INSN2(mov,l ,R(size),MEM_DISP(esp,28))
|
||||
INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
|
||||
|
||||
INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) /* point both vectors one past their last limb ... */
|
||||
INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
|
||||
INSN1(neg,l ,R(size)) /* ... and index them with a negative count that runs up to zero */
|
||||
INSN2(xor,l ,R(ebx),R(ebx)) /* carry limb = 0, carry flag cleared */
|
||||
ALIGN (3)
|
||||
|
||||
Loop: INSN2(adc,l ,R(ebx),$0) /* fold the carry flag of the previous add into the carry limb */
|
||||
INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
|
||||
|
||||
INSN1(mul,l ,R(s2_limb)) /* edx:eax = eax * s2_limb */
|
||||
|
||||
INSN2(add,l ,R(ebx),R(eax)) /* low product limb + carry limb */
|
||||
|
||||
INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx))
|
||||
INSN1(inc,l ,R(size)) /* sets ZF for the jnz below; inc does not touch the carry flag */
|
||||
|
||||
INSN2(mov,l ,R(ebx),R(edx)) /* high product limb becomes the next carry limb */
|
||||
INSN1(jnz, ,Loop)
|
||||
|
||||
INSN2(adc,l ,R(ebx),$0) /* pick up the carry of the last add */
|
||||
INSN2(mov,l ,R(eax),R(ebx)) /* return the carry limb */
|
||||
INSN1(pop,l ,R(ebp))
|
||||
INSN1(pop,l ,R(ebx))
|
||||
INSN1(pop,l ,R(esi))
|
||||
INSN1(pop,l ,R(edi))
|
||||
ret
|
||||
|
94
mpi/i586/mpih-mul2.S
Normal file
94
mpi/i586/mpih-mul2.S
Normal file
|
@ -0,0 +1,94 @@
|
|||
/* i80586 addmul_1 -- Multiply a limb vector with a limb and add
|
||||
* the result to a second limb vector.
|
||||
* Copyright (c) 1997 by Werner Koch (dd9jn)
|
||||
* Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of G10.
|
||||
*
|
||||
* G10 is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* G10 is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
|
||||
*
|
||||
* Note: This code is heavily based on the GNU MP Library.
|
||||
* Actually it's the same code with only minor changes in the
|
||||
* way the data is stored; this is to support the abstraction
|
||||
* of an optional secure memory allocation which may be used
|
||||
* to avoid revealing of sensitive data due to paging etc.
|
||||
* The GNU MP Library itself is published under the LGPL;
|
||||
* however I decided to publish this code under the plain GPL.
|
||||
*/
|
||||
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
|
||||
/*******************
|
||||
* mpi_limb_t
|
||||
* mpihelp_addmul_1( mpi_ptr_t res_ptr, (sp + 4)
|
||||
* mpi_ptr_t s1_ptr, (sp + 8)
|
||||
* mpi_size_t s1_size, (sp + 12)
|
||||
* mpi_limb_t s2_limb) (sp + 16)
|
||||
*/
|
||||
|
||||
#define res_ptr edi
|
||||
#define s1_ptr esi
|
||||
#define size ecx
|
||||
#define s2_limb ebp
|
||||
|
||||
/* mpihelp_addmul_1: multiply the s1_size limbs at s1_ptr by s2_limb, add the
 * products to the limbs already stored at res_ptr and return the final
 * carry limb in eax.
 * NOTE(review): INSN1/INSN2/R/MEM_DISP/MEM_INDEX are defined in
 * asm-syntax.h, which is not visible here; the comments below assume
 * destination-first operand order -- confirm against that header.
 */
TEXT
|
||||
ALIGN (3)
|
||||
GLOBL C_SYMBOL_NAME(mpihelp_addmul_1)
|
||||
C_SYMBOL_NAME(mpihelp_addmul_1:)
|
||||
|
||||
INSN1(push,l ,R(edi))
|
||||
INSN1(push,l ,R(esi))
|
||||
INSN1(push,l ,R(ebx))
|
||||
INSN1(push,l ,R(ebp))
|
||||
|
||||
INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) /* arguments sit 16 bytes higher after the four pushes */
|
||||
INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
|
||||
INSN2(mov,l ,R(size),MEM_DISP(esp,28))
|
||||
INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
|
||||
|
||||
INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) /* point both vectors one past their last limb ... */
|
||||
INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
|
||||
INSN1(neg,l ,R(size)) /* ... and index them with a negative count that runs up to zero */
|
||||
INSN2(xor,l ,R(ebx),R(ebx)) /* carry limb = 0, carry flag cleared */
|
||||
ALIGN (3)
|
||||
|
||||
Loop: INSN2(adc,l ,R(ebx),$0) /* fold the carry flag of the previous add into the carry limb */
|
||||
INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
|
||||
|
||||
INSN1(mul,l ,R(s2_limb)) /* edx:eax = eax * s2_limb */
|
||||
|
||||
INSN2(add,l ,R(eax),R(ebx)) /* low product limb + carry limb */
|
||||
INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4)) /* fetch the limb currently in the result vector */
|
||||
|
||||
INSN2(adc,l ,R(edx),$0) /* carry of that add goes into the high product limb */
|
||||
INSN2(add,l ,R(ebx),R(eax)) /* result limb += product; carry is picked up at the loop top */
|
||||
|
||||
INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx))
|
||||
INSN1(inc,l ,R(size)) /* sets ZF for the jnz below; inc does not touch the carry flag */
|
||||
|
||||
INSN2(mov,l ,R(ebx),R(edx)) /* high product limb becomes the next carry limb */
|
||||
INSN1(jnz, ,Loop)
|
||||
|
||||
INSN2(adc,l ,R(ebx),$0) /* pick up the carry of the last add */
|
||||
INSN2(mov,l ,R(eax),R(ebx)) /* return the carry limb */
|
||||
INSN1(pop,l ,R(ebp))
|
||||
INSN1(pop,l ,R(ebx))
|
||||
INSN1(pop,l ,R(esi))
|
||||
INSN1(pop,l ,R(edi))
|
||||
ret
|
||||
|
94
mpi/i586/mpih-mul3.S
Normal file
94
mpi/i586/mpih-mul3.S
Normal file
|
@ -0,0 +1,94 @@
|
|||
/* i80586 submul_1 -- Multiply a limb vector with a limb and subtract
|
||||
* the result from a second limb vector.
|
||||
* Copyright (c) 1997 by Werner Koch (dd9jn)
|
||||
* Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of G10.
|
||||
*
|
||||
* G10 is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* G10 is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
|
||||
*
|
||||
* Note: This code is heavily based on the GNU MP Library.
|
||||
* Actually it's the same code with only minor changes in the
|
||||
* way the data is stored; this is to support the abstraction
|
||||
* of an optional secure memory allocation which may be used
|
||||
* to avoid revealing of sensitive data due to paging etc.
|
||||
* The GNU MP Library itself is published under the LGPL;
|
||||
* however I decided to publish this code under the plain GPL.
|
||||
*/
|
||||
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
|
||||
/*******************
|
||||
* mpi_limb_t
|
||||
* mpihelp_submul_1( mpi_ptr_t res_ptr, (sp + 4)
|
||||
* mpi_ptr_t s1_ptr, (sp + 8)
|
||||
* mpi_size_t s1_size, (sp + 12)
|
||||
* mpi_limb_t s2_limb) (sp + 16)
|
||||
*/
|
||||
|
||||
#define res_ptr edi
|
||||
#define s1_ptr esi
|
||||
#define size ecx
|
||||
#define s2_limb ebp
|
||||
|
||||
/* mpihelp_submul_1: multiply the s1_size limbs at s1_ptr by s2_limb, subtract
 * the products from the limbs already stored at res_ptr and return the
 * final carry (borrow) limb in eax.
 * NOTE(review): INSN1/INSN2/R/MEM_DISP/MEM_INDEX are defined in
 * asm-syntax.h, which is not visible here; the comments below assume
 * destination-first operand order -- confirm against that header.
 */
TEXT
|
||||
ALIGN (3)
|
||||
GLOBL C_SYMBOL_NAME(mpihelp_submul_1)
|
||||
C_SYMBOL_NAME(mpihelp_submul_1:)
|
||||
|
||||
INSN1(push,l ,R(edi))
|
||||
INSN1(push,l ,R(esi))
|
||||
INSN1(push,l ,R(ebx))
|
||||
INSN1(push,l ,R(ebp))
|
||||
|
||||
INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) /* arguments sit 16 bytes higher after the four pushes */
|
||||
INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
|
||||
INSN2(mov,l ,R(size),MEM_DISP(esp,28))
|
||||
INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
|
||||
|
||||
INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) /* point both vectors one past their last limb ... */
|
||||
INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
|
||||
INSN1(neg,l ,R(size)) /* ... and index them with a negative count that runs up to zero */
|
||||
INSN2(xor,l ,R(ebx),R(ebx)) /* carry limb = 0, carry flag cleared */
|
||||
ALIGN (3)
|
||||
|
||||
Loop: INSN2(adc,l ,R(ebx),$0) /* fold the borrow of the previous sub into the carry limb */
|
||||
INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
|
||||
|
||||
INSN1(mul,l ,R(s2_limb)) /* edx:eax = eax * s2_limb */
|
||||
|
||||
INSN2(add,l ,R(eax),R(ebx)) /* low product limb + carry limb */
|
||||
INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4)) /* fetch the limb currently in the result vector */
|
||||
|
||||
INSN2(adc,l ,R(edx),$0) /* carry of that add goes into the high product limb */
|
||||
INSN2(sub,l ,R(ebx),R(eax)) /* result limb -= product; borrow is picked up at the loop top */
|
||||
|
||||
INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx))
|
||||
INSN1(inc,l ,R(size)) /* sets ZF for the jnz below; inc does not touch the carry flag */
|
||||
|
||||
INSN2(mov,l ,R(ebx),R(edx)) /* high product limb becomes the next carry limb */
|
||||
INSN1(jnz, ,Loop)
|
||||
|
||||
INSN2(adc,l ,R(ebx),$0) /* pick up the borrow of the last sub */
|
||||
INSN2(mov,l ,R(eax),R(ebx)) /* return the carry limb */
|
||||
INSN1(pop,l ,R(ebp))
|
||||
INSN1(pop,l ,R(ebx))
|
||||
INSN1(pop,l ,R(esi))
|
||||
INSN1(pop,l ,R(edi))
|
||||
ret
|
||||
|
426
mpi/i586/mpih-shift.S
Normal file
426
mpi/i586/mpih-shift.S
Normal file
|
@ -0,0 +1,426 @@
|
|||
/* i80586 rshift, lshift
|
||||
* Copyright (c) 1997 by Werner Koch (dd9jn)
|
||||
* Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of G10.
|
||||
*
|
||||
* G10 is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* G10 is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
|
||||
*
|
||||
* Note: This code is heavily based on the GNU MP Library.
|
||||
* Actually it's the same code with only minor changes in the
|
||||
* way the data is stored; this is to support the abstraction
|
||||
* of an optional secure memory allocation which may be used
|
||||
* to avoid revealing of sensitive data due to paging etc.
|
||||
* The GNU MP Library itself is published under the LGPL;
|
||||
* however I decided to publish this code under the plain GPL.
|
||||
*/
|
||||
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
|
||||
/*******************
|
||||
* mpi_limb_t
|
||||
* mpihelp_lshift( mpi_ptr_t wp, (sp + 4)
|
||||
* mpi_ptr_t up, (sp + 8)
|
||||
* mpi_size_t usize, (sp + 12)
|
||||
* unsigned cnt) (sp + 16)
|
||||
*/
|
||||
|
||||
/* mpihelp_lshift: shift the usize limbs at up left by cnt bits, store the
 * result at wp and return the bits shifted out of the highest limb in %eax.
 * The general path (Lnormal) starts at the highest limb and walks downwards
 * using shldl.  When cnt == 1 and the pointer comparisons below allow it,
 * Lspecial walks upwards from the lowest limb using add-with-carry.
 */
.text
|
||||
ALIGN (3)
|
||||
.globl C_SYMBOL_NAME(mpihelp_lshift)
|
||||
C_SYMBOL_NAME(mpihelp_lshift:)
|
||||
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
pushl %ebp
|
||||
|
||||
movl 20(%esp),%edi /* res_ptr */
|
||||
movl 24(%esp),%esi /* s_ptr */
|
||||
movl 28(%esp),%ebp /* size */
|
||||
movl 32(%esp),%ecx /* cnt */
|
||||
|
||||
/* We can use faster code for shift-by-1 under certain conditions. */
|
||||
cmp $1,%ecx
|
||||
jne Lnormal
|
||||
leal 4(%esi),%eax
|
||||
cmpl %edi,%eax
|
||||
jnc Lspecial /* jump if s_ptr + 1 >= res_ptr */
|
||||
leal (%esi,%ebp,4),%eax
|
||||
cmpl %eax,%edi
|
||||
jnc Lspecial /* jump if res_ptr >= s_ptr + size */
|
||||
|
||||
Lnormal:
|
||||
leal -4(%edi,%ebp,4),%edi /* point at the highest result limb */
|
||||
leal -4(%esi,%ebp,4),%esi /* point at the highest source limb */
|
||||
|
||||
movl (%esi),%edx
|
||||
subl $4,%esi
|
||||
xorl %eax,%eax
|
||||
shldl %cl,%edx,%eax /* compute carry limb */
|
||||
pushl %eax /* push carry limb onto stack */
|
||||
|
||||
decl %ebp /* size-1: the lowest limb is produced at Lend2 */
|
||||
pushl %ebp /* keep size-1 for the leftover count at Lend */
|
||||
shrl $3,%ebp /* number of 8-limb passes through Loop */
|
||||
jz Lend
|
||||
|
||||
movl (%edi),%eax /* fetch destination cache line */
|
||||
|
||||
ALIGN (2)
|
||||
Loop: movl -28(%edi),%eax /* fetch destination cache line */
|
||||
movl %edx,%ebx
|
||||
|
||||
movl (%esi),%eax
|
||||
movl -4(%esi),%edx
|
||||
shldl %cl,%eax,%ebx
|
||||
shldl %cl,%edx,%eax
|
||||
movl %ebx,(%edi)
|
||||
movl %eax,-4(%edi)
|
||||
|
||||
movl -8(%esi),%ebx
|
||||
movl -12(%esi),%eax
|
||||
shldl %cl,%ebx,%edx
|
||||
shldl %cl,%eax,%ebx
|
||||
movl %edx,-8(%edi)
|
||||
movl %ebx,-12(%edi)
|
||||
|
||||
movl -16(%esi),%edx
|
||||
movl -20(%esi),%ebx
|
||||
shldl %cl,%edx,%eax
|
||||
shldl %cl,%ebx,%edx
|
||||
movl %eax,-16(%edi)
|
||||
movl %edx,-20(%edi)
|
||||
|
||||
movl -24(%esi),%eax
|
||||
movl -28(%esi),%edx
|
||||
shldl %cl,%eax,%ebx
|
||||
shldl %cl,%edx,%eax
|
||||
movl %ebx,-24(%edi)
|
||||
movl %eax,-28(%edi)
|
||||
|
||||
subl $32,%esi
|
||||
subl $32,%edi
|
||||
decl %ebp
|
||||
jnz Loop
|
||||
|
||||
Lend: popl %ebp
|
||||
andl $7,%ebp /* leftover limbs for Loop2 */
|
||||
jz Lend2
|
||||
Loop2: movl (%esi),%eax
|
||||
shldl %cl,%eax,%edx
|
||||
movl %edx,(%edi)
|
||||
movl %eax,%edx
|
||||
subl $4,%esi
|
||||
subl $4,%edi
|
||||
decl %ebp
|
||||
jnz Loop2
|
||||
|
||||
Lend2: shll %cl,%edx /* compute least significant limb */
|
||||
movl %edx,(%edi) /* store it */
|
||||
|
||||
popl %eax /* pop carry limb */
|
||||
|
||||
popl %ebp
|
||||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
ret
|
||||
|
||||
/* We loop from least significant end of the arrays, which is only
|
||||
permissible if the source and destination don't overlap, since the
|
||||
function is documented to work for overlapping source and destination.
|
||||
*/
|
||||
|
||||
Lspecial:
|
||||
movl (%esi),%edx
|
||||
addl $4,%esi
|
||||
|
||||
decl %ebp /* size-1: the last limb is stored at LL1 */
|
||||
pushl %ebp /* keep size-1 for the leftover count at LLend */
|
||||
shrl $3,%ebp /* number of 8-limb passes through LLoop */
|
||||
|
||||
addl %edx,%edx /* shift the first limb left by one; carry = bit shifted out */
|
||||
incl %ebp /* inc/dec pair: test %ebp for zero while keeping the carry */
|
||||
decl %ebp
|
||||
jz LLend
|
||||
|
||||
movl (%edi),%eax /* fetch destination cache line */
|
||||
|
||||
ALIGN (2)
|
||||
LLoop: movl 28(%edi),%eax /* fetch destination cache line */
|
||||
movl %edx,%ebx
|
||||
|
||||
movl (%esi),%eax
|
||||
movl 4(%esi),%edx
|
||||
adcl %eax,%eax
|
||||
movl %ebx,(%edi)
|
||||
adcl %edx,%edx
|
||||
movl %eax,4(%edi)
|
||||
|
||||
movl 8(%esi),%ebx
|
||||
movl 12(%esi),%eax
|
||||
adcl %ebx,%ebx
|
||||
movl %edx,8(%edi)
|
||||
adcl %eax,%eax
|
||||
movl %ebx,12(%edi)
|
||||
|
||||
movl 16(%esi),%edx
|
||||
movl 20(%esi),%ebx
|
||||
adcl %edx,%edx
|
||||
movl %eax,16(%edi)
|
||||
adcl %ebx,%ebx
|
||||
movl %edx,20(%edi)
|
||||
|
||||
movl 24(%esi),%eax
|
||||
movl 28(%esi),%edx
|
||||
adcl %eax,%eax
|
||||
movl %ebx,24(%edi)
|
||||
adcl %edx,%edx
|
||||
movl %eax,28(%edi)
|
||||
|
||||
leal 32(%esi),%esi /* use leal not to clobber carry */
|
||||
leal 32(%edi),%edi
|
||||
decl %ebp
|
||||
jnz LLoop
|
||||
|
||||
LLend: popl %ebp
|
||||
sbbl %eax,%eax /* save carry in %eax */
|
||||
andl $7,%ebp /* leftover limbs for LLoop2 */
|
||||
jz LLend2
|
||||
addl %eax,%eax /* restore carry from eax */
|
||||
LLoop2: movl %edx,%ebx
|
||||
movl (%esi),%edx
|
||||
adcl %edx,%edx
|
||||
movl %ebx,(%edi)
|
||||
|
||||
leal 4(%esi),%esi /* use leal not to clobber carry */
|
||||
leal 4(%edi),%edi
|
||||
decl %ebp
|
||||
jnz LLoop2
|
||||
|
||||
jmp LL1
|
||||
LLend2: addl %eax,%eax /* restore carry from eax */
|
||||
LL1: movl %edx,(%edi) /* store last limb */
|
||||
|
||||
sbbl %eax,%eax /* %eax = -carry */
|
||||
negl %eax /* return the shifted-out bit as 0 or 1 */
|
||||
|
||||
popl %ebp
|
||||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
/*******************
|
||||
* mpi_limb_t
|
||||
* mpihelp_rshift( mpi_ptr_t wp, (sp + 4)
|
||||
* mpi_ptr_t up, (sp + 8)
|
||||
* mpi_size_t usize, (sp + 12)
|
||||
* unsigned cnt) (sp + 16)
|
||||
*/
|
||||
|
||||
/* mpihelp_rshift: shift the usize limbs at up right by cnt bits, store the
 * result at wp and return the bits shifted out of the lowest limb, placed
 * at the top of %eax.  The general path (Rnormal) starts at the lowest limb
 * and walks upwards using shrdl.  When cnt == 1 and the pointer comparisons
 * below allow it, Rspecial starts at the highest limb and walks downwards
 * using rotate-through-carry.
 */
.text
|
||||
ALIGN (3)
|
||||
.globl C_SYMBOL_NAME(mpihelp_rshift)
|
||||
C_SYMBOL_NAME(mpihelp_rshift:)
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
pushl %ebp
|
||||
|
||||
movl 20(%esp),%edi /* res_ptr */
|
||||
movl 24(%esp),%esi /* s_ptr */
|
||||
movl 28(%esp),%ebp /* size */
|
||||
movl 32(%esp),%ecx /* cnt */
|
||||
|
||||
/* We can use faster code for shift-by-1 under certain conditions. */
|
||||
cmp $1,%ecx
|
||||
jne Rnormal
|
||||
leal 4(%edi),%eax
|
||||
cmpl %esi,%eax
|
||||
jnc Rspecial /* jump if res_ptr + 1 >= s_ptr */
|
||||
leal (%edi,%ebp,4),%eax
|
||||
cmpl %eax,%esi
|
||||
jnc Rspecial /* jump if s_ptr >= res_ptr + size */
|
||||
|
||||
Rnormal:
|
||||
movl (%esi),%edx
|
||||
addl $4,%esi
|
||||
xorl %eax,%eax
|
||||
shrdl %cl,%edx,%eax /* compute carry limb */
|
||||
pushl %eax /* push carry limb onto stack */
|
||||
|
||||
decl %ebp /* size-1: the highest limb is produced at Rend2 */
|
||||
pushl %ebp /* keep size-1 for the leftover count at Rend */
|
||||
shrl $3,%ebp /* number of 8-limb passes through Roop */
|
||||
jz Rend
|
||||
|
||||
movl (%edi),%eax /* fetch destination cache line */
|
||||
|
||||
ALIGN (2)
|
||||
Roop: movl 28(%edi),%eax /* fetch destination cache line */
|
||||
movl %edx,%ebx
|
||||
|
||||
movl (%esi),%eax
|
||||
movl 4(%esi),%edx
|
||||
shrdl %cl,%eax,%ebx
|
||||
shrdl %cl,%edx,%eax
|
||||
movl %ebx,(%edi)
|
||||
movl %eax,4(%edi)
|
||||
|
||||
movl 8(%esi),%ebx
|
||||
movl 12(%esi),%eax
|
||||
shrdl %cl,%ebx,%edx
|
||||
shrdl %cl,%eax,%ebx
|
||||
movl %edx,8(%edi)
|
||||
movl %ebx,12(%edi)
|
||||
|
||||
movl 16(%esi),%edx
|
||||
movl 20(%esi),%ebx
|
||||
shrdl %cl,%edx,%eax
|
||||
shrdl %cl,%ebx,%edx
|
||||
movl %eax,16(%edi)
|
||||
movl %edx,20(%edi)
|
||||
|
||||
movl 24(%esi),%eax
|
||||
movl 28(%esi),%edx
|
||||
shrdl %cl,%eax,%ebx
|
||||
shrdl %cl,%edx,%eax
|
||||
movl %ebx,24(%edi)
|
||||
movl %eax,28(%edi)
|
||||
|
||||
addl $32,%esi
|
||||
addl $32,%edi
|
||||
decl %ebp
|
||||
jnz Roop
|
||||
|
||||
Rend: popl %ebp
|
||||
andl $7,%ebp /* leftover limbs for Roop2 */
|
||||
jz Rend2
|
||||
Roop2: movl (%esi),%eax
|
||||
shrdl %cl,%eax,%edx /* compute result limb */
|
||||
movl %edx,(%edi)
|
||||
movl %eax,%edx
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
decl %ebp
|
||||
jnz Roop2
|
||||
|
||||
Rend2: shrl %cl,%edx /* compute most significant limb */
|
||||
movl %edx,(%edi) /* store it */
|
||||
|
||||
popl %eax /* pop carry limb */
|
||||
|
||||
popl %ebp
|
||||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
ret
|
||||
|
||||
/* We loop from most significant end of the arrays, which is only
|
||||
permissible if the source and destination don't overlap, since the
|
||||
function is documented to work for overlapping source and destination.
|
||||
*/
|
||||
|
||||
Rspecial:
|
||||
leal -4(%edi,%ebp,4),%edi /* point at the highest result limb */
|
||||
leal -4(%esi,%ebp,4),%esi /* point at the highest source limb */
|
||||
|
||||
movl (%esi),%edx
|
||||
subl $4,%esi
|
||||
|
||||
decl %ebp /* size-1: the last limb is stored at RL1 */
|
||||
pushl %ebp /* keep size-1 for the leftover count at RLend */
|
||||
shrl $3,%ebp /* number of 8-limb passes through RLoop */
|
||||
|
||||
shrl $1,%edx /* shift the highest limb right by one; carry = bit shifted out */
|
||||
incl %ebp /* inc/dec pair: test %ebp for zero while keeping the carry */
|
||||
decl %ebp
|
||||
jz RLend
|
||||
|
||||
movl (%edi),%eax /* fetch destination cache line */
|
||||
|
||||
ALIGN (2)
|
||||
RLoop: movl -28(%edi),%eax /* fetch destination cache line */
|
||||
movl %edx,%ebx
|
||||
|
||||
movl (%esi),%eax
|
||||
movl -4(%esi),%edx
|
||||
rcrl $1,%eax
|
||||
movl %ebx,(%edi)
|
||||
rcrl $1,%edx
|
||||
movl %eax,-4(%edi)
|
||||
|
||||
movl -8(%esi),%ebx
|
||||
movl -12(%esi),%eax
|
||||
rcrl $1,%ebx
|
||||
movl %edx,-8(%edi)
|
||||
rcrl $1,%eax
|
||||
movl %ebx,-12(%edi)
|
||||
|
||||
movl -16(%esi),%edx
|
||||
movl -20(%esi),%ebx
|
||||
rcrl $1,%edx
|
||||
movl %eax,-16(%edi)
|
||||
rcrl $1,%ebx
|
||||
movl %edx,-20(%edi)
|
||||
|
||||
movl -24(%esi),%eax
|
||||
movl -28(%esi),%edx
|
||||
rcrl $1,%eax
|
||||
movl %ebx,-24(%edi)
|
||||
rcrl $1,%edx
|
||||
movl %eax,-28(%edi)
|
||||
|
||||
leal -32(%esi),%esi /* use leal not to clobber carry */
|
||||
leal -32(%edi),%edi
|
||||
decl %ebp
|
||||
jnz RLoop
|
||||
|
||||
RLend: popl %ebp
|
||||
sbbl %eax,%eax /* save carry in %eax */
|
||||
andl $7,%ebp /* leftover limbs for RLoop2 */
|
||||
jz RLend2
|
||||
addl %eax,%eax /* restore carry from eax */
|
||||
RLoop2: movl %edx,%ebx
|
||||
movl (%esi),%edx
|
||||
rcrl $1,%edx
|
||||
movl %ebx,(%edi)
|
||||
|
||||
leal -4(%esi),%esi /* use leal not to clobber carry */
|
||||
leal -4(%edi),%edi
|
||||
decl %ebp
|
||||
jnz RLoop2
|
||||
|
||||
jmp RL1
|
||||
RLend2: addl %eax,%eax /* restore carry from eax */
|
||||
RL1: movl %edx,(%edi) /* store last limb */
|
||||
|
||||
movl $0,%eax /* movl keeps the carry flag intact */
|
||||
rcrl $1,%eax /* return the shifted-out bit in the top bit of %eax */
|
||||
|
||||
popl %ebp
|
||||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
ret
|
||||
|
143
mpi/i586/mpih-sub1.S
Normal file
143
mpi/i586/mpih-sub1.S
Normal file
|
@ -0,0 +1,143 @@
|
|||
/* i80586 sub_n -- Subtract two limb vectors of the same length > 0 and store
|
||||
* difference in a third limb vector.
|
||||
* Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||
* Copyright (c) 1997 by Werner Koch (dd9jn)
|
||||
*
|
||||
* This file is part of G10.
|
||||
*
|
||||
* G10 is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* G10 is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
|
||||
*
|
||||
* Note: This code is heavily based on the GNU MP Library.
|
||||
* Actually it's the same code with only minor changes in the
|
||||
* way the data is stored; this is to support the abstraction
|
||||
* of an optional secure memory allocation which may be used
|
||||
* to avoid revealing of sensitive data due to paging etc.
|
||||
* The GNU MP Library itself is published under the LGPL;
|
||||
* however I decided to publish this code under the plain GPL.
|
||||
*/
|
||||
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
|
||||
/*******************
|
||||
* mpi_limb_t
|
||||
* mpihelp_sub_n( mpi_ptr_t res_ptr, (sp + 4)
|
||||
* mpi_ptr_t s1_ptr, (sp + 8)
|
||||
* mpi_ptr_t s2_ptr, (sp + 12)
|
||||
* mpi_size_t size) (sp + 16)
|
||||
*/
|
||||
|
||||
|
||||
.text
|
||||
ALIGN (3)
|
||||
.globl C_SYMBOL_NAME(mpihelp_sub_n)
|
||||
C_SYMBOL_NAME(mpihelp_sub_n:)
|
||||
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
pushl %ebp
|
||||
|
||||
movl 20(%esp),%edi /* res_ptr */
|
||||
movl 24(%esp),%esi /* s1_ptr */
|
||||
movl 28(%esp),%ebp /* s2_ptr */
|
||||
movl 32(%esp),%ecx /* size */
|
||||
|
||||
movl (%ebp),%ebx
|
||||
|
||||
decl %ecx
|
||||
movl %ecx,%edx
|
||||
shrl $3,%ecx
|
||||
andl $7,%edx
|
||||
testl %ecx,%ecx /* zero carry flag */
|
||||
jz Lend
|
||||
pushl %edx
|
||||
|
||||
ALIGN (3)
|
||||
Loop: movl 28(%edi),%eax /* fetch destination cache line */
|
||||
leal 32(%edi),%edi
|
||||
|
||||
L1: movl (%esi),%eax
|
||||
movl 4(%esi),%edx
|
||||
sbbl %ebx,%eax
|
||||
movl 4(%ebp),%ebx
|
||||
sbbl %ebx,%edx
|
||||
movl 8(%ebp),%ebx
|
||||
movl %eax,-32(%edi)
|
||||
movl %edx,-28(%edi)
|
||||
|
||||
L2: movl 8(%esi),%eax
|
||||
movl 12(%esi),%edx
|
||||
sbbl %ebx,%eax
|
||||
movl 12(%ebp),%ebx
|
||||
sbbl %ebx,%edx
|
||||
movl 16(%ebp),%ebx
|
||||
movl %eax,-24(%edi)
|
||||
movl %edx,-20(%edi)
|
||||
|
||||
L3: movl 16(%esi),%eax
|
||||
movl 20(%esi),%edx
|
||||
sbbl %ebx,%eax
|
||||
movl 20(%ebp),%ebx
|
||||
sbbl %ebx,%edx
|
||||
movl 24(%ebp),%ebx
|
||||
movl %eax,-16(%edi)
|
||||
movl %edx,-12(%edi)
|
||||
|
||||
L4: movl 24(%esi),%eax
|
||||
movl 28(%esi),%edx
|
||||
sbbl %ebx,%eax
|
||||
movl 28(%ebp),%ebx
|
||||
sbbl %ebx,%edx
|
||||
movl 32(%ebp),%ebx
|
||||
movl %eax,-8(%edi)
|
||||
movl %edx,-4(%edi)
|
||||
|
||||
leal 32(%esi),%esi
|
||||
leal 32(%ebp),%ebp
|
||||
decl %ecx
|
||||
jnz Loop
|
||||
|
||||
popl %edx
|
||||
Lend:
|
||||
decl %edx /* test %edx w/o clobbering carry */
|
||||
js Lend2
|
||||
incl %edx
|
||||
Loop2:
|
||||
leal 4(%edi),%edi
|
||||
movl (%esi),%eax
|
||||
sbbl %ebx,%eax
|
||||
movl 4(%ebp),%ebx
|
||||
movl %eax,-4(%edi)
|
||||
leal 4(%esi),%esi
|
||||
leal 4(%ebp),%ebp
|
||||
decl %edx
|
||||
jnz Loop2
|
||||
Lend2:
|
||||
movl (%esi),%eax
|
||||
sbbl %ebx,%eax
|
||||
movl %eax,(%edi)
|
||||
|
||||
sbbl %eax,%eax
|
||||
negl %eax
|
||||
|
||||
popl %ebp
|
||||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
ret
|
||||
|
103
mpi/mpi-inv.c
103
mpi/mpi-inv.c
|
@ -76,7 +76,7 @@ mpi_invm( MPI x, MPI a, MPI n )
|
|||
mpi_free(t3);
|
||||
mpi_free(u);
|
||||
mpi_free(v);
|
||||
#else
|
||||
#elif 0
|
||||
/* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
|
||||
* modified according to Michael Penk's solution for Exercise 35 */
|
||||
|
||||
|
@ -156,6 +156,107 @@ mpi_invm( MPI x, MPI a, MPI n )
|
|||
mpi_free(t1);
|
||||
mpi_free(t2);
|
||||
mpi_free(t3);
|
||||
#else
|
||||
/* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
|
||||
* modified according to Michael Penk's solution for Exercise 35
|
||||
* with further enhancement */
|
||||
MPI u, v, u1, u2=NULL, u3, v1, v2=NULL, v3, t1, t2=NULL, t3;
|
||||
unsigned k;
|
||||
int sign;
|
||||
int odd ;
|
||||
|
||||
u = mpi_copy(a);
|
||||
v = mpi_copy(n);
|
||||
for(k=0; !mpi_test_bit(u,0) && !mpi_test_bit(v,0); k++ ) {
|
||||
mpi_rshift(u, u, 1);
|
||||
mpi_rshift(v, v, 1);
|
||||
}
|
||||
odd = mpi_test_bit(v,0);
|
||||
|
||||
u1 = mpi_alloc_set_ui(1);
|
||||
if( !odd )
|
||||
u2 = mpi_alloc_set_ui(0);
|
||||
u3 = mpi_copy(u);
|
||||
v1 = mpi_copy(v);
|
||||
if( !odd ) {
|
||||
v2 = mpi_alloc( mpi_get_nlimbs(u) );
|
||||
mpi_sub( v2, u1, u ); /* U is used as const 1 */
|
||||
}
|
||||
v3 = mpi_copy(v);
|
||||
if( mpi_test_bit(u, 0) ) { /* u is odd */
|
||||
t1 = mpi_alloc_set_ui(0);
|
||||
if( !odd ) {
|
||||
t2 = mpi_alloc_set_ui(1); t2->sign = 1;
|
||||
}
|
||||
t3 = mpi_copy(v); t3->sign = !t3->sign;
|
||||
goto Y4;
|
||||
}
|
||||
else {
|
||||
t1 = mpi_alloc_set_ui(1);
|
||||
if( !odd )
|
||||
t2 = mpi_alloc_set_ui(0);
|
||||
t3 = mpi_copy(u);
|
||||
}
|
||||
do {
|
||||
do {
|
||||
if( !odd ) {
|
||||
if( mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0) ) { /* one is odd */
|
||||
mpi_add(t1, t1, v);
|
||||
mpi_sub(t2, t2, u);
|
||||
}
|
||||
mpi_rshift(t1, t1, 1);
|
||||
mpi_rshift(t2, t2, 1);
|
||||
mpi_rshift(t3, t3, 1);
|
||||
}
|
||||
else {
|
||||
if( mpi_test_bit(t1, 0) )
|
||||
mpi_add(t1, t1, v);
|
||||
mpi_rshift(t1, t1, 1);
|
||||
mpi_rshift(t3, t3, 1);
|
||||
}
|
||||
Y4:
|
||||
} while( !mpi_test_bit( t3, 0 ) ); /* while t3 is even */
|
||||
|
||||
if( !t3->sign ) {
|
||||
mpi_set(u1, t1);
|
||||
if( !odd )
|
||||
mpi_set(u2, t2);
|
||||
mpi_set(u3, t3);
|
||||
}
|
||||
else {
|
||||
mpi_sub(v1, v, t1);
|
||||
sign = u->sign; u->sign = !u->sign;
|
||||
if( !odd )
|
||||
mpi_sub(v2, u, t2);
|
||||
u->sign = sign;
|
||||
sign = t3->sign; t3->sign = !t3->sign;
|
||||
mpi_set(v3, t3);
|
||||
t3->sign = sign;
|
||||
}
|
||||
mpi_sub(t1, u1, v1);
|
||||
if( !odd )
|
||||
mpi_sub(t2, u2, v2);
|
||||
mpi_sub(t3, u3, v3);
|
||||
if( t1->sign ) {
|
||||
mpi_add(t1, t1, v);
|
||||
if( !odd )
|
||||
mpi_sub(t2, t2, u);
|
||||
}
|
||||
} while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */
|
||||
/* mpi_lshift( u3, k ); */
|
||||
mpi_set(x, u1);
|
||||
|
||||
mpi_free(u1);
|
||||
mpi_free(v1);
|
||||
mpi_free(t1);
|
||||
if( !odd ) {
|
||||
mpi_free(u2);
|
||||
mpi_free(v2);
|
||||
mpi_free(t2);
|
||||
}
|
||||
mpi_free(u3);
|
||||
mpi_free(v3);
|
||||
mpi_free(t3);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
119
mpi/mpi-mpow.c
Normal file
119
mpi/mpi-mpow.c
Normal file
|
@ -0,0 +1,119 @@
|
|||
/* mpi-mpow.c - MPI functions
|
||||
* Copyright (c) 1998 by Werner Koch (dd9jn)
|
||||
*
|
||||
* This file is part of G10.
|
||||
*
|
||||
* G10 is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* G10 is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
|
||||
*/
|
||||
|
||||
#include <config.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "mpi-internal.h"
|
||||
#include "longlong.h"
|
||||
#include <assert.h>
|
||||
|
||||
static int
|
||||
build_index( MPI *exparray, int k, int i, int t )
|
||||
{
|
||||
int j, bitno;
|
||||
int index = 0;
|
||||
|
||||
bitno = t-i;
|
||||
for(j=k-1; j >= 0; j-- ) {
|
||||
index <<= 1;
|
||||
if( mpi_test_bit( exparray[j], bitno ) )
|
||||
index |= 1;
|
||||
}
|
||||
/*log_debug("t=%d i=%d index=%d\n", t, i, index );*/
|
||||
return index;
|
||||
}
|
||||
|
||||
/****************
|
||||
* RES = (BASE[0] ^ EXP[0]) * (BASE[1] ^ EXP[1]) * ... * mod M
|
||||
*/
|
||||
void
|
||||
mpi_mulpowm( MPI res, MPI *basearray, MPI *exparray, MPI m)
|
||||
{
|
||||
int k; /* number of elements */
|
||||
int t; /* bit size of largest exponent */
|
||||
int i, j, idx;
|
||||
MPI *G; /* table with precomputed values of size 2^k */
|
||||
MPI tmp;
|
||||
|
||||
for(k=0; basearray[k]; k++ )
|
||||
;
|
||||
assert(k);
|
||||
for(t=0, i=0; (tmp=exparray[i]); i++ ) {
|
||||
/*log_mpidump("exp: ", tmp );*/
|
||||
j = mpi_get_nbits(tmp);
|
||||
if( j > t )
|
||||
t = j;
|
||||
}
|
||||
/*log_mpidump("mod: ", m );*/
|
||||
assert(i==k);
|
||||
assert(t);
|
||||
assert( k < 10 );
|
||||
|
||||
G = m_alloc_clear( (1<<k) * sizeof *G );
|
||||
#if 0
|
||||
/* do the precomputation */
|
||||
G[0] = mpi_alloc_set_ui( 1 );
|
||||
for(i=1; i < (1<<k); i++ ) {
|
||||
for(j=0; j < k; j++ ) {
|
||||
if( (i & (1<<j) ) ) {
|
||||
if( !G[i] )
|
||||
G[i] = mpi_copy( basearray[j] );
|
||||
else
|
||||
mpi_mulm( G[i], G[i], basearray[j], m );
|
||||
}
|
||||
}
|
||||
if( !G[i] )
|
||||
G[i] = mpi_alloc(0);
|
||||
}
|
||||
#endif
|
||||
/* and calculate */
|
||||
tmp = mpi_alloc( mpi_get_nlimbs(m)+1 );
|
||||
mpi_set_ui( res, 1 );
|
||||
for(i = 1; i <= t; i++ ) {
|
||||
mpi_mulm(tmp, res, res, m );
|
||||
idx = build_index( exparray, k, i, t );
|
||||
assert( idx >= 0 && idx < (1<<k) );
|
||||
if( !G[idx] ) {
|
||||
if( !idx )
|
||||
G[0] = mpi_alloc_set_ui( 1 );
|
||||
else {
|
||||
for(j=0; j < k; j++ ) {
|
||||
if( (idx & (1<<j) ) ) {
|
||||
if( !G[idx] )
|
||||
G[idx] = mpi_copy( basearray[j] );
|
||||
else
|
||||
mpi_mulm( G[idx], G[idx], basearray[j], m );
|
||||
}
|
||||
}
|
||||
if( !G[idx] )
|
||||
G[idx] = mpi_alloc(0);
|
||||
}
|
||||
}
|
||||
mpi_mulm(res, tmp, G[idx], m );
|
||||
}
|
||||
|
||||
/* cleanup */
|
||||
m_free(tmp);
|
||||
for(i=0; i < (1<<k); i++ )
|
||||
mpi_free(G[i]);
|
||||
m_free(G);
|
||||
}
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue