/* Mirror of git://git.gnupg.org/gnupg.git, synced 2024-12-24 10:39:57 +01:00.
 * 240 lines, 5.6 KiB.  The hosting page labels this file "ArmAsm"; the
 * code below is SPARC assembly.
 */
/* SPARC _add_n -- Add two limb vectors of the same length > 0 and store
 *		   sum in a third limb vector.
 *
 *	Copyright (C) 1995, 1996, 1998,
 *		      2001 Free Software Foundation, Inc.
 *
 * This file is part of GnuPG.
 *
 * GnuPG is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * GnuPG is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 */


/*******************
 *  mpi_limb_t
 *  mpihelp_add_n( mpi_ptr_t res_ptr,
 *		   mpi_ptr_t s1_ptr,
 *		   mpi_ptr_t s2_ptr,
 *		   mpi_size_t size)
 */

! INPUT PARAMETERS
/* The four arguments are used directly in %o0-%o3.  The routine never
 * executes a save instruction and returns with retl, so these are the
 * same registers the caller passed the arguments in.
 */
#define res_ptr	%o0
#define s1_ptr	%o1
#define s2_ptr	%o2
#define size	%o3

#include "sysdep.h"

/* mpihelp_add_n: res_ptr[i] = s1_ptr[i] + s2_ptr[i] with carry propagation
 * for i in 0..size-1, one limb being one 32-bit word (all pointers advance
 * by 4 per limb).  The carry out of the most significant limb is returned
 * in %o0 (0 or 1).
 *
 * Strategy
 *   Doubleword accesses (ldd/std) are used on whichever two of the three
 *   vectors agree in bit 2 of their address; the third vector is accessed
 *   one word at a time.
 *
 *     V1a  s2_ptr and res_ptr agree : ldd from s2, std to res, ld from s1.
 *     V1b  s1_ptr and res_ptr agree : swap s1_ptr and s2_ptr, then run V1a.
 *     V2   otherwise, s1_ptr and s2_ptr agree : ldd from both sources,
 *	    st to res.
 *
 *   If the chosen pair has bit 2 set, one limb is added on its own first
 *   so that the doubleword accesses start on a doubleword boundary.
 *
 * Carry handling
 *   The loop counter is updated with addcc/cmp/andcc, which overwrite the
 *   carry flag that the addxcc chain depends on.  Around each such update
 *   the carry is parked in %o4 with  addx %g0,%g0,%o4  (%o4 = 0 + 0 + cy)
 *   and brought back with  subcc %g0,%o4,%g0  (0 - %o4 borrows exactly
 *   when %o4 is non-zero).  The restoring subcc always sits in the delay
 *   slot of the following branch, so it runs whether or not the branch is
 *   taken.
 *
 * Registers written
 *   %g1, %g2, %g3, %g4, %o4, %o5, the condition codes, and the argument
 *   registers %o0-%o3.
 *   NOTE(review): %g1-%g4 are used as scratch without being saved;
 *   confirm this is acceptable under the ABI of every target this file
 *   is built for.
 */

	.text
	.align	4
	.global	C_SYMBOL_NAME(mpihelp_add_n)
C_SYMBOL_NAME(mpihelp_add_n):
	xor	s2_ptr,res_ptr,%g1
	andcc	%g1,4,%g0
	bne	L1			! branch if alignment differs
	nop
! **  V1a  **
L0:	andcc	res_ptr,4,%g0		! res_ptr unaligned? Side effect: cy=0
	be	L_v1			! if no, branch
	nop
/* Add least significant limb separately to align res_ptr and s2_ptr */
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	size,-1,size
	addcc	%g4,%g2,%o4		! first add of the chain: plain addcc, no carry-in
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr
L_v1:	addx	%g0,%g0,%o4		! save cy in register
	cmp	size,2			! if size < 2 ...
	bl	Lend2			! ... branch to tail code
	subcc	%g0,%o4,%g0		! restore cy

/* Preload the first two s1 limbs (%g4, %g1) and the first s2 pair
   (%g2/%g3).  From here on the loads run one pair ahead of the adds.
   The counter is biased by -10: 8 for one full block plus the 2 limbs
   that are already loaded. */
	ld	[s1_ptr+0],%g4
	addcc	size,-10,size
	ld	[s1_ptr+4],%g1
	ldd	[s2_ptr+0],%g2
	blt	Lfin1
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
Loop1:	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+16],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+20],%g1
	ldd	[s2_ptr+16],%g2
	std	%o4,[res_ptr+8]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+24],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+28],%g1
	ldd	[s2_ptr+24],%g2
	std	%o4,[res_ptr+16]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+32],%g4		! these three loads fetch the pair for the next pass
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+36],%g1
	ldd	[s2_ptr+32],%g2
	std	%o4,[res_ptr+24]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-8,size
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	Loop1
	subcc	%g0,%o4,%g0		! restore cy

Lfin1:	addcc	size,8-2,size		! change the block part of the bias from 8 to 2
	blt	Lend1
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loope1:	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-2,size
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	Loope1
	subcc	%g0,%o4,%g0		! restore cy
/* Add the pair that is still sitting in %g4/%g1 and %g2/%g3.  The
   pointers are not advanced afterwards, which is why the final odd limb
   below is addressed at offset 8. */
Lend1:	addxcc	%g4,%g2,%o4
	addxcc	%g1,%g3,%o5
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		! save cy in register

	andcc	size,1,%g0		! one more limb left?
	be	Lret1
	subcc	%g0,%o4,%g0		! restore cy
/* Add last limb */
	ld	[s1_ptr+8],%g4
	ld	[s2_ptr+8],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[res_ptr+8]

Lret1:	retl
	addx	%g0,%g0,%o0	! return carry-out from most sign. limb

L1:	xor	s1_ptr,res_ptr,%g1
	andcc	%g1,4,%g0
	bne	L2
	nop
! **  V1b  **
/* s1_ptr agrees with res_ptr in bit 2, s2_ptr does not.  Exchange the two
   source pointers and reuse the V1a code.  The last mov executes in the
   delay slot of the branch. */
	mov	s2_ptr,%g1
	mov	s1_ptr,s2_ptr
	b	L0
	mov	%g1,s1_ptr

! **  V2  **
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
   things can be aligned (that we care about) we now know that the alignment
   of s1_ptr and s2_ptr are the same.  */

L2:	cmp	size,1
	be	Ljone			! single limb: cmp found equality, so cy=0 at Ljone
	nop
	andcc	s1_ptr,4,%g0		! s1_ptr unaligned? Side effect: cy=0
	be	L_v2			! if no, branch
	nop
/* Add least significant limb separately to align s1_ptr and s2_ptr */
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	size,-1,size
	addcc	%g4,%g2,%o4		! first add of the chain: plain addcc, no carry-in
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr

L_v2:	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-8,size		! bias the counter by one full block
	blt	Lfin2
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
/* Both sources are read with ldd; the result goes out with two word
   stores per pair. */
Loop2:	ldd	[s1_ptr+0],%g2
	ldd	[s2_ptr+0],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	ldd	[s1_ptr+8],%g2
	ldd	[s2_ptr+8],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+8]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+12]
	ldd	[s1_ptr+16],%g2
	ldd	[s2_ptr+16],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+16]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+20]
	ldd	[s1_ptr+24],%g2
	ldd	[s2_ptr+24],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+24]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+28]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-8,size
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	Loop2
	subcc	%g0,%o4,%g0		! restore cy

Lfin2:	addcc	size,8-2,size		! change the counter bias from 8 to 2
	blt	Lend2
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loope2:	ldd	[s1_ptr+0],%g2
	ldd	[s2_ptr+0],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	size,-2,size
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	Loope2
	subcc	%g0,%o4,%g0		! restore cy
/* Shared tail: also reached from L_v1 when fewer than 2 limbs remain. */
Lend2:	andcc	size,1,%g0		! one more limb left?
	be	Lret2
	subcc	%g0,%o4,%g0		! restore cy
/* Add last limb */
Ljone:	ld	[s1_ptr],%g4
	ld	[s2_ptr],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[res_ptr]

Lret2:	retl
	addx	%g0,%g0,%o0	! return carry-out from most sign. limb