aboutsummaryrefslogtreecommitdiffstats
path: root/crypto/bn/asm/sparcv9-mont.pl
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/bn/asm/sparcv9-mont.pl')
-rwxr-xr-xcrypto/bn/asm/sparcv9-mont.pl43
1 files changed, 27 insertions, 16 deletions
diff --git a/crypto/bn/asm/sparcv9-mont.pl b/crypto/bn/asm/sparcv9-mont.pl
index d8662878006e..b41903af985f 100755
--- a/crypto/bn/asm/sparcv9-mont.pl
+++ b/crypto/bn/asm/sparcv9-mont.pl
@@ -1,7 +1,14 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2005-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
@@ -13,7 +20,7 @@
# for undertaken effort are multiple. First of all, UltraSPARC is not
# the whole SPARCv9 universe and other VIS-free implementations deserve
# optimized code as much. Secondly, newly introduced UltraSPARC T1,
-# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive pathes,
+# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive paths,
# such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with
# several integrated RSA/DSA accelerator circuits accessible through
# kernel driver [only(*)], but having decent user-land software
@@ -23,7 +30,7 @@
# instructions...
# (*) Engine accessing the driver in question is on my TODO list.
-# For reference, acceleator is estimated to give 6 to 10 times
+# For reference, accelerator is estimated to give 6 to 10 times
# improvement on single-threaded RSA sign. It should be noted
# that 6-10x improvement coefficient does not actually mean
# something extraordinary in terms of absolute [single-threaded]
@@ -42,6 +49,9 @@
# module still have hidden potential [see TODO list there], which is
# estimated to be larger than 20%...
+$output = pop;
+open STDOUT,">$output";
+
# int bn_mul_mont(
$rp="%i0"; # BN_ULONG *rp,
$ap="%i1"; # const BN_ULONG *ap,
@@ -50,10 +60,8 @@ $np="%i3"; # const BN_ULONG *np,
$n0="%i4"; # const BN_ULONG *n0,
$num="%i5"; # int num);
-$bits=32;
-for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64) { $bias=2047; $frame=192; }
-else { $bias=0; $frame=128; }
+$frame="STACK_FRAME";
+$bias="STACK_BIAS";
$car0="%o0";
$car1="%o1";
@@ -76,6 +84,8 @@ $tpj="%l7";
$fname="bn_mul_mont_int";
$code=<<___;
+#include "sparc_arch.h"
+
.section ".text",#alloc,#execinstr
.global $fname
@@ -105,7 +115,7 @@ $fname:
ld [$np],$car1 ! np[0]
sub %o7,$bias,%sp ! alloca
ld [$np+4],$npj ! np[1]
- be,pt `$bits==32?"%icc":"%xcc"`,.Lbn_sqr_mont
+ be,pt SIZE_T_CC,.Lbn_sqr_mont
mov 12,$j
mulx $car0,$mul0,$car0 ! ap[0]*bp[0]
@@ -255,7 +265,6 @@ $fname:
.Ltail:
add $np,$num,$np
add $rp,$num,$rp
- mov $tp,$ap
sub %g0,$num,%o7 ! k=-num
ba .Lsub
subcc %g0,%g0,%g0 ! clear %icc.c
@@ -268,15 +277,14 @@ $fname:
add %o7,4,%o7
brnz %o7,.Lsub
st %o1,[$i]
- subc $car2,0,$car2 ! handle upmost overflow bit
- and $tp,$car2,$ap
- andn $rp,$car2,$np
- or $ap,$np,$ap
+ subccc $car2,0,$car2 ! handle upmost overflow bit
sub %g0,$num,%o7
.Lcopy:
- ld [$ap+%o7],%o0 ! copy or in-place refresh
+ ld [$tp+%o7],%o1 ! conditional copy
+ ld [$rp+%o7],%o0
st %g0,[$tp+%o7] ! zap tp
+ movcs %icc,%o1,%o0
st %o0,[$rp+%o7]
add %o7,4,%o7
brnz %o7,.Lcopy
@@ -485,6 +493,9 @@ $code.=<<___;
mulx $npj,$mul1,$acc1
add $tpj,$car1,$car1
ld [$np+$j],$npj ! np[j]
+ srlx $car1,32,$tmp0
+ and $car1,$mask,$car1
+ add $tmp0,$sbit,$sbit
add $acc0,$car1,$car1
ld [$tp+8],$tpj ! tp[j]
add $acc1,$car1,$car1
@@ -601,7 +612,7 @@ $code.=<<___;
add $tp,8,$tp
.type $fname,#function
.size $fname,(.-$fname)
-.asciz "Montgomery Multipltication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
+.asciz "Montgomery Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
.align 32
___
$code =~ s/\`([^\`]*)\`/eval($1)/gem;