aboutsummaryrefslogtreecommitdiffstats
path: root/crypto/bn/asm/x86-mont.pl
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/bn/asm/x86-mont.pl')
-rwxr-xr-xcrypto/bn/asm/x86-mont.pl41
1 files changed, 24 insertions, 17 deletions
diff --git a/crypto/bn/asm/x86-mont.pl b/crypto/bn/asm/x86-mont.pl
index 89f4de61e896..1c4003efc20a 100755
--- a/crypto/bn/asm/x86-mont.pl
+++ b/crypto/bn/asm/x86-mont.pl
@@ -63,27 +63,26 @@ $frame=32; # size of above frame rounded up to 16n
&lea ("esi",&wparam(0)); # put aside pointer to argument block
&lea ("edx",&wparam(1)); # load ap
- &mov ("ebp","esp"); # saved stack pointer!
&add ("edi",2); # extra two words on top of tp
&neg ("edi");
- &lea ("esp",&DWP(-$frame,"esp","edi",4)); # alloca($frame+4*(num+2))
+ &lea ("ebp",&DWP(-$frame,"esp","edi",4)); # future alloca($frame+4*(num+2))
&neg ("edi");
# minimize cache contention by arraning 2K window between stack
# pointer and ap argument [np is also position sensitive vector,
# but it's assumed to be near ap, as it's allocated at ~same
# time].
- &mov ("eax","esp");
+ &mov ("eax","ebp");
&sub ("eax","edx");
&and ("eax",2047);
- &sub ("esp","eax"); # this aligns sp and ap modulo 2048
+ &sub ("ebp","eax"); # this aligns sp and ap modulo 2048
- &xor ("edx","esp");
+ &xor ("edx","ebp");
&and ("edx",2048);
&xor ("edx",2048);
- &sub ("esp","edx"); # this splits them apart modulo 4096
+ &sub ("ebp","edx"); # this splits them apart modulo 4096
- &and ("esp",-64); # align to cache line
+ &and ("ebp",-64); # align to cache line
# Some OSes, *cough*-dows, insist on stack being "wired" to
# physical memory in strictly sequential manner, i.e. if stack
@@ -91,20 +90,28 @@ $frame=32; # size of above frame rounded up to 16n
# be punishable by SEGV. But page walking can do good even on
# other OSes, because it guarantees that villain thread hits
# the guard page before it can make damage to innocent one...
- &mov ("eax","ebp");
- &sub ("eax","esp");
+ &mov ("eax","esp");
+ &sub ("eax","ebp");
&and ("eax",-4096);
-&set_label("page_walk");
- &mov ("edx",&DWP(0,"esp","eax"));
- &sub ("eax",4096);
- &data_byte(0x2e);
- &jnc (&label("page_walk"));
+ &mov ("edx","esp"); # saved stack pointer!
+ &lea ("esp",&DWP(0,"ebp","eax"));
+ &mov ("eax",&DWP(0,"esp"));
+ &cmp ("esp","ebp");
+ &ja (&label("page_walk"));
+ &jmp (&label("page_walk_done"));
+
+&set_label("page_walk",16);
+ &lea ("esp",&DWP(-4096,"esp"));
+ &mov ("eax",&DWP(0,"esp"));
+ &cmp ("esp","ebp");
+ &ja (&label("page_walk"));
+&set_label("page_walk_done");
################################# load argument block...
&mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp
&mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap
&mov ("ecx",&DWP(2*4,"esi"));# const BN_ULONG *bp
- &mov ("edx",&DWP(3*4,"esi"));# const BN_ULONG *np
+ &mov ("ebp",&DWP(3*4,"esi"));# const BN_ULONG *np
&mov ("esi",&DWP(4*4,"esi"));# const BN_ULONG *n0
#&mov ("edi",&DWP(5*4,"esi"));# int num
@@ -112,11 +119,11 @@ $frame=32; # size of above frame rounded up to 16n
&mov ($_rp,"eax"); # ... save a copy of argument block
&mov ($_ap,"ebx");
&mov ($_bp,"ecx");
- &mov ($_np,"edx");
+ &mov ($_np,"ebp");
&mov ($_n0,"esi");
&lea ($num,&DWP(-3,"edi")); # num=num-1 to assist modulo-scheduling
#&mov ($_num,$num); # redundant as $num is not reused
- &mov ($_sp,"ebp"); # saved stack pointer!
+ &mov ($_sp,"edx"); # saved stack pointer!
if($sse2) {
$acc0="mm0"; # mmx register bank layout