diff options
Diffstat (limited to 'crypto/md5/asm/md5-x86_64.pl')
-rwxr-xr-x | crypto/md5/asm/md5-x86_64.pl | 55 |
1 files changed, 39 insertions, 16 deletions
diff --git a/crypto/md5/asm/md5-x86_64.pl b/crypto/md5/asm/md5-x86_64.pl index 381bf77e1c6b..386d8048ec02 100755 --- a/crypto/md5/asm/md5-x86_64.pl +++ b/crypto/md5/asm/md5-x86_64.pl @@ -1,11 +1,13 @@ -#!/usr/bin/perl -w -# -# MD5 optimized for AMD64. -# +#! /usr/bin/env perl # Author: Marc Bevand <bevand_m (at) epita.fr> -# Licence: I hereby disclaim the copyright on this code and place it -# in the public domain. +# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved. # +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + +# MD5 optimized for AMD64. use strict; @@ -25,8 +27,8 @@ sub round1_step xor $y, %r11d /* y ^ ... */ lea $T_i($dst,%r10d),$dst /* Const + dst + ... */ and $x, %r11d /* x & ... */ - xor $z, %r11d /* z ^ ... */ mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */ + xor $z, %r11d /* z ^ ... */ add %r11d, $dst /* dst += ... */ rol \$$s, $dst /* dst <<< s */ mov $y, %r11d /* (NEXT STEP) z' = $y */ @@ -43,13 +45,12 @@ EOF sub round2_step { my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; - $code .= " mov 1*4(%rsi), %r10d /* (NEXT STEP) X[1] */\n" if ($pos == -1); $code .= " mov %edx, %r11d /* (NEXT STEP) z' = %edx */\n" if ($pos == -1); $code .= " mov %edx, %r12d /* (NEXT STEP) z' = %edx */\n" if ($pos == -1); $code .= <<EOF; not %r11d /* not z */ - lea $T_i($dst,%r10d),$dst /* Const + dst + ... */ and $x, %r12d /* x & z */ + lea $T_i($dst,%r10d),$dst /* Const + dst + ... */ and $y, %r11d /* y & (not z) */ mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */ or %r11d, %r12d /* (y & (not z)) | (x & z) */ @@ -66,21 +67,31 @@ EOF # %r10d = X[k_next] # %r11d = y' (copy of y for the next step) # Each round3_step() takes about 4.2 clocks (8 instructions, 1.9 IPC) +{ my $round3_alter=0; sub round3_step { my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; - $code .= " mov 5*4(%rsi), %r10d /* (NEXT STEP) X[5] */\n" if ($pos == -1); $code .= " mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */\n" if ($pos == -1); $code .= <<EOF; lea $T_i($dst,%r10d),$dst /* Const + dst + ... */ - mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */ xor $z, %r11d /* z ^ ... */ + mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */ xor $x, %r11d /* x ^ ... */ add %r11d, $dst /* dst += ... */ +EOF + $code .= <<EOF if ($round3_alter); rol \$$s, $dst /* dst <<< s */ mov $x, %r11d /* (NEXT STEP) y' = $x */ +EOF + $code .= <<EOF if (!$round3_alter); + mov $x, %r11d /* (NEXT STEP) y' = $x */ + rol \$$s, $dst /* dst <<< s */ +EOF + $code .= <<EOF; add $x, $dst /* dst += x */ EOF + $round3_alter^=1; +} } # round4_step() does: @@ -91,16 +102,15 @@ EOF sub round4_step { my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; - $code .= " mov 0*4(%rsi), %r10d /* (NEXT STEP) X[0] */\n" if ($pos == -1); $code .= " mov \$0xffffffff, %r11d\n" if ($pos == -1); $code .= " xor %edx, %r11d /* (NEXT STEP) not z' = not %edx*/\n" if ($pos == -1); $code .= <<EOF; lea $T_i($dst,%r10d),$dst /* Const + dst + ... */ or $x, %r11d /* x | ... */ + mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */ xor $y, %r11d /* y ^ ... */ add %r11d, $dst /* dst += ... */ - mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */ mov \$0xffffffff, %r11d rol \$$s, $dst /* dst <<< s */ xor $y, %r11d /* (NEXT STEP) not z' = not $y */ @@ -120,7 +130,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1; my $xlate; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT=*OUT; $code .= <<EOF; @@ -130,11 +140,17 @@ $code .= <<EOF; .globl md5_block_asm_data_order .type md5_block_asm_data_order,\@function,3 md5_block_asm_data_order: +.cfi_startproc push %rbp +.cfi_push %rbp push %rbx +.cfi_push %rbx push %r12 +.cfi_push %r12 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lprologue: # rdi = arg #1 (ctx, MD5_CTX pointer) @@ -179,7 +195,7 @@ round1_step( 0,'%ebx','%ecx','%edx','%eax','12','0x895cd7be','22'); round1_step( 0,'%eax','%ebx','%ecx','%edx','13','0x6b901122', '7'); round1_step( 0,'%edx','%eax','%ebx','%ecx','14','0xfd987193','12'); round1_step( 0,'%ecx','%edx','%eax','%ebx','15','0xa679438e','17'); -round1_step( 1,'%ebx','%ecx','%edx','%eax', '0','0x49b40821','22'); +round1_step( 1,'%ebx','%ecx','%edx','%eax', '1','0x49b40821','22'); round2_step(-1,'%eax','%ebx','%ecx','%edx', '6','0xf61e2562', '5'); round2_step( 0,'%edx','%eax','%ebx','%ecx','11','0xc040b340', '9'); @@ -196,7 +212,7 @@ round2_step( 0,'%ebx','%ecx','%edx','%eax','13','0x455a14ed','20'); round2_step( 0,'%eax','%ebx','%ecx','%edx', '2','0xa9e3e905', '5'); round2_step( 0,'%edx','%eax','%ebx','%ecx', '7','0xfcefa3f8', '9'); round2_step( 0,'%ecx','%edx','%eax','%ebx','12','0x676f02d9','14'); -round2_step( 1,'%ebx','%ecx','%edx','%eax', '0','0x8d2a4c8a','20'); +round2_step( 1,'%ebx','%ecx','%edx','%eax', '5','0x8d2a4c8a','20'); round3_step(-1,'%eax','%ebx','%ecx','%edx', '8','0xfffa3942', '4'); round3_step( 0,'%edx','%eax','%ebx','%ecx','11','0x8771f681','11'); @@ -251,13 +267,20 @@ $code .= <<EOF; mov %edx, 3*4(%rbp) # ctx->D = D mov (%rsp),%r15 +.cfi_restore %r15 mov 8(%rsp),%r14 +.cfi_restore %r14 mov 16(%rsp),%r12 +.cfi_restore %r12 mov 24(%rsp),%rbx +.cfi_restore %rbx mov 32(%rsp),%rbp +.cfi_restore %rbp add \$40,%rsp +.cfi_adjust_cfa_offset -40 .Lepilogue: ret +.cfi_endproc .size md5_block_asm_data_order,.-md5_block_asm_data_order EOF |