path: root/crypto/modes
author     Jung-uk Kim <jkim@FreeBSD.org>  2018-09-13 19:18:07 +0000
committer  Jung-uk Kim <jkim@FreeBSD.org>  2018-09-13 19:18:07 +0000
commit     a43ce912fc025d11e1395506111f75fc194d7ba5 (patch)
tree       9794cf7720d75938ed0ea4f499c0dcd4b6eacdda /crypto/modes
parent     02be298e504b8554caca6dc85af450e1ea44d19d (diff)
Import OpenSSL 1.1.1.  (vendor/openssl/1.1.1)
Notes:
    svn path=/vendor-crypto/openssl/dist/; revision=338658
    svn path=/vendor-crypto/openssl/1.1.1/; revision=338659; tag=vendor/openssl/1.1.1
Diffstat (limited to 'crypto/modes')
-rw-r--r--  crypto/modes/Makefile                 |  160
-rwxr-xr-x  crypto/modes/asm/aesni-gcm-x86_64.pl  |   54
-rwxr-xr-x  crypto/modes/asm/ghash-armv4.pl       |  101
-rwxr-xr-x  crypto/modes/asm/ghash-c64xplus.pl    |  247
-rwxr-xr-x  crypto/modes/asm/ghash-ia64.pl        |   13
-rwxr-xr-x  crypto/modes/asm/ghash-parisc.pl      |   23
-rwxr-xr-x  crypto/modes/asm/ghash-s390x.pl       |   36
-rwxr-xr-x  crypto/modes/asm/ghash-sparcv9.pl     |   32
-rwxr-xr-x  crypto/modes/asm/ghash-x86.pl         |   35
-rwxr-xr-x  crypto/modes/asm/ghash-x86_64.pl      |   97
-rwxr-xr-x  crypto/modes/asm/ghashp8-ppc.pl       |  481
-rwxr-xr-x  crypto/modes/asm/ghashv8-armx.pl      |  404
-rw-r--r--  crypto/modes/build.info               |   30
-rw-r--r--  crypto/modes/cbc128.c                 |   66
-rw-r--r--  crypto/modes/ccm128.c                 |   59
-rw-r--r--  crypto/modes/cfb128.c                 |   70
-rw-r--r--  crypto/modes/ctr128.c                 |   70
-rw-r--r--  crypto/modes/cts128.c                 |  226
-rw-r--r--  crypto/modes/gcm128.c                 | 1273
-rw-r--r--  crypto/modes/modes.h                  |  163
-rw-r--r--  crypto/modes/modes_lcl.h              |   55
-rw-r--r--  crypto/modes/ocb128.c                 |  562
-rw-r--r--  crypto/modes/ofb128.c                 |   62
-rw-r--r--  crypto/modes/wrap128.c                |  313
-rw-r--r--  crypto/modes/xts128.c                 |   59
25 files changed, 2725 insertions(+), 1966 deletions(-)
diff --git a/crypto/modes/Makefile b/crypto/modes/Makefile
deleted file mode 100644
index 2528f4a1b9ca..000000000000
--- a/crypto/modes/Makefile
+++ /dev/null
@@ -1,160 +0,0 @@
-#
-# OpenSSL/crypto/modes/Makefile
-#
-
-DIR= modes
-TOP= ../..
-CC= cc
-INCLUDES= -I.. -I$(TOP) -I../../include
-CFLAG=-g
-MAKEFILE= Makefile
-AR= ar r
-
-MODES_ASM_OBJ=
-
-CFLAGS= $(INCLUDES) $(CFLAG)
-ASFLAGS= $(INCLUDES) $(ASFLAG)
-AFLAGS= $(ASFLAGS)
-
-GENERAL=Makefile
-TEST=
-APPS=
-
-LIB=$(TOP)/libcrypto.a
-LIBSRC= cbc128.c ctr128.c cts128.c cfb128.c ofb128.c gcm128.c \
- ccm128.c xts128.c wrap128.c
-LIBOBJ= cbc128.o ctr128.o cts128.o cfb128.o ofb128.o gcm128.o \
- ccm128.o xts128.o wrap128.o $(MODES_ASM_OBJ)
-
-SRC= $(LIBSRC)
-
-#EXHEADER= store.h str_compat.h
-EXHEADER= modes.h
-HEADER= modes_lcl.h $(EXHEADER)
-
-ALL= $(GENERAL) $(SRC) $(HEADER)
-
-top:
- (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all)
-
-all: lib
-
-lib: $(LIBOBJ)
- $(AR) $(LIB) $(LIBOBJ)
- $(RANLIB) $(LIB) || echo Never mind.
- @touch lib
-
-ghash-ia64.s: asm/ghash-ia64.pl
- $(PERL) asm/ghash-ia64.pl $@ $(CFLAGS)
-ghash-x86.s: asm/ghash-x86.pl
- $(PERL) asm/ghash-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
-ghash-x86_64.s: asm/ghash-x86_64.pl
- $(PERL) asm/ghash-x86_64.pl $(PERLASM_SCHEME) > $@
-aesni-gcm-x86_64.s: asm/aesni-gcm-x86_64.pl
- $(PERL) asm/aesni-gcm-x86_64.pl $(PERLASM_SCHEME) > $@
-ghash-sparcv9.s: asm/ghash-sparcv9.pl
- $(PERL) asm/ghash-sparcv9.pl $@ $(CFLAGS)
-ghash-alpha.s: asm/ghash-alpha.pl
- (preproc=$$$$.$@.S; trap "rm $$preproc" INT; \
- $(PERL) asm/ghash-alpha.pl > $$preproc && \
- $(CC) -E -P $$preproc > $@ && rm $$preproc)
-ghash-parisc.s: asm/ghash-parisc.pl
- $(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@
-ghashv8-armx.S: asm/ghashv8-armx.pl
- $(PERL) asm/ghashv8-armx.pl $(PERLASM_SCHEME) $@
-ghashp8-ppc.s: asm/ghashp8-ppc.pl
- $(PERL) asm/ghashp8-ppc.pl $(PERLASM_SCHEME) $@
-
-# GNU make "catch all"
-ghash-%.S: asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@
-
-ghash-armv4.o: ghash-armv4.S
-ghashv8-armx.o: ghashv8-armx.S
-
-files:
- $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
-
-links:
- @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
- @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST)
- @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS)
-
-install:
- @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile...
- @headerlist="$(EXHEADER)"; for i in $$headerlist; \
- do \
- (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \
- chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \
- done;
-
-tags:
- ctags $(SRC)
-
-tests:
-
-lint:
- lint -DLINT $(INCLUDES) $(SRC)>fluff
-
-update: depend
-
-depend:
- @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile...
- $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC)
-
-dclean:
- $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new
- mv -f Makefile.new $(MAKEFILE)
-
-clean:
- rm -f *.s *.S *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff
-
-# DO NOT DELETE THIS LINE -- make depend depends on it.
-
-cbc128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
-cbc128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
-cbc128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-cbc128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
-cbc128.o: ../../include/openssl/symhacks.h cbc128.c modes_lcl.h
-ccm128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
-ccm128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
-ccm128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-ccm128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
-ccm128.o: ../../include/openssl/symhacks.h ccm128.c modes_lcl.h
-cfb128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
-cfb128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
-cfb128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-cfb128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
-cfb128.o: ../../include/openssl/symhacks.h cfb128.c modes_lcl.h
-ctr128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
-ctr128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
-ctr128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-ctr128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
-ctr128.o: ../../include/openssl/symhacks.h ctr128.c modes_lcl.h
-cts128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
-cts128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
-cts128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-cts128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
-cts128.o: ../../include/openssl/symhacks.h cts128.c modes_lcl.h
-gcm128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
-gcm128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
-gcm128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-gcm128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
-gcm128.o: ../../include/openssl/symhacks.h gcm128.c modes_lcl.h
-ofb128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
-ofb128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
-ofb128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-ofb128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
-ofb128.o: ../../include/openssl/symhacks.h modes_lcl.h ofb128.c
-wrap128.o: ../../e_os.h ../../include/openssl/bio.h
-wrap128.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
-wrap128.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
-wrap128.o: ../../include/openssl/lhash.h ../../include/openssl/modes.h
-wrap128.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
-wrap128.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h
-wrap128.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
-wrap128.o: ../cryptlib.h wrap128.c
-xts128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
-xts128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
-xts128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-xts128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
-xts128.o: ../../include/openssl/symhacks.h modes_lcl.h xts128.c
diff --git a/crypto/modes/asm/aesni-gcm-x86_64.pl b/crypto/modes/asm/aesni-gcm-x86_64.pl
index 980cfd23efe3..b42016101ebc 100755
--- a/crypto/modes/asm/aesni-gcm-x86_64.pl
+++ b/crypto/modes/asm/aesni-gcm-x86_64.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -22,10 +29,13 @@
# [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max
# Locktyukhin of Intel Corp. who verified that it reduces shuffles
# pressure with notable relative improvement, achieving 1.0 cycle per
-# byte processed with 128-bit key on Haswell processor, and 0.74 -
-# on Broadwell. [Mentioned results are raw profiled measurements for
-# favourable packet size, one divisible by 96. Applications using the
-# EVP interface will observe a few percent worse performance.]
+# byte processed with 128-bit key on Haswell processor, 0.74 - on
+# Broadwell, 0.63 - on Skylake... [Mentioned results are raw profiled
+# measurements for favourable packet size, one divisible by 96.
+# Applications using the EVP interface will observe a few percent
+# worse performance.]
+#
+# Knights Landing processes 1 byte in 1.25 cycles (measured with EVP).
#
# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
# [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf
@@ -60,7 +70,7 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([
$avx = ($2>=3.0) + ($2>3.0);
}
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
if ($avx>1) {{{
@@ -399,17 +409,25 @@ $code.=<<___;
.type aesni_gcm_decrypt,\@function,6
.align 32
aesni_gcm_decrypt:
+.cfi_startproc
xor $ret,$ret
cmp \$0x60,$len # minimal accepted length
jb .Lgcm_dec_abort
lea (%rsp),%rax # save stack pointer
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
@@ -501,15 +519,23 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
mov -48(%rax),%r15
+.cfi_restore %r15
mov -40(%rax),%r14
+.cfi_restore %r14
mov -32(%rax),%r13
+.cfi_restore %r13
mov -24(%rax),%r12
+.cfi_restore %r12
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp # restore %rsp
+.cfi_def_cfa_register %rsp
.Lgcm_dec_abort:
mov $ret,%rax # return value
ret
+.cfi_endproc
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
___
@@ -609,17 +635,25 @@ _aesni_ctr32_6x:
.type aesni_gcm_encrypt,\@function,6
.align 32
aesni_gcm_encrypt:
+.cfi_startproc
xor $ret,$ret
cmp \$0x60*3,$len # minimal accepted length
jb .Lgcm_enc_abort
lea (%rsp),%rax # save stack pointer
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
@@ -882,15 +916,23 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
mov -48(%rax),%r15
+.cfi_restore %r15
mov -40(%rax),%r14
+.cfi_restore %r14
mov -32(%rax),%r13
+.cfi_restore %r13
mov -24(%rax),%r12
+.cfi_restore %r12
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp # restore %rsp
+.cfi_def_cfa_register %rsp
.Lgcm_enc_abort:
mov $ret,%rax # return value
ret
+.cfi_endproc
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
___
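The hunks above thread DWARF call-frame information (CFI) through the
hand-written prologue and epilogue of each routine, so that debuggers and
unwinders can walk the stack across them; the .cfi_push and .cfi_restore
forms are perlasm-level shorthands which the x86_64 translator expands into
standard assembler directives. As an illustration only (this sketch is not
part of the imported sources), the same bookkeeping for a single
callee-saved register looks roughly like this in plain GNU assembler:

	.globl	example
	.type	example,@function
	example:
	.cfi_startproc			# open the CFI region
	push	%rbx
	.cfi_adjust_cfa_offset	8	# CFA is now 8 bytes further from %rsp
	.cfi_offset	%rbx,-16	# caller's %rbx is saved at CFA-16
	# ... body that clobbers %rbx ...
	pop	%rbx
	.cfi_adjust_cfa_offset	-8	# stack restored
	.cfi_restore	%rbx		# %rbx again holds the caller's value
	ret
	.cfi_endproc
	.size	example,.-example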
diff --git a/crypto/modes/asm/ghash-armv4.pl b/crypto/modes/asm/ghash-armv4.pl
index 8ccc963ef297..dcc23f7d7dbe 100755
--- a/crypto/modes/asm/ghash-armv4.pl
+++ b/crypto/modes/asm/ghash-armv4.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -42,12 +49,12 @@
# below and combine it with reduction algorithm from x86 module.
# Performance improvement over previous version varies from 65% on
# Snapdragon S4 to 110% on Cortex A9. In absolute terms Cortex A8
-# processes one byte in 8.45 cycles, A9 - in 10.2, Snapdragon S4 -
-# in 9.33.
+# processes one byte in 8.45 cycles, A9 - in 10.2, A15 - in 7.63,
+# Snapdragon S4 - in 9.33.
#
# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
# Polynomial Multiplication on ARM Processors using the NEON Engine.
-#
+#
# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
# ====================================================================
@@ -71,8 +78,20 @@
# *native* byte order on current platform. See gcm128.c for working
# example...
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
+$flavour = shift;
+if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+ die "can't locate arm-xlate.pl";
+
+ open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+ open STDOUT,">$output";
+}
$Xi="r0"; # argument block
$Htbl="r1";
@@ -124,11 +143,15 @@ $code=<<___;
#include "arm_arch.h"
.text
+#if defined(__thumb2__) || defined(__clang__)
+.syntax unified
+#define ldrplb ldrbpl
+#define ldrneb ldrbne
+#endif
+#if defined(__thumb2__)
+.thumb
+#else
.code 32
-
-#ifdef __clang__
-#define ldrplb ldrbpl
-#define ldrneb ldrbne
#endif
.type rem_4bit,%object
@@ -142,19 +165,27 @@ rem_4bit:
.type rem_4bit_get,%function
rem_4bit_get:
- sub $rem_4bit,pc,#8
- sub $rem_4bit,$rem_4bit,#32 @ &rem_4bit
+#if defined(__thumb2__)
+ adr $rem_4bit,rem_4bit
+#else
+ sub $rem_4bit,pc,#8+32 @ &rem_4bit
+#endif
b .Lrem_4bit_got
nop
+ nop
.size rem_4bit_get,.-rem_4bit_get
.global gcm_ghash_4bit
.type gcm_ghash_4bit,%function
+.align 4
gcm_ghash_4bit:
- sub r12,pc,#8
+#if defined(__thumb2__)
+ adr r12,rem_4bit
+#else
+ sub r12,pc,#8+48 @ &rem_4bit
+#endif
add $len,$inp,$len @ $len to point at the end
stmdb sp!,{r3-r11,lr} @ save $len/end too
- sub r12,r12,#48 @ &rem_4bit
ldmia r12,{r4-r11} @ copy rem_4bit ...
stmdb sp!,{r4-r11} @ ... to stack
@@ -201,6 +232,9 @@ gcm_ghash_4bit:
eor $Zlh,$Zlh,$Zhl,lsl#28
ldrh $Tll,[sp,$nlo] @ rem_4bit[rem]
eor $Zhl,$Thl,$Zhl,lsr#4
+#ifdef __thumb2__
+ it pl
+#endif
ldrplb $nlo,[$inp,$cnt]
eor $Zhl,$Zhl,$Zhh,lsl#28
eor $Zhh,$Thh,$Zhh,lsr#4
@@ -211,6 +245,9 @@ gcm_ghash_4bit:
add $nhi,$nhi,$nhi
ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi]
eor $Zll,$Tll,$Zll,lsr#4
+#ifdef __thumb2__
+ it pl
+#endif
ldrplb $Tll,[$Xi,$cnt]
eor $Zll,$Zll,$Zlh,lsl#28
eor $Zlh,$Tlh,$Zlh,lsr#4
@@ -218,8 +255,14 @@ gcm_ghash_4bit:
eor $Zlh,$Zlh,$Zhl,lsl#28
eor $Zhl,$Thl,$Zhl,lsr#4
eor $Zhl,$Zhl,$Zhh,lsl#28
+#ifdef __thumb2__
+ it pl
+#endif
eorpl $nlo,$nlo,$Tll
eor $Zhh,$Thh,$Zhh,lsr#4
+#ifdef __thumb2__
+ itt pl
+#endif
andpl $nhi,$nlo,#0xf0
andpl $nlo,$nlo,#0x0f
eor $Zhh,$Zhh,$Tlh,lsl#16 @ ^= rem_4bit[rem]
@@ -229,7 +272,11 @@ gcm_ghash_4bit:
add $inp,$inp,#16
mov $nhi,$Zll
___
- &Zsmash("cmp\t$inp,$len","ldrneb\t$nlo,[$inp,#15]");
+ &Zsmash("cmp\t$inp,$len","\n".
+ "#ifdef __thumb2__\n".
+ " it ne\n".
+ "#endif\n".
+ " ldrneb $nlo,[$inp,#15]");
$code.=<<___;
bne .Louter
@@ -287,6 +334,9 @@ gcm_gmult_4bit:
eor $Zlh,$Zlh,$Zhl,lsl#28
ldrh $Tll,[$rem_4bit,$nlo] @ rem_4bit[rem]
eor $Zhl,$Thl,$Zhl,lsr#4
+#ifdef __thumb2__
+ it pl
+#endif
ldrplb $nlo,[$Xi,$cnt]
eor $Zhl,$Zhl,$Zhh,lsl#28
eor $Zhh,$Thh,$Zhh,lsr#4
@@ -304,6 +354,9 @@ gcm_gmult_4bit:
eor $Zhl,$Thl,$Zhl,lsr#4
eor $Zhl,$Zhl,$Zhh,lsl#28
eor $Zhh,$Thh,$Zhh,lsr#4
+#ifdef __thumb2__
+ itt pl
+#endif
andpl $nhi,$nlo,#0xf0
andpl $nlo,$nlo,#0x0f
eor $Zhh,$Zhh,$Tll,lsl#16 @ ^= rem_4bit[rem]
@@ -378,9 +431,9 @@ $code.=<<___;
.type gcm_init_neon,%function
.align 4
gcm_init_neon:
- vld1.64 $IN#hi,[r1,:64]! @ load H
+ vld1.64 $IN#hi,[r1]! @ load H
vmov.i8 $t0,#0xe1
- vld1.64 $IN#lo,[r1,:64]
+ vld1.64 $IN#lo,[r1]
vshl.i64 $t0#hi,#57
vshr.u64 $t0#lo,#63 @ t0=0xc2....01
vdup.8 $t1,$IN#hi[7]
@@ -399,8 +452,8 @@ gcm_init_neon:
.type gcm_gmult_neon,%function
.align 4
gcm_gmult_neon:
- vld1.64 $IN#hi,[$Xi,:64]! @ load Xi
- vld1.64 $IN#lo,[$Xi,:64]!
+ vld1.64 $IN#hi,[$Xi]! @ load Xi
+ vld1.64 $IN#lo,[$Xi]!
vmov.i64 $k48,#0x0000ffffffffffff
vldmia $Htbl,{$Hlo-$Hhi} @ load twisted H
vmov.i64 $k32,#0x00000000ffffffff
@@ -417,8 +470,8 @@ gcm_gmult_neon:
.type gcm_ghash_neon,%function
.align 4
gcm_ghash_neon:
- vld1.64 $Xl#hi,[$Xi,:64]! @ load Xi
- vld1.64 $Xl#lo,[$Xi,:64]!
+ vld1.64 $Xl#hi,[$Xi]! @ load Xi
+ vld1.64 $Xl#lo,[$Xi]!
vmov.i64 $k48,#0x0000ffffffffffff
vldmia $Htbl,{$Hlo-$Hhi} @ load twisted H
vmov.i64 $k32,#0x00000000ffffffff
@@ -472,9 +525,9 @@ $code.=<<___;
#ifdef __ARMEL__
vrev64.8 $Xl,$Xl
#endif
- sub $Xi,#16
- vst1.64 $Xl#hi,[$Xi,:64]! @ write out Xi
- vst1.64 $Xl#lo,[$Xi,:64]
+ sub $Xi,#16
+ vst1.64 $Xl#hi,[$Xi]! @ write out Xi
+ vst1.64 $Xl#lo,[$Xi]
ret @ bx lr
.size gcm_ghash_neon,.-gcm_ghash_neon
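Most of the churn above is Thumb-2 enablement. In unified Thumb-2 syntax
two things change relative to classic ARM mode: the size suffix moves in
front of the condition code (hence the #define ldrplb ldrbpl aliases), and
a conditionally executed instruction must be introduced by an IT (if-then)
instruction, which ARM mode never needed. A minimal illustration (not from
the imported sources):

	@ ARM mode: the PL condition is encoded directly in the instruction
	ldrplb	r3,[r1,r2]

	@ Thumb-2 unified syntax: same operation, but inside an IT block
	it	pl
	ldrbpl	r3,[r1,r2]

Separately, the ",:64" alignment hints on the NEON vld1.64/vst1.64
accesses are dropped, apparently so these paths no longer fault when Xi
or H is not 64-bit aligned.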
diff --git a/crypto/modes/asm/ghash-c64xplus.pl b/crypto/modes/asm/ghash-c64xplus.pl
new file mode 100755
index 000000000000..3cadda39945c
--- /dev/null
+++ b/crypto/modes/asm/ghash-c64xplus.pl
@@ -0,0 +1,247 @@
+#! /usr/bin/env perl
+# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# December 2011
+#
+# The module implements GCM GHASH function and underlying single
+# multiplication operation in GF(2^128). Even though subroutines
+# have _4bit suffix, they are not using any tables, but rely on
+# hardware Galois Field Multiply support. Streamed GHASH processes
+# byte in ~7 cycles, which is >6x faster than "4-bit" table-driven
+# code compiled with TI's cl6x 6.0 with -mv6400+ -o2 flags. We are
+# comparing apples vs. oranges, but compiler surely could have done
+# better, because theoretical [though not necessarily achievable]
+# estimate for "4-bit" table-driven implementation is ~12 cycles.
+
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+($Xip,$Htable,$inp,$len)=("A4","B4","A6","B6"); # arguments
+
+($Z0,$Z1,$Z2,$Z3, $H0, $H1, $H2, $H3,
+ $H0x,$H1x,$H2x,$H3x)=map("A$_",(16..27));
+($H01u,$H01y,$H2u,$H3u, $H0y,$H1y,$H2y,$H3y,
+ $H0z,$H1z,$H2z,$H3z)=map("B$_",(16..27));
+($FF000000,$E10000)=("B30","B31");
+($xip,$x0,$x1,$xib)=map("B$_",(6..9)); # $xip zaps $len
+ $xia="A9";
+($rem,$res)=("B4","B5"); # $rem zaps $Htable
+
+$code.=<<___;
+ .text
+
+ .if .ASSEMBLER_VERSION<7000000
+ .asg 0,__TI_EABI__
+ .endif
+ .if __TI_EABI__
+ .asg gcm_gmult_1bit,_gcm_gmult_1bit
+ .asg gcm_gmult_4bit,_gcm_gmult_4bit
+ .asg gcm_ghash_4bit,_gcm_ghash_4bit
+ .endif
+
+ .asg B3,RA
+
+ .if 0
+ .global _gcm_gmult_1bit
+_gcm_gmult_1bit:
+ ADDAD $Htable,2,$Htable
+ .endif
+ .global _gcm_gmult_4bit
+_gcm_gmult_4bit:
+ .asmfunc
+ LDDW *${Htable}[-1],$H1:$H0 ; H.lo
+ LDDW *${Htable}[-2],$H3:$H2 ; H.hi
+|| MV $Xip,${xip} ; reassign Xi
+|| MVK 15,B1 ; SPLOOPD constant
+
+ MVK 0xE1,$E10000
+|| LDBU *++${xip}[15],$x1 ; Xi[15]
+ MVK 0xFF,$FF000000
+|| LDBU *--${xip},$x0 ; Xi[14]
+ SHL $E10000,16,$E10000 ; [pre-shifted] reduction polynomial
+ SHL $FF000000,24,$FF000000 ; upper byte mask
+|| BNOP ghash_loop?
+|| MVK 1,B0 ; take a single spin
+
+ PACKH2 $H0,$H1,$xia ; pack H0' and H1's upper bytes
+ AND $H2,$FF000000,$H2u ; H2's upper byte
+ AND $H3,$FF000000,$H3u ; H3's upper byte
+|| SHRU $H2u,8,$H2u
+ SHRU $H3u,8,$H3u
+|| ZERO $Z1:$Z0
+ SHRU2 $xia,8,$H01u
+|| ZERO $Z3:$Z2
+ .endasmfunc
+
+ .global _gcm_ghash_4bit
+_gcm_ghash_4bit:
+ .asmfunc
+ LDDW *${Htable}[-1],$H1:$H0 ; H.lo
+|| SHRU $len,4,B0 ; reassign len
+ LDDW *${Htable}[-2],$H3:$H2 ; H.hi
+|| MV $Xip,${xip} ; reassign Xi
+|| MVK 15,B1 ; SPLOOPD constant
+
+ MVK 0xE1,$E10000
+|| [B0] LDNDW *${inp}[1],$H1x:$H0x
+ MVK 0xFF,$FF000000
+|| [B0] LDNDW *${inp}++[2],$H3x:$H2x
+ SHL $E10000,16,$E10000 ; [pre-shifted] reduction polynomial
+|| LDDW *${xip}[1],$Z1:$Z0
+ SHL $FF000000,24,$FF000000 ; upper byte mask
+|| LDDW *${xip}[0],$Z3:$Z2
+
+ PACKH2 $H0,$H1,$xia ; pack H0' and H1's upper bytes
+ AND $H2,$FF000000,$H2u ; H2's upper byte
+ AND $H3,$FF000000,$H3u ; H3's upper byte
+|| SHRU $H2u,8,$H2u
+ SHRU $H3u,8,$H3u
+ SHRU2 $xia,8,$H01u
+
+|| [B0] XOR $H0x,$Z0,$Z0 ; Xi^=inp
+|| [B0] XOR $H1x,$Z1,$Z1
+ .if .LITTLE_ENDIAN
+ [B0] XOR $H2x,$Z2,$Z2
+|| [B0] XOR $H3x,$Z3,$Z3
+|| [B0] SHRU $Z1,24,$xia ; Xi[15], avoid cross-path stall
+ STDW $Z1:$Z0,*${xip}[1]
+|| [B0] SHRU $Z1,16,$x0 ; Xi[14]
+|| [B0] ZERO $Z1:$Z0
+ .else
+ [B0] XOR $H2x,$Z2,$Z2
+|| [B0] XOR $H3x,$Z3,$Z3
+|| [B0] MV $Z0,$xia ; Xi[15], avoid cross-path stall
+ STDW $Z1:$Z0,*${xip}[1]
+|| [B0] SHRU $Z0,8,$x0 ; Xi[14]
+|| [B0] ZERO $Z1:$Z0
+ .endif
+ STDW $Z3:$Z2,*${xip}[0]
+|| [B0] ZERO $Z3:$Z2
+|| [B0] MV $xia,$x1
+ [B0] ADDK 14,${xip}
+
+ghash_loop?:
+ SPLOOPD 6 ; 6*16+7
+|| MVC B1,ILC
+|| [B0] SUB B0,1,B0
+|| ZERO A0
+|| ADD $x1,$x1,$xib ; SHL $x1,1,$xib
+|| SHL $x1,1,$xia
+___
+
+########____________________________
+# 0 D2. M1 M2 |
+# 1 M1 |
+# 2 M1 M2 |
+# 3 D1. M1 M2 |
+# 4 S1. L1 |
+# 5 S2 S1x L1 D2 L2 |____________________________
+# 6/0 L1 S1 L2 S2x |D2. M1 M2 |
+# 7/1 L1 S1 D1x S2 M2 | M1 |
+# 8/2 S1 L1x S2 | M1 M2 |
+# 9/3 S1 L1x | D1. M1 M2 |
+# 10/4 D1x | S1. L1 |
+# 11/5 |S2 S1x L1 D2 L2 |____________
+# 12/6/0 D1x __| L1 S1 L2 S2x |D2. ....
+# 7/1 L1 S1 D1x S2 M2 | ....
+# 8/2 S1 L1x S2 | ....
+#####... ................|............
+$code.=<<___;
+ XORMPY $H0,$xia,$H0x ; 0 ; H·(Xi[i]<<1)
+|| XORMPY $H01u,$xib,$H01y
+|| [A0] LDBU *--${xip},$x0
+ XORMPY $H1,$xia,$H1x ; 1
+ XORMPY $H2,$xia,$H2x ; 2
+|| XORMPY $H2u,$xib,$H2y
+ XORMPY $H3,$xia,$H3x ; 3
+|| XORMPY $H3u,$xib,$H3y
+||[!A0] MVK.D 15,A0 ; *--${xip} counter
+ XOR.L $H0x,$Z0,$Z0 ; 4 ; Z^=H·(Xi[i]<<1)
+|| [A0] SUB.S A0,1,A0
+ XOR.L $H1x,$Z1,$Z1 ; 5
+|| AND.D $H01y,$FF000000,$H0z
+|| SWAP2.L $H01y,$H1y ; ; SHL $H01y,16,$H1y
+|| SHL $x0,1,$xib
+|| SHL $x0,1,$xia
+
+ XOR.L $H2x,$Z2,$Z2 ; 6/0 ; [0,0] in epilogue
+|| SHL $Z0,1,$rem ; ; rem=Z<<1
+|| SHRMB.S $Z1,$Z0,$Z0 ; ; Z>>=8
+|| AND.L $H1y,$FF000000,$H1z
+ XOR.L $H3x,$Z3,$Z3 ; 7/1
+|| SHRMB.S $Z2,$Z1,$Z1
+|| XOR.D $H0z,$Z0,$Z0 ; merge upper byte products
+|| AND.S $H2y,$FF000000,$H2z
+|| XORMPY $E10000,$rem,$res ; ; implicit rem&0x1FE
+ XOR.L $H1z,$Z1,$Z1 ; 8/2
+|| SHRMB.S $Z3,$Z2,$Z2
+|| AND.S $H3y,$FF000000,$H3z
+ XOR.L $H2z,$Z2,$Z2 ; 9/3
+|| SHRU $Z3,8,$Z3
+ XOR.D $H3z,$Z3,$Z3 ; 10/4
+ NOP ; 11/5
+
+ SPKERNEL 0,2
+|| XOR.D $res,$Z3,$Z3 ; 12/6/0; Z^=res
+
+ ; input pre-fetch is possible where D1 slot is available...
+ [B0] LDNDW *${inp}[1],$H1x:$H0x ; 8/-
+ [B0] LDNDW *${inp}++[2],$H3x:$H2x ; 9/-
+ NOP ; 10/-
+ .if .LITTLE_ENDIAN
+ SWAP2 $Z0,$Z1 ; 11/-
+|| SWAP4 $Z1,$Z0
+ SWAP4 $Z1,$Z1 ; 12/-
+|| SWAP2 $Z0,$Z0
+ SWAP2 $Z2,$Z3
+|| SWAP4 $Z3,$Z2
+||[!B0] BNOP RA
+ SWAP4 $Z3,$Z3
+|| SWAP2 $Z2,$Z2
+|| [B0] BNOP ghash_loop?
+ [B0] XOR $H0x,$Z0,$Z0 ; Xi^=inp
+|| [B0] XOR $H1x,$Z1,$Z1
+ [B0] XOR $H2x,$Z2,$Z2
+|| [B0] XOR $H3x,$Z3,$Z3
+|| [B0] SHRU $Z1,24,$xia ; Xi[15], avoid cross-path stall
+ STDW $Z1:$Z0,*${xip}[1]
+|| [B0] SHRU $Z1,16,$x0 ; Xi[14]
+|| [B0] ZERO $Z1:$Z0
+ .else
+ [!B0] BNOP RA ; 11/-
+ [B0] BNOP ghash_loop? ; 12/-
+ [B0] XOR $H0x,$Z0,$Z0 ; Xi^=inp
+|| [B0] XOR $H1x,$Z1,$Z1
+ [B0] XOR $H2x,$Z2,$Z2
+|| [B0] XOR $H3x,$Z3,$Z3
+|| [B0] MV $Z0,$xia ; Xi[15], avoid cross-path stall
+ STDW $Z1:$Z0,*${xip}[1]
+|| [B0] SHRU $Z0,8,$x0 ; Xi[14]
+|| [B0] ZERO $Z1:$Z0
+ .endif
+ STDW $Z3:$Z2,*${xip}[0]
+|| [B0] ZERO $Z3:$Z2
+|| [B0] MV $xia,$x1
+ [B0] ADDK 14,${xip}
+ .endasmfunc
+
+ .sect .const
+ .cstring "GHASH for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
+ .align 4
+___
+
+print $code;
+close STDOUT;
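For readers unfamiliar with the operation all of these modules accelerate:
GHASH is a polynomial MAC over GF(2^128) with reduction polynomial
x^128 + x^7 + x^2 + x + 1, evaluated at the hash key H; each 16-byte block
is XORed into the accumulator Xi, which is then multiplied by H in the
field. The following bit-at-a-time Perl sketch (an illustration only, not
part of the imported sources; the real code uses table-driven or hardware
carry-less multiplication) shows the reference semantics:

	#!/usr/bin/env perl
	use strict;
	use warnings;
	use Math::BigInt;

	# Blocks are treated as 128-bit big-endian integers. GCM's
	# bit-reflected field makes the reduction constant 0xE1<<120.
	my $R = Math::BigInt->from_hex("E1")->blsft(120);

	sub gf128_mul {
	    my ($X, $H) = @_;              # two 128-bit Math::BigInt values
	    my $Z = Math::BigInt->bzero();
	    my $V = $H->copy();
	    for my $i (0 .. 127) {
	        # bit i of X, counting from the most significant bit
	        $Z->bxor($V)
	            if $X->copy()->brsft(127 - $i)->band(1)->is_one();
	        # V = V*x, reducing when the low coefficient falls off
	        if ($V->copy()->band(1)->is_one()) {
	            $V->brsft(1)->bxor($R);
	        } else {
	            $V->brsft(1);
	        }
	    }
	    return $Z;
	}

	# GHASH is then, per 16-byte block C:  Xi = gf128_mul(Xi ^ C, H)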
diff --git a/crypto/modes/asm/ghash-ia64.pl b/crypto/modes/asm/ghash-ia64.pl
index 0354c9544485..eb9ded91e5b6 100755
--- a/crypto/modes/asm/ghash-ia64.pl
+++ b/crypto/modes/asm/ghash-ia64.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -32,7 +39,7 @@
# Itanium performance should remain the same as the "256B" version,
# i.e. ~8.5 cycles.
-$output=shift and (open STDOUT,">$output" or die "can't open $output: $!");
+$output=pop and (open STDOUT,">$output" or die "can't open $output: $!");
if ($^O eq "hpux") {
$ADDP="addp4";
@@ -149,7 +156,7 @@ $code.=<<___;
___
######################################################################
-# "528B" (well, "512B" actualy) streamed GHASH
+# "528B" (well, "512B" actually) streamed GHASH
#
$Xip="in0";
$Htbl="in1";
diff --git a/crypto/modes/asm/ghash-parisc.pl b/crypto/modes/asm/ghash-parisc.pl
index d5ad96b40335..a614c99c22ce 100755
--- a/crypto/modes/asm/ghash-parisc.pl
+++ b/crypto/modes/asm/ghash-parisc.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -698,7 +705,7 @@ my $depd = sub {
my ($mod,$args) = @_;
my $orig = "depd$mod\t$args";
- # I only have ",z" completer, it's impicitly encoded...
+ # I only have ",z" completer, it's implicitly encoded...
if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 16
{ my $opcode=(0x3c<<26)|($4<<21)|($1<<16);
my $cpos=63-$2;
@@ -717,6 +724,11 @@ sub assemble {
ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
}
+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+ =~ /GNU assembler/) {
+ $gnuas = 1;
+}
+
foreach (split("\n",$code)) {
s/\`([^\`]*)\`/eval $1/ge;
if ($SIZE_T==4) {
@@ -724,7 +736,12 @@ foreach (split("\n",$code)) {
s/cmpb,\*/comb,/;
s/,\*/,/;
}
- s/\bbv\b/bve/ if ($SIZE_T==8);
+
+ s/(\.LEVEL\s+2\.0)W/$1w/ if ($gnuas && $SIZE_T==8);
+ s/\.SPACE\s+\$TEXT\$/.text/ if ($gnuas && $SIZE_T==8);
+ s/\.SUBSPA.*// if ($gnuas && $SIZE_T==8);
+ s/\bbv\b/bve/ if ($SIZE_T==8);
+
print $_,"\n";
}
diff --git a/crypto/modes/asm/ghash-s390x.pl b/crypto/modes/asm/ghash-s390x.pl
index be7d55f74876..17dc375053c5 100755
--- a/crypto/modes/asm/ghash-s390x.pl
+++ b/crypto/modes/asm/ghash-s390x.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -47,7 +54,7 @@ if ($flavour =~ /3[12]/) {
$g="g";
}
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$softonly=0;
@@ -73,6 +80,8 @@ $rem_4bit="%r14";
$sp="%r15";
$code.=<<___;
+#include "s390x_arch.h"
+
.text
.globl gcm_gmult_4bit
@@ -81,16 +90,14 @@ gcm_gmult_4bit:
___
$code.=<<___ if(!$softonly && 0); # hardware is slow for single block...
larl %r1,OPENSSL_s390xcap_P
- lg %r0,0(%r1)
- tmhl %r0,0x4000 # check for message-security-assist
- jz .Lsoft_gmult
lghi %r0,0
- lg %r1,24(%r1) # load second word of kimd capabilities vector
+ lg %r1,S390X_KIMD+8(%r1) # load second word of kimd capabilities
+ # vector
tmhh %r1,0x4000 # check for function 65
jz .Lsoft_gmult
stg %r0,16($sp) # arrange 16 bytes of zero input
stg %r0,24($sp)
- lghi %r0,65 # function 65
+ lghi %r0,S390X_GHASH # function 65
la %r1,0($Xi) # H lies right after Xi in gcm128_context
la $inp,16($sp)
lghi $len,16
@@ -119,16 +126,11 @@ gcm_ghash_4bit:
___
$code.=<<___ if(!$softonly);
larl %r1,OPENSSL_s390xcap_P
- lg %r0,0(%r1)
- tmhl %r0,0x4000 # check for message-security-assist
- jz .Lsoft_ghash
- lghi %r0,0
- la %r1,16($sp)
- .long 0xb93e0004 # kimd %r0,%r4
- lg %r1,24($sp)
- tmhh %r1,0x4000 # check for function 65
+ lg %r0,S390X_KIMD+8(%r1) # load second word of kimd capabilities
+ # vector
+ tmhh %r0,0x4000 # check for function 65
jz .Lsoft_ghash
- lghi %r0,65 # function 65
+ lghi %r0,S390X_GHASH # function 65
la %r1,0($Xi) # H lies right after Xi in gcm128_context
.long 0xb93e0004 # kimd %r0,$inp
brc 1,.-4 # pay attention to "partial completion"
@@ -151,7 +153,7 @@ $code.=<<___;
lg $Zhi,0+1($Xi)
lghi $tmp,0
.Louter:
- xg $Zhi,0($inp) # Xi ^= inp
+ xg $Zhi,0($inp) # Xi ^= inp
xg $Zlo,8($inp)
xgr $Zhi,$tmp
stg $Zlo,8+1($Xi)
diff --git a/crypto/modes/asm/ghash-sparcv9.pl b/crypto/modes/asm/ghash-sparcv9.pl
index b129ba706f0f..c4eb3b1f0206 100755
--- a/crypto/modes/asm/ghash-sparcv9.pl
+++ b/crypto/modes/asm/ghash-sparcv9.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -46,14 +53,12 @@
# saturates at ~15.5x single-process result on 8-core processor,
# or ~20.5GBps per 2.85GHz socket.
-$bits=32;
-for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64) { $bias=2047; $frame=192; }
-else { $bias=0; $frame=112; }
-
-$output=shift;
+$output=pop;
open STDOUT,">$output";
+$frame="STACK_FRAME";
+$bias="STACK_BIAS";
+
$Zhi="%o0"; # 64-bit values
$Zlo="%o1";
$Thi="%o2";
@@ -75,11 +80,14 @@ $Htbl="%i1";
$inp="%i2";
$len="%i3";
-$code.=<<___ if ($bits==64);
+$code.=<<___;
+#include "sparc_arch.h"
+
+#ifdef __arch64__
.register %g2,#scratch
.register %g3,#scratch
-___
-$code.=<<___;
+#endif
+
.section ".text",#alloc,#execinstr
.align 64
@@ -183,7 +191,7 @@ gcm_ghash_4bit:
add $inp,16,$inp
cmp $inp,$len
- be,pn `$bits==64?"%xcc":"%icc"`,.Ldone
+ be,pn SIZE_T_CC,.Ldone
and $Zlo,0xf,$remi
ldx [$Htblo+$nhi],$Tlo
@@ -532,7 +540,7 @@ ___
# Purpose of these subroutines is to explicitly encode VIS instructions,
# so that one can compile the module without having to specify VIS
-# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
+# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
# Idea is to reserve for option to produce "universal" binary and let
# programmer detect if current CPU is VIS capable at run-time.
sub unvis3 {
diff --git a/crypto/modes/asm/ghash-x86.pl b/crypto/modes/asm/ghash-x86.pl
index 0269169fa743..bcbe6e399d13 100755
--- a/crypto/modes/asm/ghash-x86.pl
+++ b/crypto/modes/asm/ghash-x86.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -88,7 +95,7 @@
# where Tproc is time required for Karatsuba pre- and post-processing,
# is more realistic estimate. In this case it gives ... 1.91 cycles.
# Or in other words, depending on how well we can interleave reduction
-# and one of the two multiplications the performance should be betwen
+# and one of the two multiplications the performance should be between
# 1.91 and 2.16. As already mentioned, this implementation processes
# one byte out of 8KB buffer in 2.10 cycles, while x86_64 counterpart
# - in 2.02. x86_64 performance is better, because larger register
@@ -96,14 +103,13 @@
#
# Does it make sense to increase Naggr? To start with it's virtually
# impossible in 32-bit mode, because of limited register bank
-# capacity. Otherwise improvement has to be weighed agiainst slower
+# capacity. Otherwise improvement has to be weighed against slower
# setup, as well as code size and complexity increase. As even
# optimistic estimate doesn't promise 30% performance improvement,
# there are currently no plans to increase Naggr.
#
-# Special thanks to David Woodhouse <dwmw2@infradead.org> for
-# providing access to a Westmere-based system on behalf of Intel
-# Open Source Technology Centre.
+# Special thanks to David Woodhouse for providing access to a
+# Westmere-based system on behalf of Intel Open Source Technology Centre.
# January 2010
#
@@ -129,7 +135,10 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
-&asm_init($ARGV[0],"ghash-x86.pl",$x86only = $ARGV[$#ARGV] eq "386");
+$output=pop;
+open STDOUT,">$output";
+
+&asm_init($ARGV[0],$x86only = $ARGV[$#ARGV] eq "386");
$sse2=0;
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
@@ -712,7 +721,7 @@ sub mmx_loop() {
&pxor ($red[1],$red[1]);
&pxor ($red[2],$red[2]);
- # Just like in "May" verson modulo-schedule for critical path in
+ # Just like in "May" version modulo-schedule for critical path in
# 'Z.hi ^= rem_8bit[Z.lo&0xff^((u8)H[nhi]<<4)]<<48'. Final 'pxor'
# is scheduled so late that rem_8bit[] has to be shifted *right*
# by 16, which is why last argument to pinsrw is 2, which
@@ -801,7 +810,7 @@ sub mmx_loop() {
&bswap ($dat);
&pshufw ($Zhi,$Zhi,0b00011011); # 76543210
&bswap ("ebx");
-
+
&cmp ("ecx",&DWP(528+16+8,"esp")); # are we done?
&jne (&label("outer"));
}
@@ -905,7 +914,7 @@ my ($Xhi,$Xi) = @_;
&psllq ($Xi,57); #
&movdqa ($T1,$Xi); #
&pslldq ($Xi,8);
- &psrldq ($T1,8); #
+ &psrldq ($T1,8); #
&pxor ($Xi,$T2);
&pxor ($Xhi,$T1); #
@@ -1075,7 +1084,7 @@ my ($Xhi,$Xi) = @_;
&psllq ($Xi,57); #
&movdqa ($T1,$Xi); #
&pslldq ($Xi,8);
- &psrldq ($T1,8); #
+ &psrldq ($T1,8); #
&pxor ($Xi,$T2);
&pxor ($Xhi,$T1); #
&pshufd ($T1,$Xhn,0b01001110);
@@ -1138,7 +1147,7 @@ my ($Xhi,$Xi) = @_;
&movdqu (&QWP(0,$Xip),$Xi);
&function_end("gcm_ghash_clmul");
-} else { # Algorith 5. Kept for reference purposes.
+} else { # Algorithm 5. Kept for reference purposes.
sub reduction_alg5 { # 19/16 times faster than Intel version
my ($Xhi,$Xi)=@_;
@@ -1369,6 +1378,8 @@ my ($Xhi,$Xi)=@_;
&asciz("GHASH for x86, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
+close STDOUT;
+
# A question was risen about choice of vanilla MMX. Or rather why wasn't
# SSE2 chosen instead? In addition to the fact that MMX runs on legacy
# CPUs such as PIII, "4-bit" MMX version was observed to provide better
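The "Karatsuba pre- and post-processing" referred to throughout these
modules is the standard three-multiplication shortcut, which carries over
to carry-less arithmetic unchanged because addition in GF(2) is XOR and
never produces carries. Splitting the 128-bit operands as
X = Xh·2^64 ^ Xl and H = Hh·2^64 ^ Hl, the full 256-bit carry-less
product is

	X·H = Xh·Hh·2^128 ^ (Xh·Hl ^ Xl·Hh)·2^64 ^ Xl·Hl

and the middle term can be derived from the outer two with a single extra
multiplication:

	Xh·Hl ^ Xl·Hh = (Xh^Xl)·(Hh^Hl) ^ Xh·Hh ^ Xl·Hl

The "pre-processing" computes Xh^Xl and Hh^Hl; the "post-processing" folds
the three partial products back into the 256-bit result before reduction.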
diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/modes/asm/ghash-x86_64.pl
index f889f2018789..afc30c3e72a4 100755
--- a/crypto/modes/asm/ghash-x86_64.pl
+++ b/crypto/modes/asm/ghash-x86_64.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -37,9 +44,8 @@
# See ghash-x86.pl for background information and details about coding
# techniques.
#
-# Special thanks to David Woodhouse <dwmw2@infradead.org> for
-# providing access to a Westmere-based system on behalf of Intel
-# Open Source Technology Centre.
+# Special thanks to David Woodhouse for providing access to a
+# Westmere-based system on behalf of Intel Open Source Technology Centre.
# December 2012
#
@@ -64,8 +70,11 @@
# Ivy Bridge 1.80(+7%)
# Haswell 0.55(+93%) (if system doesn't support AVX)
# Broadwell 0.45(+110%)(if system doesn't support AVX)
+# Skylake 0.44(+110%)(if system doesn't support AVX)
# Bulldozer 1.49(+27%)
# Silvermont 2.88(+13%)
+# Knights L 2.12(-) (if system doesn't support AVX)
+# Goldmont 1.08(+24%)
# March 2013
#
@@ -74,8 +83,10 @@
# CPUs such as Sandy and Ivy Bridge can execute it, the code performs
# sub-optimally in comparison to above mentioned version. But thanks
# to Ilya Albrekht and Max Locktyukhin of Intel Corp. we knew that
-# it performs in 0.41 cycles per byte on Haswell processor, and in
-# 0.29 on Broadwell.
+# it performs in 0.41 cycles per byte on Haswell processor, in
+# 0.29 on Broadwell, and in 0.36 on Skylake.
+#
+# Knights Landing achieves 1.09 cpb.
#
# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
@@ -109,7 +120,7 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([
$avx = ($2>=3.0) + ($2>3.0);
}
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
$do4xaggr=1;
@@ -227,9 +238,21 @@ $code=<<___;
.type gcm_gmult_4bit,\@function,2
.align 16
gcm_gmult_4bit:
+.cfi_startproc
push %rbx
- push %rbp # %rbp and %r12 are pushed exclusively in
+.cfi_push %rbx
+ push %rbp # %rbp and others are pushed exclusively in
+.cfi_push %rbp
push %r12 # order to reuse Win64 exception handler...
+.cfi_push %r12
+ push %r13
+.cfi_push %r13
+ push %r14
+.cfi_push %r14
+ push %r15
+.cfi_push %r15
+ sub \$280,%rsp
+.cfi_adjust_cfa_offset 280
.Lgmult_prologue:
movzb 15($Xi),$Zlo
@@ -240,10 +263,15 @@ $code.=<<___;
mov $Zlo,8($Xi)
mov $Zhi,($Xi)
- mov 16(%rsp),%rbx
- lea 24(%rsp),%rsp
+ lea 280+48(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ mov -8(%rsi),%rbx
+.cfi_restore %rbx
+ lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lgmult_epilogue:
ret
+.cfi_endproc
.size gcm_gmult_4bit,.-gcm_gmult_4bit
___
@@ -257,13 +285,21 @@ $code.=<<___;
.type gcm_ghash_4bit,\@function,4
.align 16
gcm_ghash_4bit:
+.cfi_startproc
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
sub \$280,%rsp
+.cfi_adjust_cfa_offset 280
.Lghash_prologue:
mov $inp,%r14 # reassign couple of args
mov $len,%r15
@@ -391,16 +427,25 @@ $code.=<<___;
mov $Zlo,8($Xi)
mov $Zhi,($Xi)
- lea 280(%rsp),%rsi
- mov 0(%rsi),%r15
- mov 8(%rsi),%r14
- mov 16(%rsi),%r13
- mov 24(%rsi),%r12
- mov 32(%rsi),%rbp
- mov 40(%rsi),%rbx
- lea 48(%rsi),%rsp
+ lea 280+48(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ mov -48(%rsi),%r15
+.cfi_restore %r15
+ mov -40(%rsi),%r14
+.cfi_restore %r14
+ mov -32(%rsi),%r13
+.cfi_restore %r13
+ mov -24(%rsi),%r12
+.cfi_restore %r12
+ mov -16(%rsi),%rbp
+.cfi_restore %rbp
+ mov -8(%rsi),%rbx
+.cfi_restore %rbx
+ lea 0(%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lghash_epilogue:
ret
+.cfi_endproc
.size gcm_ghash_4bit,.-gcm_ghash_4bit
___
@@ -460,7 +505,7 @@ $code.=<<___;
psllq \$57,$Xi #
movdqa $Xi,$T1 #
pslldq \$8,$Xi
- psrldq \$8,$T1 #
+ psrldq \$8,$T1 #
pxor $T2,$Xi
pxor $T1,$Xhi #
@@ -574,7 +619,7 @@ ___
&clmul64x64_T2 ($Xhi,$Xi,$Hkey,$T2);
$code.=<<___ if (0 || (&reduction_alg9($Xhi,$Xi)&&0));
# experimental alternative. special thing about is that there
- # no dependency between the two multiplications...
+ # no dependency between the two multiplications...
mov \$`0xE1<<1`,%eax
mov \$0xA040608020C0E000,%r10 # ((7..0)·0xE0)&0xff
mov \$0x07,%r11d
@@ -749,7 +794,7 @@ $code.=<<___;
movdqa $T2,$T1 #
pslldq \$8,$T2
pclmulqdq \$0x00,$Hkey2,$Xln
- psrldq \$8,$T1 #
+ psrldq \$8,$T1 #
pxor $T2,$Xi
pxor $T1,$Xhi #
movdqu 0($inp),$T1
@@ -885,7 +930,7 @@ $code.=<<___;
psllq \$57,$Xi #
movdqa $Xi,$T1 #
pslldq \$8,$Xi
- psrldq \$8,$T1 #
+ psrldq \$8,$T1 #
pxor $T2,$Xi
pshufd \$0b01001110,$Xhn,$Xmn
pxor $T1,$Xhi #
@@ -1639,14 +1684,20 @@ se_handler:
cmp %r10,%rbx # context->Rip>=epilogue label
jae .Lin_prologue
- lea 24(%rax),%rax # adjust "rsp"
+ lea 48+280(%rax),%rax # adjust "rsp"
mov -8(%rax),%rbx
mov -16(%rax),%rbp
mov -24(%rax),%r12
+ mov -32(%rax),%r13
+ mov -40(%rax),%r14
+ mov -48(%rax),%r15
mov %rbx,144($context) # restore context->Rbx
mov %rbp,160($context) # restore context->Rbp
mov %r12,216($context) # restore context->R12
+ mov %r13,224($context) # restore context->R13
+ mov %r14,232($context) # restore context->R14
+ mov %r15,240($context) # restore context->R15
.Lin_prologue:
mov 8(%rax),%rdi
diff --git a/crypto/modes/asm/ghashp8-ppc.pl b/crypto/modes/asm/ghashp8-ppc.pl
index 71457cf4fc59..6a2ac712950b 100755
--- a/crypto/modes/asm/ghashp8-ppc.pl
+++ b/crypto/modes/asm/ghashp8-ppc.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -16,7 +23,14 @@
# Relative comparison is therefore more informative. This initial
# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
# faster than "4-bit" integer-only compiler-generated 64-bit code.
-# "Initial version" means that there is room for futher improvement.
+# "Initial version" means that there is room for further improvement.
+
+# May 2016
+#
+# 2x aggregated reduction improves performance by 50% (resulting
+# performance on POWER8 is 1 cycle per processed byte), and 4x
+# aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb).
+# POWER9 delivers 0.51 cpb.
$flavour=shift;
$output =shift;
@@ -27,14 +41,21 @@ if ($flavour =~ /64/) {
$STU="stdu";
$POP="ld";
$PUSH="std";
+ $UCMP="cmpld";
+ $SHRI="srdi";
} elsif ($flavour =~ /32/) {
$SIZE_T=4;
$LRSAVE=$SIZE_T;
$STU="stwu";
$POP="lwz";
$PUSH="stw";
+ $UCMP="cmplw";
+ $SHRI="srwi";
} else { die "nonsense $flavour"; }
+$sp="r1";
+$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload
+
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
@@ -46,6 +67,7 @@ my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
+my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19));
my $vrsave="r12";
$code=<<___;
@@ -56,7 +78,7 @@ $code=<<___;
.globl .gcm_init_p8
.align 5
.gcm_init_p8:
- lis r0,0xfff0
+ li r0,-4096
li r8,0x10
mfspr $vrsave,256
li r9,0x20
@@ -78,17 +100,103 @@ $code=<<___;
vsl $H,$H,$t0 # H<<=1
vsrab $t1,$t1,$t2 # broadcast carry bit
vand $t1,$t1,$xC2
- vxor $H,$H,$t1 # twisted H
+ vxor $IN,$H,$t1 # twisted H
- vsldoi $H,$H,$H,8 # twist even more ...
+ vsldoi $H,$IN,$IN,8 # twist even more ...
vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
vsldoi $Hl,$zero,$H,8 # ... and split
vsldoi $Hh,$H,$zero,8
stvx_u $xC2,0,r3 # save pre-computed table
stvx_u $Hl,r8,r3
+ li r8,0x40
stvx_u $H, r9,r3
+ li r9,0x50
stvx_u $Hh,r10,r3
+ li r10,0x60
+
+ vpmsumd $Xl,$IN,$Hl # H.lo·H.lo
+ vpmsumd $Xm,$IN,$H # H.hi·H.lo+H.lo·H.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·H.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $IN1,$Xl,$t1
+
+ vsldoi $H2,$IN1,$IN1,8
+ vsldoi $H2l,$zero,$H2,8
+ vsldoi $H2h,$H2,$zero,8
+
+ stvx_u $H2l,r8,r3 # save H^2
+ li r8,0x70
+ stvx_u $H2,r9,r3
+ li r9,0x80
+ stvx_u $H2h,r10,r3
+ li r10,0x90
+___
+{
+my ($t4,$t5,$t6) = ($Hl,$H,$Hh);
+$code.=<<___;
+ vpmsumd $Xl,$IN,$H2l # H.lo·H^2.lo
+ vpmsumd $Xl1,$IN1,$H2l # H^2.lo·H^2.lo
+ vpmsumd $Xm,$IN,$H2 # H.hi·H^2.lo+H.lo·H^2.hi
+ vpmsumd $Xm1,$IN1,$H2 # H^2.hi·H^2.lo+H^2.lo·H^2.hi
+ vpmsumd $Xh,$IN,$H2h # H.hi·H^2.hi
+ vpmsumd $Xh1,$IN1,$H2h # H^2.hi·H^2.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+ vpmsumd $t6,$Xl1,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vsldoi $t4,$Xm1,$zero,8
+ vsldoi $t5,$zero,$Xm1,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+ vxor $Xl1,$Xl1,$t4
+ vxor $Xh1,$Xh1,$t5
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vsldoi $Xl1,$Xl1,$Xl1,8
+ vxor $Xl,$Xl,$t2
+ vxor $Xl1,$Xl1,$t6
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vsldoi $t5,$Xl1,$Xl1,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vpmsumd $Xl1,$Xl1,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $t5,$t5,$Xh1
+ vxor $Xl,$Xl,$t1
+ vxor $Xl1,$Xl1,$t5
+
+ vsldoi $H,$Xl,$Xl,8
+ vsldoi $H2,$Xl1,$Xl1,8
+ vsldoi $Hl,$zero,$H,8
+ vsldoi $Hh,$H,$zero,8
+ vsldoi $H2l,$zero,$H2,8
+ vsldoi $H2h,$H2,$zero,8
+
+ stvx_u $Hl,r8,r3 # save H^3
+ li r8,0xa0
+ stvx_u $H,r9,r3
+ li r9,0xb0
+ stvx_u $Hh,r10,r3
+ li r10,0xc0
+ stvx_u $H2l,r8,r3 # save H^4
+ stvx_u $H2,r9,r3
+ stvx_u $H2h,r10,r3
mtspr 256,$vrsave
blr
@@ -96,7 +204,9 @@ $code=<<___;
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size .gcm_init_p8,.-.gcm_init_p8
-
+___
+}
+$code.=<<___;
.globl .gcm_gmult_p8
.align 5
.gcm_gmult_p8:
@@ -122,7 +232,7 @@ $code=<<___;
vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
- vpmsumd $t2,$Xl,$xC2 # 1st phase
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
vsldoi $t0,$Xm,$zero,8
vsldoi $t1,$zero,$Xm,8
@@ -132,7 +242,7 @@ $code=<<___;
vsldoi $Xl,$Xl,$Xl,8
vxor $Xl,$Xl,$t2
- vsldoi $t1,$Xl,$Xl,8 # 2nd phase
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
vpmsumd $Xl,$Xl,$xC2
vxor $t1,$t1,$Xh
vxor $Xl,$Xl,$t1
@@ -150,7 +260,7 @@ $code=<<___;
.globl .gcm_ghash_p8
.align 5
.gcm_ghash_p8:
- lis r0,0xfff8
+ li r0,-4096
li r8,0x10
mfspr $vrsave,256
li r9,0x20
@@ -159,52 +269,99 @@ $code=<<___;
lvx_u $Xl,0,$Xip # load Xi
lvx_u $Hl,r8,$Htbl # load pre-computed table
+ li r8,0x40
le?lvsl $lemask,r0,r0
lvx_u $H, r9,$Htbl
+ li r9,0x50
le?vspltisb $t0,0x07
lvx_u $Hh,r10,$Htbl
+ li r10,0x60
le?vxor $lemask,$lemask,$t0
lvx_u $xC2,0,$Htbl
le?vperm $Xl,$Xl,$Xl,$lemask
vxor $zero,$zero,$zero
+ ${UCMP}i $len,64
+ bge Lgcm_ghash_p8_4x
+
lvx_u $IN,0,$inp
addi $inp,$inp,16
- subi $len,$len,16
+ subic. $len,$len,16
le?vperm $IN,$IN,$IN,$lemask
vxor $IN,$IN,$Xl
- b Loop
+ beq Lshort
+
+ lvx_u $H2l,r8,$Htbl # load H^2
+ li r8,16
+ lvx_u $H2, r9,$Htbl
+ add r9,$inp,$len # end of input
+ lvx_u $H2h,r10,$Htbl
+ be?b Loop_2x
.align 5
-Loop:
- subic $len,$len,16
- vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
- subfe. r0,r0,r0 # borrow?-1:0
- vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+Loop_2x:
+ lvx_u $IN1,0,$inp
+ le?vperm $IN1,$IN1,$IN1,$lemask
+
+ subic $len,$len,32
+ vpmsumd $Xl,$IN,$H2l # H^2.lo·Xi.lo
+ vpmsumd $Xl1,$IN1,$Hl # H.lo·Xi+1.lo
+ subfe r0,r0,r0 # borrow?-1:0
+ vpmsumd $Xm,$IN,$H2 # H^2.hi·Xi.lo+H^2.lo·Xi.hi
+ vpmsumd $Xm1,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+1.hi
and r0,r0,$len
- vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+ vpmsumd $Xh,$IN,$H2h # H^2.hi·Xi.hi
+ vpmsumd $Xh1,$IN1,$Hh # H.hi·Xi+1.hi
add $inp,$inp,r0
- vpmsumd $t2,$Xl,$xC2 # 1st phase
+ vxor $Xl,$Xl,$Xl1
+ vxor $Xm,$Xm,$Xm1
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
vsldoi $t0,$Xm,$zero,8
vsldoi $t1,$zero,$Xm,8
+ vxor $Xh,$Xh,$Xh1
vxor $Xl,$Xl,$t0
vxor $Xh,$Xh,$t1
vsldoi $Xl,$Xl,$Xl,8
vxor $Xl,$Xl,$t2
- lvx_u $IN,0,$inp
- addi $inp,$inp,16
+ lvx_u $IN,r8,$inp
+ addi $inp,$inp,32
- vsldoi $t1,$Xl,$Xl,8 # 2nd phase
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
vpmsumd $Xl,$Xl,$xC2
le?vperm $IN,$IN,$IN,$lemask
vxor $t1,$t1,$Xh
vxor $IN,$IN,$t1
vxor $IN,$IN,$Xl
- beq Loop # did $len-=16 borrow?
+ $UCMP r9,$inp
+ bgt Loop_2x # done yet?
+
+ cmplwi $len,0
+ bne Leven
+
+Lshort:
+ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
+ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+Leven:
vxor $Xl,$Xl,$t1
le?vperm $Xl,$Xl,$Xl,$lemask
stvx_u $Xl,0,$Xip # write out Xi
@@ -214,6 +371,284 @@ Loop:
.long 0
.byte 0,12,0x14,0,0,0,4,0
.long 0
+___
+{
+my ($Xl3,$Xm2,$IN2,$H3l,$H3,$H3h,
+ $Xh3,$Xm3,$IN3,$H4l,$H4,$H4h) = map("v$_",(20..31));
+my $IN0=$IN;
+my ($H21l,$H21h,$loperm,$hiperm) = ($Hl,$Hh,$H2l,$H2h);
+
+$code.=<<___;
+.align 5
+.gcm_ghash_p8_4x:
+Lgcm_ghash_p8_4x:
+ $STU $sp,-$FRAME($sp)
+ li r10,`15+6*$SIZE_T`
+ li r11,`31+6*$SIZE_T`
+ stvx v20,r10,$sp
+ addi r10,r10,32
+ stvx v21,r11,$sp
+ addi r11,r11,32
+ stvx v22,r10,$sp
+ addi r10,r10,32
+ stvx v23,r11,$sp
+ addi r11,r11,32
+ stvx v24,r10,$sp
+ addi r10,r10,32
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ li r10,0x60
+ stvx v31,r11,$sp
+ li r0,-1
+ stw $vrsave,`$FRAME-4`($sp) # save vrsave
+ mtspr 256,r0 # preserve all AltiVec registers
+
+ lvsl $t0,0,r8 # 0x0001..0e0f
+ #lvx_u $H2l,r8,$Htbl # load H^2
+ li r8,0x70
+ lvx_u $H2, r9,$Htbl
+ li r9,0x80
+ vspltisb $t1,8 # 0x0808..0808
+ #lvx_u $H2h,r10,$Htbl
+ li r10,0x90
+ lvx_u $H3l,r8,$Htbl # load H^3
+ li r8,0xa0
+ lvx_u $H3, r9,$Htbl
+ li r9,0xb0
+ lvx_u $H3h,r10,$Htbl
+ li r10,0xc0
+ lvx_u $H4l,r8,$Htbl # load H^4
+ li r8,0x10
+ lvx_u $H4, r9,$Htbl
+ li r9,0x20
+ lvx_u $H4h,r10,$Htbl
+ li r10,0x30
+
+ vsldoi $t2,$zero,$t1,8 # 0x0000..0808
+ vaddubm $hiperm,$t0,$t2 # 0x0001..1617
+ vaddubm $loperm,$t1,$hiperm # 0x0809..1e1f
+
+ $SHRI $len,$len,4 # this allows to use sign bit
+ # as carry
+ lvx_u $IN0,0,$inp # load input
+ lvx_u $IN1,r8,$inp
+ subic. $len,$len,8
+ lvx_u $IN2,r9,$inp
+ lvx_u $IN3,r10,$inp
+ addi $inp,$inp,0x40
+ le?vperm $IN0,$IN0,$IN0,$lemask
+ le?vperm $IN1,$IN1,$IN1,$lemask
+ le?vperm $IN2,$IN2,$IN2,$lemask
+ le?vperm $IN3,$IN3,$IN3,$lemask
+
+ vxor $Xh,$IN0,$Xl
+
+ vpmsumd $Xl1,$IN1,$H3l
+ vpmsumd $Xm1,$IN1,$H3
+ vpmsumd $Xh1,$IN1,$H3h
+
+ vperm $H21l,$H2,$H,$hiperm
+ vperm $t0,$IN2,$IN3,$loperm
+ vperm $H21h,$H2,$H,$loperm
+ vperm $t1,$IN2,$IN3,$hiperm
+ vpmsumd $Xm2,$IN2,$H2 # H^2.lo·Xi+2.hi+H^2.hi·Xi+2.lo
+ vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+2.lo+H.lo·Xi+3.lo
+ vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi
+ vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+2.hi+H.hi·Xi+3.hi
+
+ vxor $Xm2,$Xm2,$Xm1
+ vxor $Xl3,$Xl3,$Xl1
+ vxor $Xm3,$Xm3,$Xm2
+ vxor $Xh3,$Xh3,$Xh1
+
+ blt Ltail_4x
+
+Loop_4x:
+ lvx_u $IN0,0,$inp
+ lvx_u $IN1,r8,$inp
+ subic. $len,$len,4
+ lvx_u $IN2,r9,$inp
+ lvx_u $IN3,r10,$inp
+ addi $inp,$inp,0x40
+ le?vperm $IN1,$IN1,$IN1,$lemask
+ le?vperm $IN2,$IN2,$IN2,$lemask
+ le?vperm $IN3,$IN3,$IN3,$lemask
+ le?vperm $IN0,$IN0,$IN0,$lemask
+
+ vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo
+ vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi
+ vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi
+ vpmsumd $Xl1,$IN1,$H3l
+ vpmsumd $Xm1,$IN1,$H3
+ vpmsumd $Xh1,$IN1,$H3h
+
+ vxor $Xl,$Xl,$Xl3
+ vxor $Xm,$Xm,$Xm3
+ vxor $Xh,$Xh,$Xh3
+ vperm $t0,$IN2,$IN3,$loperm
+ vperm $t1,$IN2,$IN3,$hiperm
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+ vpmsumd $Xl3,$t0,$H21l # H.lo·Xi+3.lo +H^2.lo·Xi+2.lo
+ vpmsumd $Xh3,$t1,$H21h # H.hi·Xi+3.hi +H^2.hi·Xi+2.hi
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xm2,$IN2,$H2 # H^2.hi·Xi+2.lo+H^2.lo·Xi+2.hi
+ vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi
+ vpmsumd $Xl,$Xl,$xC2
+
+ vxor $Xl3,$Xl3,$Xl1
+ vxor $Xh3,$Xh3,$Xh1
+ vxor $Xh,$Xh,$IN0
+ vxor $Xm2,$Xm2,$Xm1
+ vxor $Xh,$Xh,$t1
+ vxor $Xm3,$Xm3,$Xm2
+ vxor $Xh,$Xh,$Xl
+ bge Loop_4x
+
+Ltail_4x:
+ vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo
+ vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi
+ vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi
+
+ vxor $Xl,$Xl,$Xl3
+ vxor $Xm,$Xm,$Xm3
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xh,$Xh,$Xh3
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $Xl,$Xl,$t1
+
+ addic. $len,$len,4
+ beq Ldone_4x
+
+ lvx_u $IN0,0,$inp
+ ${UCMP}i $len,2
+ li $len,-4
+ blt Lone
+ lvx_u $IN1,r8,$inp
+ beq Ltwo
+
+Lthree:
+ lvx_u $IN2,r9,$inp
+ le?vperm $IN0,$IN0,$IN0,$lemask
+ le?vperm $IN1,$IN1,$IN1,$lemask
+ le?vperm $IN2,$IN2,$IN2,$lemask
+
+ vxor $Xh,$IN0,$Xl
+ vmr $H4l,$H3l
+ vmr $H4, $H3
+ vmr $H4h,$H3h
+
+ vperm $t0,$IN1,$IN2,$loperm
+ vperm $t1,$IN1,$IN2,$hiperm
+ vpmsumd $Xm2,$IN1,$H2 # H^2.lo·Xi+1.hi+H^2.hi·Xi+1.lo
+ vpmsumd $Xm3,$IN2,$H # H.hi·Xi+2.lo +H.lo·Xi+2.hi
+ vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+1.lo+H.lo·Xi+2.lo
+ vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+1.hi+H.hi·Xi+2.hi
+
+ vxor $Xm3,$Xm3,$Xm2
+ b Ltail_4x
+
+.align 4
+Ltwo:
+ le?vperm $IN0,$IN0,$IN0,$lemask
+ le?vperm $IN1,$IN1,$IN1,$lemask
+
+ vxor $Xh,$IN0,$Xl
+ vperm $t0,$zero,$IN1,$loperm
+ vperm $t1,$zero,$IN1,$hiperm
+
+ vsldoi $H4l,$zero,$H2,8
+ vmr $H4, $H2
+ vsldoi $H4h,$H2,$zero,8
+
+ vpmsumd $Xl3,$t0, $H21l # H.lo·Xi+1.lo
+ vpmsumd $Xm3,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+2.hi
+ vpmsumd $Xh3,$t1, $H21h # H.hi·Xi+1.hi
+
+ b Ltail_4x
+
+.align 4
+Lone:
+ le?vperm $IN0,$IN0,$IN0,$lemask
+
+ vsldoi $H4l,$zero,$H,8
+ vmr $H4, $H
+ vsldoi $H4h,$H,$zero,8
+
+ vxor $Xh,$IN0,$Xl
+ vxor $Xl3,$Xl3,$Xl3
+ vxor $Xm3,$Xm3,$Xm3
+ vxor $Xh3,$Xh3,$Xh3
+
+ b Ltail_4x
+
+Ldone_4x:
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ stvx_u $Xl,0,$Xip # write out Xi
+
+ li r10,`15+6*$SIZE_T`
+ li r11,`31+6*$SIZE_T`
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ addi $sp,$sp,$FRAME
+ blr
+ .long 0
+ .byte 0,12,0x04,0,0x80,0,4,0
+ .long 0
+___
+}
+$code.=<<___;
.size .gcm_ghash_p8,.-.gcm_ghash_p8
.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
@@ -221,6 +656,8 @@ Loop:
___
foreach (split("\n",$code)) {
+ s/\`([^\`]*)\`/eval $1/geo;
+
if ($flavour =~ /le$/o) { # little-endian
s/le\?//o or
s/be\?/#be#/o;
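The 2x and 4x "aggregated reduction" used above (and in the AArch64 module
that follows) rests on unrolling the GHASH recurrence
Xi = (Xi-1 ^ Ci)·H across several blocks, e.g. for four:

	Xi+4 = ((((Xi ^ Ci+1)·H ^ Ci+2)·H ^ Ci+3)·H ^ Ci+4)·H
	     = (Xi ^ Ci+1)·H^4 ^ Ci+2·H^3 ^ Ci+3·H^2 ^ Ci+4·H

which is why gcm_init_p8 now precomputes and stores H^2, H^3 and H^4: the
four multiplications become independent and can be issued in parallel, and
their partial products are summed before a single modular reduction
instead of four.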
diff --git a/crypto/modes/asm/ghashv8-armx.pl b/crypto/modes/asm/ghashv8-armx.pl
index 0886d2180702..47e882008069 100755
--- a/crypto/modes/asm/ghashv8-armx.pl
+++ b/crypto/modes/asm/ghashv8-armx.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -12,26 +19,42 @@
# June 2014
#
# Initial version was developed in tight cooperation with Ard
-# Biesheuvel <ard.biesheuvel@linaro.org> from bits-n-pieces from
-# other assembly modules. Just like aesv8-armx.pl this module
-# supports both AArch32 and AArch64 execution modes.
+# Biesheuvel of Linaro from bits-n-pieces from other assembly modules.
+# Just like aesv8-armx.pl this module supports both AArch32 and
+# AArch64 execution modes.
#
# July 2014
#
# Implement 2x aggregated reduction [see ghash-x86.pl for background
# information].
#
+# November 2017
+#
+# AArch64 register bank to "accommodate" 4x aggregated reduction and
+# improve performance by 20-70% depending on processor.
+#
# Current performance in cycles per processed byte:
#
-# PMULL[2] 32-bit NEON(*)
-# Apple A7 0.92 5.62
-# Cortex-A53 1.01 8.39
-# Cortex-A57 1.17 7.61
+# 64-bit PMULL 32-bit PMULL 32-bit NEON(*)
+# Apple A7 0.58 0.92 5.62
+# Cortex-A53 0.85 1.01 8.39
+# Cortex-A57 0.73 1.17 7.61
+# Denver 0.51 0.65 6.02
+# Mongoose 0.65 1.10 8.06
+# Kryo 0.76 1.16 8.00
#
# (*) presented for reference/comparison purposes;
$flavour = shift;
-open STDOUT,">".shift;
+$output = shift;
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+die "can't locate arm-xlate.pl";
+
+open OUT,"| \"$^X\" $xlate $flavour $output";
+*STDOUT=*OUT;
$Xi="x0"; # argument block
$Htbl="x1";
@@ -47,10 +70,15 @@ my ($t0,$t1,$t2,$xC2,$H,$Hhl,$H2)=map("q$_",(8..14));
$code=<<___;
#include "arm_arch.h"
+#if __ARM_MAX_ARCH__>=7
.text
___
$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/);
-$code.=".fpu neon\n.code 32\n" if ($flavour !~ /64/);
+$code.=<<___ if ($flavour !~ /64/);
+.fpu neon
+.code 32
+#undef __thumb2__
+___
################################################################################
# void gcm_init_v8(u128 Htable[16],const u64 H[2]);
@@ -109,8 +137,56 @@ gcm_init_v8:
vext.8 $t1,$H2,$H2,#8 @ Karatsuba pre-processing
veor $t1,$t1,$H2
vext.8 $Hhl,$t0,$t1,#8 @ pack Karatsuba pre-processed
- vst1.64 {$Hhl-$H2},[x0] @ store Htable[1..2]
+ vst1.64 {$Hhl-$H2},[x0],#32 @ store Htable[1..2]
+___
+if ($flavour =~ /64/) {
+my ($t3,$Yl,$Ym,$Yh) = map("q$_",(4..7));
+
+$code.=<<___;
+ @ calculate H^3 and H^4
+ vpmull.p64 $Xl,$H, $H2
+ vpmull.p64 $Yl,$H2,$H2
+ vpmull2.p64 $Xh,$H, $H2
+ vpmull2.p64 $Yh,$H2,$H2
+ vpmull.p64 $Xm,$t0,$t1
+ vpmull.p64 $Ym,$t1,$t1
+
+ vext.8 $t0,$Xl,$Xh,#8 @ Karatsuba post-processing
+ vext.8 $t1,$Yl,$Yh,#8
+ veor $t2,$Xl,$Xh
+ veor $Xm,$Xm,$t0
+ veor $t3,$Yl,$Yh
+ veor $Ym,$Ym,$t1
+ veor $Xm,$Xm,$t2
+ vpmull.p64 $t2,$Xl,$xC2 @ 1st phase
+ veor $Ym,$Ym,$t3
+ vpmull.p64 $t3,$Yl,$xC2
+
+ vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
+ vmov $Yh#lo,$Ym#hi
+ vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
+ vmov $Ym#hi,$Yl#lo
+ veor $Xl,$Xm,$t2
+ veor $Yl,$Ym,$t3
+
+ vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase
+ vext.8 $t3,$Yl,$Yl,#8
+ vpmull.p64 $Xl,$Xl,$xC2
+ vpmull.p64 $Yl,$Yl,$xC2
+ veor $t2,$t2,$Xh
+ veor $t3,$t3,$Yh
+ veor $H, $Xl,$t2 @ H^3
+ veor $H2,$Yl,$t3 @ H^4
+ vext.8 $t0,$H, $H,#8 @ Karatsuba pre-processing
+ vext.8 $t1,$H2,$H2,#8
+ veor $t0,$t0,$H
+ veor $t1,$t1,$H2
+ vext.8 $Hhl,$t0,$t1,#8 @ pack Karatsuba pre-processed
+ vst1.64 {$H-$H2},[x0] @ store Htable[3..5]
+___
+}
+$code.=<<___;
ret
.size gcm_init_v8,.-gcm_init_v8
___
@@ -179,6 +255,10 @@ $code.=<<___;
.align 4
gcm_ghash_v8:
___
+$code.=<<___ if ($flavour =~ /64/);
+ cmp $len,#64
+ b.hs .Lgcm_ghash_v8_4x
+___
$code.=<<___ if ($flavour !~ /64/);
vstmdb sp!,{d8-d15} @ 32-bit ABI says so
___
@@ -188,13 +268,13 @@ $code.=<<___;
@ loaded value would have
@ to be rotated in order to
@ make it appear as in
- @ alorithm specification
+ @ algorithm specification
subs $len,$len,#32 @ see if $len is 32 or larger
mov $inc,#16 @ $inc is used as post-
@ increment for input pointer;
@ as loop is modulo-scheduled
@ $inc is zeroed just in time
- @ to preclude oversteping
+ @ to preclude overstepping
@ inp[len], which means that
@ last block[s] are actually
@ loaded twice, but last
@@ -326,10 +406,301 @@ $code.=<<___;
ret
.size gcm_ghash_v8,.-gcm_ghash_v8
___
+
+if ($flavour =~ /64/) { # 4x subroutine
+my ($I0,$j1,$j2,$j3,
+ $I1,$I2,$I3,$H3,$H34,$H4,$Yl,$Ym,$Yh) = map("q$_",(4..7,15..23));
+
+$code.=<<___;
+.type gcm_ghash_v8_4x,%function
+.align 4
+gcm_ghash_v8_4x:
+.Lgcm_ghash_v8_4x:
+ vld1.64 {$Xl},[$Xi] @ load [rotated] Xi
+ vld1.64 {$H-$H2},[$Htbl],#48 @ load twisted H, ..., H^2
+ vmov.i8 $xC2,#0xe1
+ vld1.64 {$H3-$H4},[$Htbl] @ load twisted H^3, ..., H^4
+ vshl.u64 $xC2,$xC2,#57 @ compose 0xc2.0 constant
+
+ vld1.64 {$I0-$j3},[$inp],#64
+#ifndef __ARMEB__
+ vrev64.8 $Xl,$Xl
+ vrev64.8 $j1,$j1
+ vrev64.8 $j2,$j2
+ vrev64.8 $j3,$j3
+ vrev64.8 $I0,$I0
+#endif
+ vext.8 $I3,$j3,$j3,#8
+ vext.8 $I2,$j2,$j2,#8
+ vext.8 $I1,$j1,$j1,#8
+
+ vpmull.p64 $Yl,$H,$I3 @ H·Ii+3
+ veor $j3,$j3,$I3
+ vpmull2.p64 $Yh,$H,$I3
+ vpmull.p64 $Ym,$Hhl,$j3
+
+ vpmull.p64 $t0,$H2,$I2 @ H^2·Ii+2
+ veor $j2,$j2,$I2
+ vpmull2.p64 $I2,$H2,$I2
+ vpmull2.p64 $j2,$Hhl,$j2
+
+ veor $Yl,$Yl,$t0
+ veor $Yh,$Yh,$I2
+ veor $Ym,$Ym,$j2
+
+ vpmull.p64 $j3,$H3,$I1 @ H^3·Ii+1
+ veor $j1,$j1,$I1
+ vpmull2.p64 $I1,$H3,$I1
+ vpmull.p64 $j1,$H34,$j1
+
+ veor $Yl,$Yl,$j3
+ veor $Yh,$Yh,$I1
+ veor $Ym,$Ym,$j1
+
+ subs $len,$len,#128
+ b.lo .Ltail4x
+
+ b .Loop4x
+
+.align 4
+.Loop4x:
+ veor $t0,$I0,$Xl
+ vld1.64 {$I0-$j3},[$inp],#64
+ vext.8 $IN,$t0,$t0,#8
+#ifndef __ARMEB__
+ vrev64.8 $j1,$j1
+ vrev64.8 $j2,$j2
+ vrev64.8 $j3,$j3
+ vrev64.8 $I0,$I0
+#endif
+
+ vpmull.p64 $Xl,$H4,$IN @ H^4·(Xi+Ii)
+ veor $t0,$t0,$IN
+ vpmull2.p64 $Xh,$H4,$IN
+ vext.8 $I3,$j3,$j3,#8
+ vpmull2.p64 $Xm,$H34,$t0
+
+ veor $Xl,$Xl,$Yl
+ veor $Xh,$Xh,$Yh
+ vext.8 $I2,$j2,$j2,#8
+ veor $Xm,$Xm,$Ym
+ vext.8 $I1,$j1,$j1,#8
+
+ vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
+ veor $t2,$Xl,$Xh
+ vpmull.p64 $Yl,$H,$I3 @ H·Ii+3
+ veor $j3,$j3,$I3
+ veor $Xm,$Xm,$t1
+ vpmull2.p64 $Yh,$H,$I3
+ veor $Xm,$Xm,$t2
+ vpmull.p64 $Ym,$Hhl,$j3
+
+ vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction
+ vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
+ vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
+ vpmull.p64 $t0,$H2,$I2 @ H^2·Ii+2
+ veor $j2,$j2,$I2
+ vpmull2.p64 $I2,$H2,$I2
+ veor $Xl,$Xm,$t2
+ vpmull2.p64 $j2,$Hhl,$j2
+
+ veor $Yl,$Yl,$t0
+ veor $Yh,$Yh,$I2
+ veor $Ym,$Ym,$j2
+
+ vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
+ vpmull.p64 $Xl,$Xl,$xC2
+ vpmull.p64 $j3,$H3,$I1 @ H^3·Ii+1
+ veor $j1,$j1,$I1
+ veor $t2,$t2,$Xh
+ vpmull2.p64 $I1,$H3,$I1
+ vpmull.p64 $j1,$H34,$j1
+
+ veor $Xl,$Xl,$t2
+ veor $Yl,$Yl,$j3
+ veor $Yh,$Yh,$I1
+ vext.8 $Xl,$Xl,$Xl,#8
+ veor $Ym,$Ym,$j1
+
+ subs $len,$len,#64
+ b.hs .Loop4x
+
+.Ltail4x:
+ veor $t0,$I0,$Xl
+ vext.8 $IN,$t0,$t0,#8
+
+ vpmull.p64 $Xl,$H4,$IN @ H^4·(Xi+Ii)
+ veor $t0,$t0,$IN
+ vpmull2.p64 $Xh,$H4,$IN
+ vpmull2.p64 $Xm,$H34,$t0
+
+ veor $Xl,$Xl,$Yl
+ veor $Xh,$Xh,$Yh
+ veor $Xm,$Xm,$Ym
+
+ adds $len,$len,#64
+ b.eq .Ldone4x
+
+ cmp $len,#32
+ b.lo .Lone
+ b.eq .Ltwo
+.Lthree:
+ vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
+ veor $t2,$Xl,$Xh
+ veor $Xm,$Xm,$t1
+ vld1.64 {$I0-$j2},[$inp]
+ veor $Xm,$Xm,$t2
+#ifndef __ARMEB__
+ vrev64.8 $j1,$j1
+ vrev64.8 $j2,$j2
+ vrev64.8 $I0,$I0
+#endif
+
+ vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction
+ vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
+ vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
+ vext.8 $I2,$j2,$j2,#8
+ vext.8 $I1,$j1,$j1,#8
+ veor $Xl,$Xm,$t2
+
+ vpmull.p64 $Yl,$H,$I2 @ H·Ii+2
+ veor $j2,$j2,$I2
+
+ vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
+ vpmull.p64 $Xl,$Xl,$xC2
+ veor $t2,$t2,$Xh
+ vpmull2.p64 $Yh,$H,$I2
+ vpmull.p64 $Ym,$Hhl,$j2
+ veor $Xl,$Xl,$t2
+ vpmull.p64 $j3,$H2,$I1 @ H^2·Ii+1
+ veor $j1,$j1,$I1
+ vext.8 $Xl,$Xl,$Xl,#8
+
+ vpmull2.p64 $I1,$H2,$I1
+ veor $t0,$I0,$Xl
+ vpmull2.p64 $j1,$Hhl,$j1
+ vext.8 $IN,$t0,$t0,#8
+
+ veor $Yl,$Yl,$j3
+ veor $Yh,$Yh,$I1
+ veor $Ym,$Ym,$j1
+
+ vpmull.p64 $Xl,$H3,$IN @ H^3·(Xi+Ii)
+ veor $t0,$t0,$IN
+ vpmull2.p64 $Xh,$H3,$IN
+ vpmull.p64 $Xm,$H34,$t0
+
+ veor $Xl,$Xl,$Yl
+ veor $Xh,$Xh,$Yh
+ veor $Xm,$Xm,$Ym
+ b .Ldone4x
+
+.align 4
+.Ltwo:
+ vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
+ veor $t2,$Xl,$Xh
+ veor $Xm,$Xm,$t1
+ vld1.64 {$I0-$j1},[$inp]
+ veor $Xm,$Xm,$t2
+#ifndef __ARMEB__
+ vrev64.8 $j1,$j1
+ vrev64.8 $I0,$I0
+#endif
+
+ vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction
+ vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
+ vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
+ vext.8 $I1,$j1,$j1,#8
+ veor $Xl,$Xm,$t2
+
+ vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
+ vpmull.p64 $Xl,$Xl,$xC2
+ veor $t2,$t2,$Xh
+ veor $Xl,$Xl,$t2
+ vext.8 $Xl,$Xl,$Xl,#8
+
+ vpmull.p64 $Yl,$H,$I1 @ H·Ii+1
+ veor $j1,$j1,$I1
+
+ veor $t0,$I0,$Xl
+ vext.8 $IN,$t0,$t0,#8
+
+ vpmull2.p64 $Yh,$H,$I1
+ vpmull.p64 $Ym,$Hhl,$j1
+
+ vpmull.p64 $Xl,$H2,$IN @ H^2·(Xi+Ii)
+ veor $t0,$t0,$IN
+ vpmull2.p64 $Xh,$H2,$IN
+ vpmull2.p64 $Xm,$Hhl,$t0
+
+ veor $Xl,$Xl,$Yl
+ veor $Xh,$Xh,$Yh
+ veor $Xm,$Xm,$Ym
+ b .Ldone4x
+
+.align 4
+.Lone:
+ vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
+ veor $t2,$Xl,$Xh
+ veor $Xm,$Xm,$t1
+ vld1.64 {$I0},[$inp]
+ veor $Xm,$Xm,$t2
+#ifndef __ARMEB__
+ vrev64.8 $I0,$I0
+#endif
+
+ vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction
+ vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
+ vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
+ veor $Xl,$Xm,$t2
+
+ vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
+ vpmull.p64 $Xl,$Xl,$xC2
+ veor $t2,$t2,$Xh
+ veor $Xl,$Xl,$t2
+ vext.8 $Xl,$Xl,$Xl,#8
+
+ veor $t0,$I0,$Xl
+ vext.8 $IN,$t0,$t0,#8
+
+ vpmull.p64 $Xl,$H,$IN
+ veor $t0,$t0,$IN
+ vpmull2.p64 $Xh,$H,$IN
+ vpmull.p64 $Xm,$Hhl,$t0
+
+.Ldone4x:
+ vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
+ veor $t2,$Xl,$Xh
+ veor $Xm,$Xm,$t1
+ veor $Xm,$Xm,$t2
+
+ vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction
+ vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
+ vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
+ veor $Xl,$Xm,$t2
+
+ vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
+ vpmull.p64 $Xl,$Xl,$xC2
+ veor $t2,$t2,$Xh
+ veor $Xl,$Xl,$t2
+ vext.8 $Xl,$Xl,$Xl,#8
+
+#ifndef __ARMEB__
+ vrev64.8 $Xl,$Xl
+#endif
+ vst1.64 {$Xl},[$Xi] @ write out Xi
+
+ ret
+.size gcm_ghash_v8_4x,.-gcm_ghash_v8_4x
+___
+
+}
}
+
$code.=<<___;
.asciz "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
+#endif
___
if ($flavour =~ /64/) { ######## 64-bit code
@@ -337,7 +708,8 @@ if ($flavour =~ /64/) { ######## 64-bit code
my $arg=shift;
$arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o &&
- sprintf "ins v%d.d[%d],v%d.d[%d]",$1,($2 eq "lo")?0:1,$3,($4 eq "lo")?0:1;
+ sprintf "ins v%d.d[%d],v%d.d[%d]",$1<8?$1:$1+8,($2 eq "lo")?0:1,
+ $3<8?$3:$3+8,($4 eq "lo")?0:1;
}
foreach(split("\n",$code)) {
s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o or
@@ -352,7 +724,7 @@ if ($flavour =~ /64/) { ######## 64-bit code
s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo; # old->new registers
s/@\s/\/\//o; # old->new style commentary
- # fix up remainig legacy suffixes
+ # fix up remaining legacy suffixes
s/\.[ui]?8(\s)/$1/o;
s/\.[uis]?32//o and s/\.16b/\.4s/go;
m/\.p64/o and s/\.16b/\.1q/o; # 1st pmull argument
@@ -392,7 +764,7 @@ if ($flavour =~ /64/) { ######## 64-bit code
s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go; # new->old registers
s/\/\/\s?/@ /o; # new->old style commentary
- # fix up remainig new-style suffixes
+ # fix up remaining new-style suffixes
s/\],#[0-9]+/]!/o;
s/cclr\s+([^,]+),\s*([a-z]+)/mov$2 $1,#0/o or
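The 4x subroutine added above implements aggregated reduction: four input blocks are combined with precomputed powers of H so only one reduction is needed per group, i.e. Xi' = (Xi ^ I0)·H^4 ^ I1·H^3 ^ I2·H^2 ^ I3·H, which is algebraically equal to four serial multiply-reduce steps. The gcm_init_v8 change above extends Htable with H^3 and H^4 for exactly this. A sketch in C, assuming the bit-serial gf128_mul from the earlier note:

    #include <stdint.h>

    /* Aggregated GHASH over four blocks: one logical reduction point
     * instead of four, at the cost of precomputed H^2..H^4. */
    static void ghash4(uint64_t Xi[2],
                       const uint64_t H[2],  const uint64_t H2[2],
                       const uint64_t H3[2], const uint64_t H4[2],
                       const uint64_t I0[2], const uint64_t I1[2],
                       const uint64_t I2[2], const uint64_t I3[2])
    {
        uint64_t t[2], a[2], b[2], c[2], d[2];

        t[0] = Xi[0] ^ I0[0];
        t[1] = Xi[1] ^ I0[1];
        gf128_mul(a, t,  H4);
        gf128_mul(b, I1, H3);
        gf128_mul(c, I2, H2);
        gf128_mul(d, I3, H);
        Xi[0] = a[0] ^ b[0] ^ c[0] ^ d[0];
        Xi[1] = a[1] ^ b[1] ^ c[1] ^ d[1];
    }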
diff --git a/crypto/modes/build.info b/crypto/modes/build.info
new file mode 100644
index 000000000000..821340eb909a
--- /dev/null
+++ b/crypto/modes/build.info
@@ -0,0 +1,30 @@
+LIBS=../../libcrypto
+SOURCE[../../libcrypto]=\
+ cbc128.c ctr128.c cts128.c cfb128.c ofb128.c gcm128.c \
+ ccm128.c xts128.c wrap128.c ocb128.c \
+ {- $target{modes_asm_src} -}
+
+INCLUDE[gcm128.o]=..
+
+GENERATE[ghash-ia64.s]=asm/ghash-ia64.pl $(LIB_CFLAGS) $(LIB_CPPFLAGS)
+GENERATE[ghash-x86.s]=asm/ghash-x86.pl \
+ $(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR)
+GENERATE[ghash-x86_64.s]=asm/ghash-x86_64.pl $(PERLASM_SCHEME)
+GENERATE[aesni-gcm-x86_64.s]=asm/aesni-gcm-x86_64.pl $(PERLASM_SCHEME)
+GENERATE[ghash-sparcv9.S]=asm/ghash-sparcv9.pl $(PERLASM_SCHEME)
+INCLUDE[ghash-sparcv9.o]=..
+GENERATE[ghash-alpha.S]=asm/ghash-alpha.pl $(PERLASM_SCHEME)
+GENERATE[ghash-parisc.s]=asm/ghash-parisc.pl $(PERLASM_SCHEME)
+GENERATE[ghashp8-ppc.s]=asm/ghashp8-ppc.pl $(PERLASM_SCHEME)
+GENERATE[ghash-armv4.S]=asm/ghash-armv4.pl $(PERLASM_SCHEME)
+INCLUDE[ghash-armv4.o]=..
+GENERATE[ghashv8-armx.S]=asm/ghashv8-armx.pl $(PERLASM_SCHEME)
+INCLUDE[ghashv8-armx.o]=..
+GENERATE[ghash-s390x.S]=asm/ghash-s390x.pl $(PERLASM_SCHEME)
+INCLUDE[ghash-s390x.o]=..
+
+BEGINRAW[Makefile]
+# GNU make "catch all"
+{- $builddir -}/ghash-%.S: {- $sourcedir -}/asm/ghash-%.pl
+ CC="$(CC)" $(PERL) $< $(PERLASM_SCHEME) $@
+ENDRAW[Makefile]
diff --git a/crypto/modes/cbc128.c b/crypto/modes/cbc128.c
index c13caea5355b..4ce5eb2ae341 100644
--- a/crypto/modes/cbc128.c
+++ b/crypto/modes/cbc128.c
@@ -1,64 +1,16 @@
-/* ====================================================================
- * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. All advertising materials mentioning features or use of this
- * software must display the following acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
- *
- * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
- * endorse or promote products derived from this software without
- * prior written permission. For written permission, please contact
- * openssl-core@openssl.org.
- *
- * 5. Products derived from this software may not be called "OpenSSL"
- * nor may "OpenSSL" appear in their names without prior written
- * permission of the OpenSSL Project.
- *
- * 6. Redistributions of any form whatsoever must retain the following
- * acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
- *
- * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
- * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * ====================================================================
+/*
+ * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved.
*
+ * Licensed under the OpenSSL license (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
*/
#include <openssl/crypto.h>
#include "modes_lcl.h"
#include <string.h>
-#ifndef MODES_DEBUG
-# ifndef NDEBUG
-# define NDEBUG
-# endif
-#endif
-#include <assert.h>
-
#if !defined(STRICT_ALIGNMENT) && !defined(PEDANTIC)
# define STRICT_ALIGNMENT 0
#endif
@@ -70,7 +22,8 @@ void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out,
size_t n;
const unsigned char *iv = ivec;
- assert(in && out && key && ivec);
+ if (len == 0)
+ return;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
if (STRICT_ALIGNMENT &&
@@ -123,7 +76,8 @@ void CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out,
unsigned char c[16];
} tmp;
- assert(in && out && key && ivec);
+ if (len == 0)
+ return;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
if (in != out) {
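Across these C files the patch replaces the compiled-out assert() argument checks with an explicit early return for empty input, so a zero-length call is now a well-defined no-op even in release builds. A small hedged demonstration against the public modes.h API:

    #include <string.h>
    #include <openssl/aes.h>
    #include <openssl/modes.h>

    /* With this patch a zero-length CBC call returns before touching
     * in/out and leaves the IV unchanged; previously the only guard was
     * an assert(), which NDEBUG builds compiled away. */
    static int cbc_empty_demo(void)
    {
        AES_KEY ks;
        unsigned char key[16] = { 0 }, iv[16] = { 0 }, iv0[16] = { 0 };

        AES_set_encrypt_key(key, 128, &ks);
        CRYPTO_cbc128_encrypt(NULL, NULL, 0, &ks, iv,
                              (block128_f)AES_encrypt);
        return memcmp(iv, iv0, sizeof(iv)) == 0;   /* 1: IV untouched */
    }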
diff --git a/crypto/modes/ccm128.c b/crypto/modes/ccm128.c
index c1ded0f9148a..85ce84f10d80 100644
--- a/crypto/modes/ccm128.c
+++ b/crypto/modes/ccm128.c
@@ -1,63 +1,16 @@
-/* ====================================================================
- * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. All advertising materials mentioning features or use of this
- * software must display the following acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
- *
- * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
- * endorse or promote products derived from this software without
- * prior written permission. For written permission, please contact
- * openssl-core@openssl.org.
- *
- * 5. Products derived from this software may not be called "OpenSSL"
- * nor may "OpenSSL" appear in their names without prior written
- * permission of the OpenSSL Project.
- *
- * 6. Redistributions of any form whatsoever must retain the following
- * acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+/*
+ * Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
*
- * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
- * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * ====================================================================
+ * Licensed under the OpenSSL license (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
*/
#include <openssl/crypto.h>
#include "modes_lcl.h"
#include <string.h>
-#ifndef MODES_DEBUG
-# ifndef NDEBUG
-# define NDEBUG
-# endif
-#endif
-#include <assert.h>
-
/*
* First you setup M and L parameters and pass the key schedule. This is
* called once per session setup...
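The comment above outlines the CCM calling sequence. A hedged usage sketch of that per-session flow, using the public CRYPTO_ccm128_* entry points from modes.h with AES as the block cipher (parameter choices here, 16-byte tag and 2-byte length field, are illustrative):

    #include <openssl/aes.h>
    #include <openssl/modes.h>

    /* M = tag length, L = length-field width; a 2-byte L implies a
     * 15 - 2 = 13-byte nonce.  Returns 1 on success, 0 on failure. */
    static int ccm_seal(unsigned char *out, unsigned char tag[16],
                        const unsigned char *msg, size_t mlen,
                        const unsigned char *aad, size_t alen,
                        const unsigned char nonce[13], const AES_KEY *ks)
    {
        CCM128_CONTEXT ctx;

        CRYPTO_ccm128_init(&ctx, 16, 2, (void *)ks,
                           (block128_f)AES_encrypt);
        if (CRYPTO_ccm128_setiv(&ctx, nonce, 13, mlen))
            return 0;
        if (alen)
            CRYPTO_ccm128_aad(&ctx, aad, alen);
        if (CRYPTO_ccm128_encrypt(&ctx, msg, out, mlen))
            return 0;
        return CRYPTO_ccm128_tag(&ctx, tag, 16) == 16;
    }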
diff --git a/crypto/modes/cfb128.c b/crypto/modes/cfb128.c
index d4ecbd08eed1..e439567fe59f 100644
--- a/crypto/modes/cfb128.c
+++ b/crypto/modes/cfb128.c
@@ -1,64 +1,16 @@
-/* ====================================================================
- * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. All advertising materials mentioning features or use of this
- * software must display the following acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
- *
- * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
- * endorse or promote products derived from this software without
- * prior written permission. For written permission, please contact
- * openssl-core@openssl.org.
- *
- * 5. Products derived from this software may not be called "OpenSSL"
- * nor may "OpenSSL" appear in their names without prior written
- * permission of the OpenSSL Project.
- *
- * 6. Redistributions of any form whatsoever must retain the following
- * acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
- *
- * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
- * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * ====================================================================
+/*
+ * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved.
*
+ * Licensed under the OpenSSL license (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
*/
#include <openssl/crypto.h>
#include "modes_lcl.h"
#include <string.h>
-#ifndef MODES_DEBUG
-# ifndef NDEBUG
-# define NDEBUG
-# endif
-#endif
-#include <assert.h>
-
/*
* The input and output encrypted as though 128bit cfb mode is being used.
* The extra state information to record how much of the 128bit block we have
@@ -72,8 +24,6 @@ void CRYPTO_cfb128_encrypt(const unsigned char *in, unsigned char *out,
unsigned int n;
size_t l = 0;
- assert(in && out && key && ivec && num);
-
n = *num;
if (enc) {
@@ -190,7 +140,7 @@ static void cfbr_encrypt_block(const unsigned char *in, unsigned char *out,
block128_f block)
{
int n, rem, num;
- unsigned char ovec[16 * 2 + 1]; /* +1 because we dererefence (but don't
+ unsigned char ovec[16 * 2 + 1]; /* +1 because we dereference (but don't
* use) one byte off the end */
if (nbits <= 0 || nbits > 128)
@@ -228,9 +178,6 @@ void CRYPTO_cfb128_1_encrypt(const unsigned char *in, unsigned char *out,
size_t n;
unsigned char c[1], d[1];
- assert(in && out && key && ivec && num);
- assert(*num == 0);
-
for (n = 0; n < bits; ++n) {
c[0] = (in[n / 8] & (1 << (7 - n % 8))) ? 0x80 : 0;
cfbr_encrypt_block(c, d, 1, key, ivec, enc, block);
@@ -246,9 +193,6 @@ void CRYPTO_cfb128_8_encrypt(const unsigned char *in, unsigned char *out,
{
size_t n;
- assert(in && out && key && ivec && num);
- assert(*num == 0);
-
for (n = 0; n < length; ++n)
cfbr_encrypt_block(&in[n], &out[n], 8, key, ivec, enc, block);
}
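As in cbc128.c, the asserts here are dropped. The *num state threaded through CRYPTO_cfb128_encrypt is what makes arbitrary-length streaming work: it records the position inside the current 16-byte keystream block between calls. A hedged sketch:

    #include <openssl/aes.h>
    #include <openssl/modes.h>

    /* Two irregular chunks are equivalent to one 16-byte call because
     * `num` carries the keystream position across calls.  Note CFB uses
     * the cipher's encrypt direction for decryption too (enc = 0). */
    static void cfb_stream_demo(const unsigned char *in, unsigned char *out,
                                unsigned char iv[16], const AES_KEY *ks)
    {
        int num = 0;

        CRYPTO_cfb128_encrypt(in, out, 5, ks, iv, &num, 1,
                              (block128_f)AES_encrypt);
        CRYPTO_cfb128_encrypt(in + 5, out + 5, 11, ks, iv, &num, 1,
                              (block128_f)AES_encrypt);
    }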
diff --git a/crypto/modes/ctr128.c b/crypto/modes/ctr128.c
index d4b22728e623..03920b447333 100644
--- a/crypto/modes/ctr128.c
+++ b/crypto/modes/ctr128.c
@@ -1,64 +1,16 @@
-/* ====================================================================
- * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. All advertising materials mentioning features or use of this
- * software must display the following acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
- *
- * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
- * endorse or promote products derived from this software without
- * prior written permission. For written permission, please contact
- * openssl-core@openssl.org.
- *
- * 5. Products derived from this software may not be called "OpenSSL"
- * nor may "OpenSSL" appear in their names without prior written
- * permission of the OpenSSL Project.
- *
- * 6. Redistributions of any form whatsoever must retain the following
- * acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
- *
- * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
- * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * ====================================================================
+/*
+ * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved.
*
+ * Licensed under the OpenSSL license (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
*/
#include <openssl/crypto.h>
#include "modes_lcl.h"
#include <string.h>
-#ifndef MODES_DEBUG
-# ifndef NDEBUG
-# define NDEBUG
-# endif
-#endif
-#include <assert.h>
-
/*
* NOTE: the IV/counter CTR mode is big-endian. The code itself is
* endian-neutral.
@@ -113,7 +65,7 @@ static void ctr128_inc_aligned(unsigned char *counter)
* before the first call to CRYPTO_ctr128_encrypt(). This algorithm assumes
* that the counter is in the x lower bits of the IV (ivec), and that the
* application has full control over overflow and the rest of the IV. This
- * implementation takes NO responsability for checking that the counter
+ * implementation takes NO responsibility for checking that the counter
* doesn't overflow into the rest of the IV when incremented.
*/
void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out,
@@ -125,9 +77,6 @@ void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out,
unsigned int n;
size_t l = 0;
- assert(in && out && key && ecount_buf && num);
- assert(*num < 16);
-
n = *num;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
@@ -203,9 +152,6 @@ void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out,
{
unsigned int n, ctr32;
- assert(in && out && key && ecount_buf && num);
- assert(*num < 16);
-
n = *num;
while (n && len) {
@@ -238,7 +184,7 @@ void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out,
(*func) (in, out, blocks, key, ivec);
/* (*ctr) does not update ivec, caller does: */
PUTU32(ivec + 12, ctr32);
- /* ... overflow was detected, propogate carry. */
+ /* ... overflow was detected, propagate carry. */
if (ctr32 == 0)
ctr96_inc(ivec);
blocks *= 16;
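The hunk above is the point where the ctr32 fast path hands carry handling back to C: the stream routine increments only the low 32 bits of the counter, and on wrap the caller propagates the carry into the upper 96 bits via ctr96_inc(). A sketch of that split increment (illustrative names, big-endian counter as the NOTE above specifies):

    #include <stdint.h>

    static void ctr96_inc_sketch(unsigned char counter[16])
    {
        int n = 12;

        while (n--)
            if (++counter[n] != 0)   /* stop once a byte does not wrap */
                return;
    }

    static void ctr128_inc_sketch(unsigned char counter[16])
    {
        uint32_t ctr32 = ((uint32_t)counter[12] << 24) |
                         ((uint32_t)counter[13] << 16) |
                         ((uint32_t)counter[14] << 8) | counter[15];

        if (++ctr32 == 0)            /* low word wrapped: carry upward */
            ctr96_inc_sketch(counter);
        counter[12] = (unsigned char)(ctr32 >> 24);
        counter[13] = (unsigned char)(ctr32 >> 16);
        counter[14] = (unsigned char)(ctr32 >> 8);
        counter[15] = (unsigned char)ctr32;
    }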
diff --git a/crypto/modes/cts128.c b/crypto/modes/cts128.c
index 137be595a105..93826a1e2f06 100644
--- a/crypto/modes/cts128.c
+++ b/crypto/modes/cts128.c
@@ -1,21 +1,16 @@
-/* ====================================================================
- * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
+/*
+ * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved.
*
- * Rights for redistribution and usage in source and binary
- * forms are granted according to the OpenSSL license.
+ * Licensed under the OpenSSL license (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
*/
#include <openssl/crypto.h>
#include "modes_lcl.h"
#include <string.h>
-#ifndef MODES_DEBUG
-# ifndef NDEBUG
-# define NDEBUG
-# endif
-#endif
-#include <assert.h>
-
/*
* Trouble with Ciphertext Stealing, CTS, mode is that there is no
* common official specification, but couple of cipher/application
@@ -36,8 +31,6 @@ size_t CRYPTO_cts128_encrypt_block(const unsigned char *in,
{
size_t residue, n;
- assert(in && out && key && ivec);
-
if (len <= 16)
return 0;
@@ -68,8 +61,6 @@ size_t CRYPTO_nistcts128_encrypt_block(const unsigned char *in,
{
size_t residue, n;
- assert(in && out && key && ivec);
-
if (len < 16)
return 0;
@@ -103,8 +94,6 @@ size_t CRYPTO_cts128_encrypt(const unsigned char *in, unsigned char *out,
unsigned char c[16];
} tmp;
- assert(in && out && key && ivec);
-
if (len <= 16)
return 0;
@@ -141,8 +130,6 @@ size_t CRYPTO_nistcts128_encrypt(const unsigned char *in, unsigned char *out,
unsigned char c[16];
} tmp;
- assert(in && out && key && ivec);
-
if (len < 16)
return 0;
@@ -179,8 +166,6 @@ size_t CRYPTO_cts128_decrypt_block(const unsigned char *in,
unsigned char c[32];
} tmp;
- assert(in && out && key && ivec);
-
if (len <= 16)
return 0;
@@ -224,8 +209,6 @@ size_t CRYPTO_nistcts128_decrypt_block(const unsigned char *in,
unsigned char c[32];
} tmp;
- assert(in && out && key && ivec);
-
if (len < 16)
return 0;
@@ -272,8 +255,6 @@ size_t CRYPTO_cts128_decrypt(const unsigned char *in, unsigned char *out,
unsigned char c[32];
} tmp;
- assert(in && out && key && ivec);
-
if (len <= 16)
return 0;
@@ -314,8 +295,6 @@ size_t CRYPTO_nistcts128_decrypt(const unsigned char *in, unsigned char *out,
unsigned char c[32];
} tmp;
- assert(in && out && key && ivec);
-
if (len < 16)
return 0;
@@ -349,196 +328,3 @@ size_t CRYPTO_nistcts128_decrypt(const unsigned char *in, unsigned char *out,
#endif
return 16 + len + residue;
}
-
-#if defined(SELFTEST)
-# include <stdio.h>
-# include <openssl/aes.h>
-
-/* test vectors from RFC 3962 */
-static const unsigned char test_key[16] = "chicken teriyaki";
-static const unsigned char test_input[64] =
- "I would like the" " General Gau's C"
- "hicken, please, " "and wonton soup.";
-static const unsigned char test_iv[16] =
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
-
-static const unsigned char vector_17[17] = {
- 0xc6, 0x35, 0x35, 0x68, 0xf2, 0xbf, 0x8c, 0xb4,
- 0xd8, 0xa5, 0x80, 0x36, 0x2d, 0xa7, 0xff, 0x7f,
- 0x97
-};
-
-static const unsigned char vector_31[31] = {
- 0xfc, 0x00, 0x78, 0x3e, 0x0e, 0xfd, 0xb2, 0xc1,
- 0xd4, 0x45, 0xd4, 0xc8, 0xef, 0xf7, 0xed, 0x22,
- 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
- 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5
-};
-
-static const unsigned char vector_32[32] = {
- 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5,
- 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8,
- 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
- 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84
-};
-
-static const unsigned char vector_47[47] = {
- 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
- 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84,
- 0xb3, 0xff, 0xfd, 0x94, 0x0c, 0x16, 0xa1, 0x8c,
- 0x1b, 0x55, 0x49, 0xd2, 0xf8, 0x38, 0x02, 0x9e,
- 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5,
- 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5
-};
-
-static const unsigned char vector_48[48] = {
- 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
- 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84,
- 0x9d, 0xad, 0x8b, 0xbb, 0x96, 0xc4, 0xcd, 0xc0,
- 0x3b, 0xc1, 0x03, 0xe1, 0xa1, 0x94, 0xbb, 0xd8,
- 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5,
- 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8
-};
-
-static const unsigned char vector_64[64] = {
- 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
- 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84,
- 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5,
- 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8,
- 0x48, 0x07, 0xef, 0xe8, 0x36, 0xee, 0x89, 0xa5,
- 0x26, 0x73, 0x0d, 0xbc, 0x2f, 0x7b, 0xc8, 0x40,
- 0x9d, 0xad, 0x8b, 0xbb, 0x96, 0xc4, 0xcd, 0xc0,
- 0x3b, 0xc1, 0x03, 0xe1, 0xa1, 0x94, 0xbb, 0xd8
-};
-
-static AES_KEY encks, decks;
-
-void test_vector(const unsigned char *vector, size_t len)
-{
- unsigned char iv[sizeof(test_iv)];
- unsigned char cleartext[64], ciphertext[64];
- size_t tail;
-
- printf("vector_%d\n", len);
- fflush(stdout);
-
- if ((tail = len % 16) == 0)
- tail = 16;
- tail += 16;
-
- /* test block-based encryption */
- memcpy(iv, test_iv, sizeof(test_iv));
- CRYPTO_cts128_encrypt_block(test_input, ciphertext, len, &encks, iv,
- (block128_f) AES_encrypt);
- if (memcmp(ciphertext, vector, len))
- fprintf(stderr, "output_%d mismatch\n", len), exit(1);
- if (memcmp(iv, vector + len - tail, sizeof(iv)))
- fprintf(stderr, "iv_%d mismatch\n", len), exit(1);
-
- /* test block-based decryption */
- memcpy(iv, test_iv, sizeof(test_iv));
- CRYPTO_cts128_decrypt_block(ciphertext, cleartext, len, &decks, iv,
- (block128_f) AES_decrypt);
- if (memcmp(cleartext, test_input, len))
- fprintf(stderr, "input_%d mismatch\n", len), exit(2);
- if (memcmp(iv, vector + len - tail, sizeof(iv)))
- fprintf(stderr, "iv_%d mismatch\n", len), exit(2);
-
- /* test streamed encryption */
- memcpy(iv, test_iv, sizeof(test_iv));
- CRYPTO_cts128_encrypt(test_input, ciphertext, len, &encks, iv,
- (cbc128_f) AES_cbc_encrypt);
- if (memcmp(ciphertext, vector, len))
- fprintf(stderr, "output_%d mismatch\n", len), exit(3);
- if (memcmp(iv, vector + len - tail, sizeof(iv)))
- fprintf(stderr, "iv_%d mismatch\n", len), exit(3);
-
- /* test streamed decryption */
- memcpy(iv, test_iv, sizeof(test_iv));
- CRYPTO_cts128_decrypt(ciphertext, cleartext, len, &decks, iv,
- (cbc128_f) AES_cbc_encrypt);
- if (memcmp(cleartext, test_input, len))
- fprintf(stderr, "input_%d mismatch\n", len), exit(4);
- if (memcmp(iv, vector + len - tail, sizeof(iv)))
- fprintf(stderr, "iv_%d mismatch\n", len), exit(4);
-}
-
-void test_nistvector(const unsigned char *vector, size_t len)
-{
- unsigned char iv[sizeof(test_iv)];
- unsigned char cleartext[64], ciphertext[64], nistvector[64];
- size_t tail;
-
- printf("nistvector_%d\n", len);
- fflush(stdout);
-
- if ((tail = len % 16) == 0)
- tail = 16;
-
- len -= 16 + tail;
- memcpy(nistvector, vector, len);
- /* flip two last blocks */
- memcpy(nistvector + len, vector + len + 16, tail);
- memcpy(nistvector + len + tail, vector + len, 16);
- len += 16 + tail;
- tail = 16;
-
- /* test block-based encryption */
- memcpy(iv, test_iv, sizeof(test_iv));
- CRYPTO_nistcts128_encrypt_block(test_input, ciphertext, len, &encks, iv,
- (block128_f) AES_encrypt);
- if (memcmp(ciphertext, nistvector, len))
- fprintf(stderr, "output_%d mismatch\n", len), exit(1);
- if (memcmp(iv, nistvector + len - tail, sizeof(iv)))
- fprintf(stderr, "iv_%d mismatch\n", len), exit(1);
-
- /* test block-based decryption */
- memcpy(iv, test_iv, sizeof(test_iv));
- CRYPTO_nistcts128_decrypt_block(ciphertext, cleartext, len, &decks, iv,
- (block128_f) AES_decrypt);
- if (memcmp(cleartext, test_input, len))
- fprintf(stderr, "input_%d mismatch\n", len), exit(2);
- if (memcmp(iv, nistvector + len - tail, sizeof(iv)))
- fprintf(stderr, "iv_%d mismatch\n", len), exit(2);
-
- /* test streamed encryption */
- memcpy(iv, test_iv, sizeof(test_iv));
- CRYPTO_nistcts128_encrypt(test_input, ciphertext, len, &encks, iv,
- (cbc128_f) AES_cbc_encrypt);
- if (memcmp(ciphertext, nistvector, len))
- fprintf(stderr, "output_%d mismatch\n", len), exit(3);
- if (memcmp(iv, nistvector + len - tail, sizeof(iv)))
- fprintf(stderr, "iv_%d mismatch\n", len), exit(3);
-
- /* test streamed decryption */
- memcpy(iv, test_iv, sizeof(test_iv));
- CRYPTO_nistcts128_decrypt(ciphertext, cleartext, len, &decks, iv,
- (cbc128_f) AES_cbc_encrypt);
- if (memcmp(cleartext, test_input, len))
- fprintf(stderr, "input_%d mismatch\n", len), exit(4);
- if (memcmp(iv, nistvector + len - tail, sizeof(iv)))
- fprintf(stderr, "iv_%d mismatch\n", len), exit(4);
-}
-
-int main()
-{
- AES_set_encrypt_key(test_key, 128, &encks);
- AES_set_decrypt_key(test_key, 128, &decks);
-
- test_vector(vector_17, sizeof(vector_17));
- test_vector(vector_31, sizeof(vector_31));
- test_vector(vector_32, sizeof(vector_32));
- test_vector(vector_47, sizeof(vector_47));
- test_vector(vector_48, sizeof(vector_48));
- test_vector(vector_64, sizeof(vector_64));
-
- test_nistvector(vector_17, sizeof(vector_17));
- test_nistvector(vector_31, sizeof(vector_31));
- test_nistvector(vector_32, sizeof(vector_32));
- test_nistvector(vector_47, sizeof(vector_47));
- test_nistvector(vector_48, sizeof(vector_48));
- test_nistvector(vector_64, sizeof(vector_64));
-
- return 0;
-}
-#endif
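The block removed above was a compile-time SELFTEST harness driving the RFC 3962 vectors; the public entry points it exercised are unchanged by this import. For reference, a hedged sketch of the streaming API the harness was calling:

    #include <openssl/aes.h>
    #include <openssl/modes.h>

    /* CBC-CTS encryption of a non-block-multiple message.  The classic
     * variant requires len > 16; the NIST variant accepts len >= 16.
     * Returns the number of bytes written (== len), or 0 on failure. */
    static size_t cts_encrypt_demo(const unsigned char *in,
                                   unsigned char *out, size_t len,
                                   unsigned char iv[16], const AES_KEY *ks)
    {
        return CRYPTO_cts128_encrypt(in, out, len, ks, iv,
                                     (cbc128_f)AES_cbc_encrypt);
    }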
diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c
index e299131c1382..15f76e3e86bc 100644
--- a/crypto/modes/gcm128.c
+++ b/crypto/modes/gcm128.c
@@ -1,65 +1,16 @@
-/* ====================================================================
- * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. All advertising materials mentioning features or use of this
- * software must display the following acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
- *
- * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
- * endorse or promote products derived from this software without
- * prior written permission. For written permission, please contact
- * openssl-core@openssl.org.
- *
- * 5. Products derived from this software may not be called "OpenSSL"
- * nor may "OpenSSL" appear in their names without prior written
- * permission of the OpenSSL Project.
- *
- * 6. Redistributions of any form whatsoever must retain the following
- * acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+/*
+ * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
*
- * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
- * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * ====================================================================
+ * Licensed under the OpenSSL license (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
*/
-#define OPENSSL_FIPSAPI
-
#include <openssl/crypto.h>
#include "modes_lcl.h"
#include <string.h>
-#ifndef MODES_DEBUG
-# ifndef NDEBUG
-# define NDEBUG
-# endif
-#endif
-#include <assert.h>
-
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
# undef GETU32
@@ -150,9 +101,7 @@ static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
const union {
long one;
char little;
- } is_endian = {
- 1
- };
+ } is_endian = { 1 };
static const size_t rem_8bit[256] = {
PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
@@ -260,7 +209,7 @@ static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
}
}
-# define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
+# define GCM_MUL(ctx) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
#elif TABLE_BITS==4
@@ -321,9 +270,7 @@ static void gcm_init_4bit(u128 Htable[16], u64 H[2])
const union {
long one;
char little;
- } is_endian = {
- 1
- };
+ } is_endian = { 1 };
if (is_endian.little)
for (j = 0; j < 16; ++j) {
@@ -356,9 +303,7 @@ static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
const union {
long one;
char little;
- } is_endian = {
- 1
- };
+ } is_endian = { 1 };
nlo = ((const u8 *)Xi)[15];
nhi = nlo >> 4;
@@ -437,9 +382,7 @@ static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
const union {
long one;
char little;
- } is_endian = {
- 1
- };
+ } is_endian = { 1 };
# if 1
do {
@@ -607,7 +550,7 @@ void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
size_t len);
# endif
-# define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
+# define GCM_MUL(ctx) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
# define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
@@ -629,9 +572,7 @@ static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
const union {
long one;
char little;
- } is_endian = {
- 1
- };
+ } is_endian = { 1 };
V.hi = H[0]; /* H is in host byte order, no byte swapping */
V.lo = H[1];
@@ -683,7 +624,7 @@ static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
}
}
-# define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
+# define GCM_MUL(ctx) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
#endif
@@ -762,7 +703,7 @@ void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
#ifdef GCM_FUNCREF_4BIT
# undef GCM_MUL
-# define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
+# define GCM_MUL(ctx) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
# ifdef GHASH
# undef GHASH
# define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
@@ -774,9 +715,7 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
const union {
long one;
char little;
- } is_endian = {
- 1
- };
+ } is_endian = { 1 };
memset(ctx, 0, sizeof(*ctx));
ctx->block = block;
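A recurring cleanup throughout gcm128.c is collapsing the multi-line initializer of the endianness-probe union onto one line. The idiom itself, shown standalone for orientation, is a run-time byte-order check:

    /* Store long 1 and read back its first byte: 1 on a little-endian
     * host, 0 on big-endian.  This is the `is_endian` union used in
     * every function of this file. */
    static int host_is_little_endian(void)
    {
        const union { long one; char little; } is_endian = { 1 };
        return is_endian.little;
    }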
@@ -801,18 +740,22 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
#if TABLE_BITS==8
gcm_init_8bit(ctx->Htable, ctx->H.u);
#elif TABLE_BITS==4
+# if defined(GHASH)
+# define CTX__GHASH(f) (ctx->ghash = (f))
+# else
+# define CTX__GHASH(f) (ctx->ghash = NULL)
+# endif
# if defined(GHASH_ASM_X86_OR_64)
# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
- if (OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
- OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
+ if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
gcm_init_avx(ctx->Htable, ctx->H.u);
ctx->gmult = gcm_gmult_avx;
- ctx->ghash = gcm_ghash_avx;
+ CTX__GHASH(gcm_ghash_avx);
} else {
gcm_init_clmul(ctx->Htable, ctx->H.u);
ctx->gmult = gcm_gmult_clmul;
- ctx->ghash = gcm_ghash_clmul;
+ CTX__GHASH(gcm_ghash_clmul);
}
return;
}
@@ -825,66 +768,59 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
# endif
ctx->gmult = gcm_gmult_4bit_mmx;
- ctx->ghash = gcm_ghash_4bit_mmx;
+ CTX__GHASH(gcm_ghash_4bit_mmx);
} else {
ctx->gmult = gcm_gmult_4bit_x86;
- ctx->ghash = gcm_ghash_4bit_x86;
+ CTX__GHASH(gcm_ghash_4bit_x86);
}
# else
ctx->gmult = gcm_gmult_4bit;
- ctx->ghash = gcm_ghash_4bit;
+ CTX__GHASH(gcm_ghash_4bit);
# endif
# elif defined(GHASH_ASM_ARM)
# ifdef PMULL_CAPABLE
if (PMULL_CAPABLE) {
gcm_init_v8(ctx->Htable, ctx->H.u);
ctx->gmult = gcm_gmult_v8;
- ctx->ghash = gcm_ghash_v8;
+ CTX__GHASH(gcm_ghash_v8);
} else
# endif
# ifdef NEON_CAPABLE
if (NEON_CAPABLE) {
gcm_init_neon(ctx->Htable, ctx->H.u);
ctx->gmult = gcm_gmult_neon;
- ctx->ghash = gcm_ghash_neon;
+ CTX__GHASH(gcm_ghash_neon);
} else
# endif
{
gcm_init_4bit(ctx->Htable, ctx->H.u);
ctx->gmult = gcm_gmult_4bit;
-# if defined(GHASH)
- ctx->ghash = gcm_ghash_4bit;
-# else
- ctx->ghash = NULL;
-# endif
+ CTX__GHASH(gcm_ghash_4bit);
}
# elif defined(GHASH_ASM_SPARC)
if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
gcm_init_vis3(ctx->Htable, ctx->H.u);
ctx->gmult = gcm_gmult_vis3;
- ctx->ghash = gcm_ghash_vis3;
+ CTX__GHASH(gcm_ghash_vis3);
} else {
gcm_init_4bit(ctx->Htable, ctx->H.u);
ctx->gmult = gcm_gmult_4bit;
- ctx->ghash = gcm_ghash_4bit;
+ CTX__GHASH(gcm_ghash_4bit);
}
# elif defined(GHASH_ASM_PPC)
if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
gcm_init_p8(ctx->Htable, ctx->H.u);
ctx->gmult = gcm_gmult_p8;
- ctx->ghash = gcm_ghash_p8;
+ CTX__GHASH(gcm_ghash_p8);
} else {
gcm_init_4bit(ctx->Htable, ctx->H.u);
ctx->gmult = gcm_gmult_4bit;
-# if defined(GHASH)
- ctx->ghash = gcm_ghash_4bit;
-# else
- ctx->ghash = NULL;
-# endif
+ CTX__GHASH(gcm_ghash_4bit);
}
# else
gcm_init_4bit(ctx->Htable, ctx->H.u);
# endif
+# undef CTX__GHASH
#endif
}
@@ -894,18 +830,12 @@ void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
const union {
long one;
char little;
- } is_endian = {
- 1
- };
+ } is_endian = { 1 };
unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif
- ctx->Yi.u[0] = 0;
- ctx->Yi.u[1] = 0;
- ctx->Xi.u[0] = 0;
- ctx->Xi.u[1] = 0;
ctx->len.u[0] = 0; /* AAD length */
ctx->len.u[1] = 0; /* message length */
ctx->ares = 0;
@@ -913,53 +843,68 @@ void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
if (len == 12) {
memcpy(ctx->Yi.c, iv, 12);
+ ctx->Yi.c[12] = 0;
+ ctx->Yi.c[13] = 0;
+ ctx->Yi.c[14] = 0;
ctx->Yi.c[15] = 1;
ctr = 1;
} else {
size_t i;
u64 len0 = len;
+ /* Borrow ctx->Xi to calculate initial Yi */
+ ctx->Xi.u[0] = 0;
+ ctx->Xi.u[1] = 0;
+
while (len >= 16) {
for (i = 0; i < 16; ++i)
- ctx->Yi.c[i] ^= iv[i];
- GCM_MUL(ctx, Yi);
+ ctx->Xi.c[i] ^= iv[i];
+ GCM_MUL(ctx);
iv += 16;
len -= 16;
}
if (len) {
for (i = 0; i < len; ++i)
- ctx->Yi.c[i] ^= iv[i];
- GCM_MUL(ctx, Yi);
+ ctx->Xi.c[i] ^= iv[i];
+ GCM_MUL(ctx);
}
len0 <<= 3;
if (is_endian.little) {
#ifdef BSWAP8
- ctx->Yi.u[1] ^= BSWAP8(len0);
+ ctx->Xi.u[1] ^= BSWAP8(len0);
#else
- ctx->Yi.c[8] ^= (u8)(len0 >> 56);
- ctx->Yi.c[9] ^= (u8)(len0 >> 48);
- ctx->Yi.c[10] ^= (u8)(len0 >> 40);
- ctx->Yi.c[11] ^= (u8)(len0 >> 32);
- ctx->Yi.c[12] ^= (u8)(len0 >> 24);
- ctx->Yi.c[13] ^= (u8)(len0 >> 16);
- ctx->Yi.c[14] ^= (u8)(len0 >> 8);
- ctx->Yi.c[15] ^= (u8)(len0);
+ ctx->Xi.c[8] ^= (u8)(len0 >> 56);
+ ctx->Xi.c[9] ^= (u8)(len0 >> 48);
+ ctx->Xi.c[10] ^= (u8)(len0 >> 40);
+ ctx->Xi.c[11] ^= (u8)(len0 >> 32);
+ ctx->Xi.c[12] ^= (u8)(len0 >> 24);
+ ctx->Xi.c[13] ^= (u8)(len0 >> 16);
+ ctx->Xi.c[14] ^= (u8)(len0 >> 8);
+ ctx->Xi.c[15] ^= (u8)(len0);
#endif
- } else
- ctx->Yi.u[1] ^= len0;
+ } else {
+ ctx->Xi.u[1] ^= len0;
+ }
- GCM_MUL(ctx, Yi);
+ GCM_MUL(ctx);
if (is_endian.little)
#ifdef BSWAP4
- ctr = BSWAP4(ctx->Yi.d[3]);
+ ctr = BSWAP4(ctx->Xi.d[3]);
#else
- ctr = GETU32(ctx->Yi.c + 12);
+ ctr = GETU32(ctx->Xi.c + 12);
#endif
else
- ctr = ctx->Yi.d[3];
+ ctr = ctx->Xi.d[3];
+
+ /* Copy borrowed Xi to Yi */
+ ctx->Yi.u[0] = ctx->Xi.u[0];
+ ctx->Yi.u[1] = ctx->Xi.u[1];
}
+ ctx->Xi.u[0] = 0;
+ ctx->Xi.u[1] = 0;
+
(*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
++ctr;
if (is_endian.little)
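The rewritten setiv above implements the two J0 (pre-counter) cases of GCM: a 96-bit IV becomes IV || 0x00000001 directly, while any other length is hashed, J0 = GHASH_H(IV || 0-pad || 0^64 || [len(IV) in bits]_64). The patch borrows ctx->Xi as scratch for that GHASH so ctx->Yi is written only once, then zeroes Xi again for MAC accumulation. A sketch of the non-96-bit branch, reusing the bit-serial gf128_mul from the earlier note (an assumption, not this file's kernel):

    #include <stdint.h>
    #include <string.h>

    static void ghash_iv_to_j0(uint64_t J0[2], const uint64_t H[2],
                               const unsigned char *iv, size_t len)
    {
        uint64_t X[2] = { 0, 0 }, T[2], w;
        uint64_t bits = (uint64_t)len << 3;
        unsigned char blk[16];
        size_t n, i, j;

        while (len) {                   /* absorb zero-padded IV blocks */
            n = len < 16 ? len : 16;
            memset(blk, 0, sizeof(blk));
            memcpy(blk, iv, n);
            for (i = 0; i < 2; i++) {   /* load big-endian 64-bit words */
                for (w = 0, j = 0; j < 8; j++)
                    w = (w << 8) | blk[8 * i + j];
                X[i] ^= w;
            }
            gf128_mul(T, X, H);
            X[0] = T[0], X[1] = T[1];
            iv += n;
            len -= n;
        }
        X[1] ^= bits;                   /* final block: 0^64 || [len]_64 */
        gf128_mul(T, X, H);
        J0[0] = T[0];
        J0[1] = T[1];
    }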
@@ -1002,7 +947,7 @@ int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
n = (n + 1) % 16;
}
if (n == 0)
- GCM_MUL(ctx, Xi);
+ GCM_MUL(ctx);
else {
ctx->ares = n;
return 0;
@@ -1018,7 +963,7 @@ int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
while (len >= 16) {
for (i = 0; i < 16; ++i)
ctx->Xi.c[i] ^= aad[i];
- GCM_MUL(ctx, Xi);
+ GCM_MUL(ctx);
aad += 16;
len -= 16;
}
@@ -1040,33 +985,42 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
const union {
long one;
char little;
- } is_endian = {
- 1
- };
- unsigned int n, ctr;
+ } is_endian = { 1 };
+ unsigned int n, ctr, mres;
size_t i;
u64 mlen = ctx->len.u[1];
block128_f block = ctx->block;
void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
-# ifdef GHASH
+# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif
-#if 0
- n = (unsigned int)mlen % 16; /* alternative to ctx->mres */
-#endif
mlen += len;
if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
return -1;
ctx->len.u[1] = mlen;
+ mres = ctx->mres;
+
if (ctx->ares) {
/* First call to encrypt finalizes GHASH(AAD) */
- GCM_MUL(ctx, Xi);
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+ if (len == 0) {
+ GCM_MUL(ctx);
+ ctx->ares = 0;
+ return 0;
+ }
+ memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+ ctx->Xi.u[0] = 0;
+ ctx->Xi.u[1] = 0;
+ mres = sizeof(ctx->Xi);
+#else
+ GCM_MUL(ctx);
+#endif
ctx->ares = 0;
}
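The new mres/Xn logic above (mirrored in the decrypt and ctr32 paths below) changes partial-block handling: instead of XORing stragglers into Xi byte by byte and reducing once per block, bytes are staged in a ctx->Xn buffer and hashed with a single bulk GHASH call once whole blocks are available, so odd-sized calls still ride the fast path. A schematic of the pattern (the 48-byte size matches the context field this patch adds, but treat it as an assumption; `bulk_ghash_fn` stands in for ctx->ghash):

    #include <stddef.h>

    typedef void (*bulk_ghash_fn)(const unsigned char *inp, size_t len);

    struct stager {
        unsigned char Xn[48];      /* staging buffer, multiple of 16 */
        unsigned int mres;         /* bytes currently staged */
        bulk_ghash_fn ghash;
    };

    static void stage_byte(struct stager *s, unsigned char c)
    {
        s->Xn[s->mres++] = c;
        if (s->mres == sizeof(s->Xn)) {  /* flush with one bulk call */
            s->ghash(s->Xn, sizeof(s->Xn));
            s->mres = 0;
        }
    }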
@@ -1079,28 +1033,49 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
else
ctr = ctx->Yi.d[3];
- n = ctx->mres;
+ n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
if (16 % sizeof(size_t) == 0) { /* always true actually */
do {
if (n) {
+# if defined(GHASH)
+ while (n && len) {
+ ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
+ --len;
+ n = (n + 1) % 16;
+ }
+ if (n == 0) {
+ GHASH(ctx, ctx->Xn, mres);
+ mres = 0;
+ } else {
+ ctx->mres = mres;
+ return 0;
+ }
+# else
while (n && len) {
ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
--len;
n = (n + 1) % 16;
}
- if (n == 0)
- GCM_MUL(ctx, Xi);
- else {
+ if (n == 0) {
+ GCM_MUL(ctx);
+ mres = 0;
+ } else {
ctx->mres = n;
return 0;
}
+# endif
}
# if defined(STRICT_ALIGNMENT)
if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
break;
# endif
-# if defined(GHASH) && defined(GHASH_CHUNK)
+# if defined(GHASH)
+ if (len >= 16 && mres) {
+ GHASH(ctx, ctx->Xn, mres);
+ mres = 0;
+ }
+# if defined(GHASH_CHUNK)
while (len >= GHASH_CHUNK) {
size_t j = GHASH_CHUNK;
@@ -1111,11 +1086,11 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
(*block) (ctx->Yi.c, ctx->EKi.c, key);
++ctr;
if (is_endian.little)
-# ifdef BSWAP4
+# ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
-# else
+# else
PUTU32(ctx->Yi.c + 12, ctr);
-# endif
+# endif
else
ctx->Yi.d[3] = ctr;
for (i = 0; i < 16 / sizeof(size_t); ++i)
@@ -1127,6 +1102,7 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
len -= GHASH_CHUNK;
}
+# endif
if ((i = (len & (size_t)-16))) {
size_t j = i;
@@ -1169,7 +1145,7 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
ctx->Yi.d[3] = ctr;
for (i = 0; i < 16 / sizeof(size_t); ++i)
ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
- GCM_MUL(ctx, Xi);
+ GCM_MUL(ctx);
out += 16;
in += 16;
len -= 16;
@@ -1186,13 +1162,21 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
# endif
else
ctx->Yi.d[3] = ctr;
+# if defined(GHASH)
+ while (len--) {
+ ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
+ ++n;
+ }
+# else
while (len--) {
ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
++n;
}
+ mres = n;
+# endif
}
- ctx->mres = n;
+ ctx->mres = mres;
return 0;
} while (0);
}
@@ -1210,13 +1194,22 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
else
ctx->Yi.d[3] = ctr;
}
- ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+ ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
n = (n + 1) % 16;
+ if (mres == sizeof(ctx->Xn)) {
+ GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
+ mres = 0;
+ }
+#else
+ ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
+ mres = n = (n + 1) % 16;
if (n == 0)
- GCM_MUL(ctx, Xi);
+ GCM_MUL(ctx);
+#endif
}
- ctx->mres = n;
+ ctx->mres = mres;
return 0;
}
@@ -1227,17 +1220,15 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
const union {
long one;
char little;
- } is_endian = {
- 1
- };
- unsigned int n, ctr;
+ } is_endian = { 1 };
+ unsigned int n, ctr, mres;
size_t i;
u64 mlen = ctx->len.u[1];
block128_f block = ctx->block;
void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
-# ifdef GHASH
+# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
const u8 *inp, size_t len) = ctx->ghash;
# endif
@@ -1248,9 +1239,23 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
return -1;
ctx->len.u[1] = mlen;
+ mres = ctx->mres;
+
if (ctx->ares) {
/* First call to decrypt finalizes GHASH(AAD) */
- GCM_MUL(ctx, Xi);
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+ if (len == 0) {
+ GCM_MUL(ctx);
+ ctx->ares = 0;
+ return 0;
+ }
+ memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+ ctx->Xi.u[0] = 0;
+ ctx->Xi.u[1] = 0;
+ mres = sizeof(ctx->Xi);
+#else
+ GCM_MUL(ctx);
+#endif
ctx->ares = 0;
}
@@ -1263,11 +1268,25 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
else
ctr = ctx->Yi.d[3];
- n = ctx->mres;
+ n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
if (16 % sizeof(size_t) == 0) { /* always true actually */
do {
if (n) {
+# if defined(GHASH)
+ while (n && len) {
+ *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
+ --len;
+ n = (n + 1) % 16;
+ }
+ if (n == 0) {
+ GHASH(ctx, ctx->Xn, mres);
+ mres = 0;
+ } else {
+ ctx->mres = mres;
+ return 0;
+ }
+# else
while (n && len) {
u8 c = *(in++);
*(out++) = c ^ ctx->EKi.c[n];
@@ -1275,18 +1294,25 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
--len;
n = (n + 1) % 16;
}
- if (n == 0)
- GCM_MUL(ctx, Xi);
- else {
+ if (n == 0) {
+ GCM_MUL(ctx);
+ mres = 0;
+ } else {
ctx->mres = n;
return 0;
}
+# endif
}
# if defined(STRICT_ALIGNMENT)
if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
break;
# endif
-# if defined(GHASH) && defined(GHASH_CHUNK)
+# if defined(GHASH)
+ if (len >= 16 && mres) {
+ GHASH(ctx, ctx->Xn, mres);
+ mres = 0;
+ }
+# if defined(GHASH_CHUNK)
while (len >= GHASH_CHUNK) {
size_t j = GHASH_CHUNK;
@@ -1298,11 +1324,11 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
(*block) (ctx->Yi.c, ctx->EKi.c, key);
++ctr;
if (is_endian.little)
-# ifdef BSWAP4
+# ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
-# else
+# else
PUTU32(ctx->Yi.c + 12, ctr);
-# endif
+# endif
else
ctx->Yi.d[3] = ctr;
for (i = 0; i < 16 / sizeof(size_t); ++i)
@@ -1313,6 +1339,7 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
}
len -= GHASH_CHUNK;
}
+# endif
if ((i = (len & (size_t)-16))) {
GHASH(ctx, in, i);
while (len >= 16) {
@@ -1356,7 +1383,7 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
out[i] = c ^ ctx->EKi.t[i];
ctx->Xi.t[i] ^= c;
}
- GCM_MUL(ctx, Xi);
+ GCM_MUL(ctx);
out += 16;
in += 16;
len -= 16;
@@ -1373,15 +1400,23 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
# endif
else
ctx->Yi.d[3] = ctr;
+# if defined(GHASH)
+ while (len--) {
+ out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
+ ++n;
+ }
+# else
while (len--) {
u8 c = in[n];
ctx->Xi.c[n] ^= c;
out[n] = c ^ ctx->EKi.c[n];
++n;
}
+ mres = n;
+# endif
}
- ctx->mres = n;
+ ctx->mres = mres;
return 0;
} while (0);
}
@@ -1400,15 +1435,24 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
else
ctx->Yi.d[3] = ctr;
}
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+ out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
+ n = (n + 1) % 16;
+ if (mres == sizeof(ctx->Xn)) {
+ GHASH(ctx, ctx->Xn, sizeof(ctx->Xn));
+ mres = 0;
+ }
+#else
c = in[i];
out[i] = c ^ ctx->EKi.c[n];
ctx->Xi.c[n] ^= c;
- n = (n + 1) % 16;
+ mres = n = (n + 1) % 16;
if (n == 0)
- GCM_MUL(ctx, Xi);
+ GCM_MUL(ctx);
+#endif
}
- ctx->mres = n;
+ ctx->mres = mres;
return 0;
}
@@ -1416,68 +1460,104 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
const unsigned char *in, unsigned char *out,
size_t len, ctr128_f stream)
{
+#if defined(OPENSSL_SMALL_FOOTPRINT)
+ return CRYPTO_gcm128_encrypt(ctx, in, out, len);
+#else
const union {
long one;
char little;
- } is_endian = {
- 1
- };
- unsigned int n, ctr;
+ } is_endian = { 1 };
+ unsigned int n, ctr, mres;
size_t i;
u64 mlen = ctx->len.u[1];
void *key = ctx->key;
-#ifdef GCM_FUNCREF_4BIT
+# ifdef GCM_FUNCREF_4BIT
void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
-# ifdef GHASH
+# ifdef GHASH
void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
const u8 *inp, size_t len) = ctx->ghash;
+# endif
# endif
-#endif
mlen += len;
if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
return -1;
ctx->len.u[1] = mlen;
+ mres = ctx->mres;
+
if (ctx->ares) {
/* First call to encrypt finalizes GHASH(AAD) */
- GCM_MUL(ctx, Xi);
+#if defined(GHASH)
+ if (len == 0) {
+ GCM_MUL(ctx);
+ ctx->ares = 0;
+ return 0;
+ }
+ memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+ ctx->Xi.u[0] = 0;
+ ctx->Xi.u[1] = 0;
+ mres = sizeof(ctx->Xi);
+#else
+ GCM_MUL(ctx);
+#endif
ctx->ares = 0;
}
if (is_endian.little)
-#ifdef BSWAP4
+# ifdef BSWAP4
ctr = BSWAP4(ctx->Yi.d[3]);
-#else
+# else
ctr = GETU32(ctx->Yi.c + 12);
-#endif
+# endif
else
ctr = ctx->Yi.d[3];
- n = ctx->mres;
+ n = mres % 16;
if (n) {
+# if defined(GHASH)
+ while (n && len) {
+ ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
+ --len;
+ n = (n + 1) % 16;
+ }
+ if (n == 0) {
+ GHASH(ctx, ctx->Xn, mres);
+ mres = 0;
+ } else {
+ ctx->mres = mres;
+ return 0;
+ }
+# else
while (n && len) {
ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
--len;
n = (n + 1) % 16;
}
- if (n == 0)
- GCM_MUL(ctx, Xi);
- else {
+ if (n == 0) {
+ GCM_MUL(ctx);
+ mres = 0;
+ } else {
ctx->mres = n;
return 0;
}
+# endif
}
-#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+# if defined(GHASH)
+ if (len >= 16 && mres) {
+ GHASH(ctx, ctx->Xn, mres);
+ mres = 0;
+ }
+# if defined(GHASH_CHUNK)
while (len >= GHASH_CHUNK) {
(*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
ctr += GHASH_CHUNK / 16;
if (is_endian.little)
-# ifdef BSWAP4
+# ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
-# else
+# else
PUTU32(ctx->Yi.c + 12, ctr);
-# endif
+# endif
else
ctx->Yi.d[3] = ctr;
GHASH(ctx, out, GHASH_CHUNK);
@@ -1485,99 +1565,134 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
in += GHASH_CHUNK;
len -= GHASH_CHUNK;
}
-#endif
+# endif
+# endif
if ((i = (len & (size_t)-16))) {
size_t j = i / 16;
(*stream) (in, out, j, key, ctx->Yi.c);
ctr += (unsigned int)j;
if (is_endian.little)
-#ifdef BSWAP4
+# ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
-#else
+# else
PUTU32(ctx->Yi.c + 12, ctr);
-#endif
+# endif
else
ctx->Yi.d[3] = ctr;
in += i;
len -= i;
-#if defined(GHASH)
+# if defined(GHASH)
GHASH(ctx, out, i);
out += i;
-#else
+# else
while (j--) {
for (i = 0; i < 16; ++i)
ctx->Xi.c[i] ^= out[i];
- GCM_MUL(ctx, Xi);
+ GCM_MUL(ctx);
out += 16;
}
-#endif
+# endif
}
if (len) {
(*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
++ctr;
if (is_endian.little)
-#ifdef BSWAP4
+# ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
-#else
+# else
PUTU32(ctx->Yi.c + 12, ctr);
-#endif
+# endif
else
ctx->Yi.d[3] = ctr;
while (len--) {
- ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
+# if defined(GHASH)
+ ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
+# else
+ ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
+# endif
++n;
}
}
- ctx->mres = n;
+ ctx->mres = mres;
return 0;
+#endif
}
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
const unsigned char *in, unsigned char *out,
size_t len, ctr128_f stream)
{
+#if defined(OPENSSL_SMALL_FOOTPRINT)
+ return CRYPTO_gcm128_decrypt(ctx, in, out, len);
+#else
const union {
long one;
char little;
- } is_endian = {
- 1
- };
- unsigned int n, ctr;
+ } is_endian = { 1 };
+ unsigned int n, ctr, mres;
size_t i;
u64 mlen = ctx->len.u[1];
void *key = ctx->key;
-#ifdef GCM_FUNCREF_4BIT
+# ifdef GCM_FUNCREF_4BIT
void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
-# ifdef GHASH
+# ifdef GHASH
void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
const u8 *inp, size_t len) = ctx->ghash;
+# endif
# endif
-#endif
mlen += len;
if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
return -1;
ctx->len.u[1] = mlen;
+ mres = ctx->mres;
+
if (ctx->ares) {
/* First call to decrypt finalizes GHASH(AAD) */
- GCM_MUL(ctx, Xi);
+# if defined(GHASH)
+ if (len == 0) {
+ GCM_MUL(ctx);
+ ctx->ares = 0;
+ return 0;
+ }
+ memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+ ctx->Xi.u[0] = 0;
+ ctx->Xi.u[1] = 0;
+ mres = sizeof(ctx->Xi);
+# else
+ GCM_MUL(ctx);
+# endif
ctx->ares = 0;
}
if (is_endian.little)
-#ifdef BSWAP4
+# ifdef BSWAP4
ctr = BSWAP4(ctx->Yi.d[3]);
-#else
+# else
ctr = GETU32(ctx->Yi.c + 12);
-#endif
+# endif
else
ctr = ctx->Yi.d[3];
- n = ctx->mres;
+ n = mres % 16;
if (n) {
+# if defined(GHASH)
+ while (n && len) {
+ *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
+ --len;
+ n = (n + 1) % 16;
+ }
+ if (n == 0) {
+ GHASH(ctx, ctx->Xn, mres);
+ mres = 0;
+ } else {
+ ctx->mres = mres;
+ return 0;
+ }
+# else
while (n && len) {
u8 c = *(in++);
*(out++) = c ^ ctx->EKi.c[n];
@@ -1585,55 +1700,63 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
--len;
n = (n + 1) % 16;
}
- if (n == 0)
- GCM_MUL(ctx, Xi);
- else {
+ if (n == 0) {
+ GCM_MUL(ctx);
+ mres = 0;
+ } else {
ctx->mres = n;
return 0;
}
+# endif
}
-#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+# if defined(GHASH)
+ if (len >= 16 && mres) {
+ GHASH(ctx, ctx->Xn, mres);
+ mres = 0;
+ }
+# if defined(GHASH_CHUNK)
while (len >= GHASH_CHUNK) {
GHASH(ctx, in, GHASH_CHUNK);
(*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
ctr += GHASH_CHUNK / 16;
if (is_endian.little)
-# ifdef BSWAP4
+# ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
-# else
+# else
PUTU32(ctx->Yi.c + 12, ctr);
-# endif
+# endif
else
ctx->Yi.d[3] = ctr;
out += GHASH_CHUNK;
in += GHASH_CHUNK;
len -= GHASH_CHUNK;
}
-#endif
+# endif
+# endif
if ((i = (len & (size_t)-16))) {
size_t j = i / 16;
-#if defined(GHASH)
+# if defined(GHASH)
GHASH(ctx, in, i);
-#else
+# else
while (j--) {
size_t k;
for (k = 0; k < 16; ++k)
ctx->Xi.c[k] ^= in[k];
- GCM_MUL(ctx, Xi);
+ GCM_MUL(ctx);
in += 16;
}
j = i / 16;
in -= i;
-#endif
+# endif
(*stream) (in, out, j, key, ctx->Yi.c);
ctr += (unsigned int)j;
if (is_endian.little)
-#ifdef BSWAP4
+# ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
-#else
+# else
PUTU32(ctx->Yi.c + 12, ctr);
-#endif
+# endif
else
ctx->Yi.d[3] = ctr;
out += i;
@@ -1644,23 +1767,28 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
(*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
++ctr;
if (is_endian.little)
-#ifdef BSWAP4
+# ifdef BSWAP4
ctx->Yi.d[3] = BSWAP4(ctr);
-#else
+# else
PUTU32(ctx->Yi.c + 12, ctr);
-#endif
+# endif
else
ctx->Yi.d[3] = ctr;
while (len--) {
+# if defined(GHASH)
+ out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
+# else
u8 c = in[n];
- ctx->Xi.c[n] ^= c;
+ ctx->Xi.c[mres++] ^= c;
out[n] = c ^ ctx->EKi.c[n];
+# endif
++n;
}
}
- ctx->mres = n;
+ ctx->mres = mres;
return 0;
+#endif
}
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
@@ -1669,17 +1797,37 @@ int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
const union {
long one;
char little;
- } is_endian = {
- 1
- };
+ } is_endian = { 1 };
u64 alen = ctx->len.u[0] << 3;
u64 clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
+# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+ void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
+ const u8 *inp, size_t len) = ctx->ghash;
+# endif
#endif
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+ u128 bitlen;
+ unsigned int mres = ctx->mres;
+
+ if (mres) {
+ unsigned blocks = (mres + 15) & -16;
+
+ memset(ctx->Xn + mres, 0, blocks - mres);
+ mres = blocks;
+ if (mres == sizeof(ctx->Xn)) {
+ GHASH(ctx, ctx->Xn, mres);
+ mres = 0;
+ }
+ } else if (ctx->ares) {
+ GCM_MUL(ctx);
+ }
+#else
if (ctx->mres || ctx->ares)
- GCM_MUL(ctx, Xi);
+ GCM_MUL(ctx);
+#endif
if (is_endian.little) {
#ifdef BSWAP8
@@ -1696,9 +1844,17 @@ int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
#endif
}
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+ bitlen.hi = alen;
+ bitlen.lo = clen;
+ memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
+ mres += sizeof(bitlen);
+ GHASH(ctx, ctx->Xn, mres);
+#else
ctx->Xi.u[0] ^= alen;
ctx->Xi.u[1] ^= clen;
- GCM_MUL(ctx, Xi);
+ GCM_MUL(ctx);
+#endif
ctx->Xi.u[0] ^= ctx->EK0.u[0];
ctx->Xi.u[1] ^= ctx->EK0.u[1];
@@ -1720,7 +1876,7 @@ GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
GCM128_CONTEXT *ret;
- if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
+ if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
CRYPTO_gcm128_init(ret, key, block);
return ret;
@@ -1728,644 +1884,5 @@ GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
- if (ctx) {
- OPENSSL_cleanse(ctx, sizeof(*ctx));
- OPENSSL_free(ctx);
- }
+ OPENSSL_clear_free(ctx, sizeof(*ctx));
}
-
-#if defined(SELFTEST)
-# include <stdio.h>
-# include <openssl/aes.h>
-
-/* Test Case 1 */
-static const u8 K1[16], *P1 = NULL, *A1 = NULL, IV1[12], *C1 = NULL;
-static const u8 T1[] = {
- 0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e, 0x30, 0x61,
- 0x36, 0x7f, 0x1d, 0x57, 0xa4, 0xe7, 0x45, 0x5a
-};
-
-/* Test Case 2 */
-# define K2 K1
-# define A2 A1
-# define IV2 IV1
-static const u8 P2[16];
-static const u8 C2[] = {
- 0x03, 0x88, 0xda, 0xce, 0x60, 0xb6, 0xa3, 0x92,
- 0xf3, 0x28, 0xc2, 0xb9, 0x71, 0xb2, 0xfe, 0x78
-};
-
-static const u8 T2[] = {
- 0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec, 0x13, 0xbd,
- 0xf5, 0x3a, 0x67, 0xb2, 0x12, 0x57, 0xbd, 0xdf
-};
-
-/* Test Case 3 */
-# define A3 A2
-static const u8 K3[] = {
- 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
- 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
-};
-
-static const u8 P3[] = {
- 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
- 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
- 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
- 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
- 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
- 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
- 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
- 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
-};
-
-static const u8 IV3[] = {
- 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
- 0xde, 0xca, 0xf8, 0x88
-};
-
-static const u8 C3[] = {
- 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
- 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
- 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
- 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
- 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
- 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
- 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
- 0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85
-};
-
-static const u8 T3[] = {
- 0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd, 0x64, 0xa6,
- 0x2c, 0xf3, 0x5a, 0xbd, 0x2b, 0xa6, 0xfa, 0xb4
-};
-
-/* Test Case 4 */
-# define K4 K3
-# define IV4 IV3
-static const u8 P4[] = {
- 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
- 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
- 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
- 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
- 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
- 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
- 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
- 0xba, 0x63, 0x7b, 0x39
-};
-
-static const u8 A4[] = {
- 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
- 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
- 0xab, 0xad, 0xda, 0xd2
-};
-
-static const u8 C4[] = {
- 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
- 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
- 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
- 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
- 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
- 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
- 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
- 0x3d, 0x58, 0xe0, 0x91
-};
-
-static const u8 T4[] = {
- 0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21, 0xa5, 0xdb,
- 0x94, 0xfa, 0xe9, 0x5a, 0xe7, 0x12, 0x1a, 0x47
-};
-
-/* Test Case 5 */
-# define K5 K4
-# define P5 P4
-# define A5 A4
-static const u8 IV5[] = {
- 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad
-};
-
-static const u8 C5[] = {
- 0x61, 0x35, 0x3b, 0x4c, 0x28, 0x06, 0x93, 0x4a,
- 0x77, 0x7f, 0xf5, 0x1f, 0xa2, 0x2a, 0x47, 0x55,
- 0x69, 0x9b, 0x2a, 0x71, 0x4f, 0xcd, 0xc6, 0xf8,
- 0x37, 0x66, 0xe5, 0xf9, 0x7b, 0x6c, 0x74, 0x23,
- 0x73, 0x80, 0x69, 0x00, 0xe4, 0x9f, 0x24, 0xb2,
- 0x2b, 0x09, 0x75, 0x44, 0xd4, 0x89, 0x6b, 0x42,
- 0x49, 0x89, 0xb5, 0xe1, 0xeb, 0xac, 0x0f, 0x07,
- 0xc2, 0x3f, 0x45, 0x98
-};
-
-static const u8 T5[] = {
- 0x36, 0x12, 0xd2, 0xe7, 0x9e, 0x3b, 0x07, 0x85,
- 0x56, 0x1b, 0xe1, 0x4a, 0xac, 0xa2, 0xfc, 0xcb
-};
-
-/* Test Case 6 */
-# define K6 K5
-# define P6 P5
-# define A6 A5
-static const u8 IV6[] = {
- 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
- 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
- 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
- 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
- 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
- 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
- 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
- 0xa6, 0x37, 0xb3, 0x9b
-};
-
-static const u8 C6[] = {
- 0x8c, 0xe2, 0x49, 0x98, 0x62, 0x56, 0x15, 0xb6,
- 0x03, 0xa0, 0x33, 0xac, 0xa1, 0x3f, 0xb8, 0x94,
- 0xbe, 0x91, 0x12, 0xa5, 0xc3, 0xa2, 0x11, 0xa8,
- 0xba, 0x26, 0x2a, 0x3c, 0xca, 0x7e, 0x2c, 0xa7,
- 0x01, 0xe4, 0xa9, 0xa4, 0xfb, 0xa4, 0x3c, 0x90,
- 0xcc, 0xdc, 0xb2, 0x81, 0xd4, 0x8c, 0x7c, 0x6f,
- 0xd6, 0x28, 0x75, 0xd2, 0xac, 0xa4, 0x17, 0x03,
- 0x4c, 0x34, 0xae, 0xe5
-};
-
-static const u8 T6[] = {
- 0x61, 0x9c, 0xc5, 0xae, 0xff, 0xfe, 0x0b, 0xfa,
- 0x46, 0x2a, 0xf4, 0x3c, 0x16, 0x99, 0xd0, 0x50
-};
-
-/* Test Case 7 */
-static const u8 K7[24], *P7 = NULL, *A7 = NULL, IV7[12], *C7 = NULL;
-static const u8 T7[] = {
- 0xcd, 0x33, 0xb2, 0x8a, 0xc7, 0x73, 0xf7, 0x4b,
- 0xa0, 0x0e, 0xd1, 0xf3, 0x12, 0x57, 0x24, 0x35
-};
-
-/* Test Case 8 */
-# define K8 K7
-# define IV8 IV7
-# define A8 A7
-static const u8 P8[16];
-static const u8 C8[] = {
- 0x98, 0xe7, 0x24, 0x7c, 0x07, 0xf0, 0xfe, 0x41,
- 0x1c, 0x26, 0x7e, 0x43, 0x84, 0xb0, 0xf6, 0x00
-};
-
-static const u8 T8[] = {
- 0x2f, 0xf5, 0x8d, 0x80, 0x03, 0x39, 0x27, 0xab,
- 0x8e, 0xf4, 0xd4, 0x58, 0x75, 0x14, 0xf0, 0xfb
-};
-
-/* Test Case 9 */
-# define A9 A8
-static const u8 K9[] = {
- 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
- 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
- 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c
-};
-
-static const u8 P9[] = {
- 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
- 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
- 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
- 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
- 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
- 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
- 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
- 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
-};
-
-static const u8 IV9[] = {
- 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
- 0xde, 0xca, 0xf8, 0x88
-};
-
-static const u8 C9[] = {
- 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
- 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
- 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
- 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
- 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
- 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
- 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
- 0xcc, 0xda, 0x27, 0x10, 0xac, 0xad, 0xe2, 0x56
-};
-
-static const u8 T9[] = {
- 0x99, 0x24, 0xa7, 0xc8, 0x58, 0x73, 0x36, 0xbf,
- 0xb1, 0x18, 0x02, 0x4d, 0xb8, 0x67, 0x4a, 0x14
-};
-
-/* Test Case 10 */
-# define K10 K9
-# define IV10 IV9
-static const u8 P10[] = {
- 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
- 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
- 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
- 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
- 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
- 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
- 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
- 0xba, 0x63, 0x7b, 0x39
-};
-
-static const u8 A10[] = {
- 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
- 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
- 0xab, 0xad, 0xda, 0xd2
-};
-
-static const u8 C10[] = {
- 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
- 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
- 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
- 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
- 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
- 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
- 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
- 0xcc, 0xda, 0x27, 0x10
-};
-
-static const u8 T10[] = {
- 0x25, 0x19, 0x49, 0x8e, 0x80, 0xf1, 0x47, 0x8f,
- 0x37, 0xba, 0x55, 0xbd, 0x6d, 0x27, 0x61, 0x8c
-};
-
-/* Test Case 11 */
-# define K11 K10
-# define P11 P10
-# define A11 A10
-static const u8 IV11[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
-
-static const u8 C11[] = {
- 0x0f, 0x10, 0xf5, 0x99, 0xae, 0x14, 0xa1, 0x54,
- 0xed, 0x24, 0xb3, 0x6e, 0x25, 0x32, 0x4d, 0xb8,
- 0xc5, 0x66, 0x63, 0x2e, 0xf2, 0xbb, 0xb3, 0x4f,
- 0x83, 0x47, 0x28, 0x0f, 0xc4, 0x50, 0x70, 0x57,
- 0xfd, 0xdc, 0x29, 0xdf, 0x9a, 0x47, 0x1f, 0x75,
- 0xc6, 0x65, 0x41, 0xd4, 0xd4, 0xda, 0xd1, 0xc9,
- 0xe9, 0x3a, 0x19, 0xa5, 0x8e, 0x8b, 0x47, 0x3f,
- 0xa0, 0xf0, 0x62, 0xf7
-};
-
-static const u8 T11[] = {
- 0x65, 0xdc, 0xc5, 0x7f, 0xcf, 0x62, 0x3a, 0x24,
- 0x09, 0x4f, 0xcc, 0xa4, 0x0d, 0x35, 0x33, 0xf8
-};
-
-/* Test Case 12 */
-# define K12 K11
-# define P12 P11
-# define A12 A11
-static const u8 IV12[] = {
- 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
- 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
- 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
- 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
- 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
- 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
- 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
- 0xa6, 0x37, 0xb3, 0x9b
-};
-
-static const u8 C12[] = {
- 0xd2, 0x7e, 0x88, 0x68, 0x1c, 0xe3, 0x24, 0x3c,
- 0x48, 0x30, 0x16, 0x5a, 0x8f, 0xdc, 0xf9, 0xff,
- 0x1d, 0xe9, 0xa1, 0xd8, 0xe6, 0xb4, 0x47, 0xef,
- 0x6e, 0xf7, 0xb7, 0x98, 0x28, 0x66, 0x6e, 0x45,
- 0x81, 0xe7, 0x90, 0x12, 0xaf, 0x34, 0xdd, 0xd9,
- 0xe2, 0xf0, 0x37, 0x58, 0x9b, 0x29, 0x2d, 0xb3,
- 0xe6, 0x7c, 0x03, 0x67, 0x45, 0xfa, 0x22, 0xe7,
- 0xe9, 0xb7, 0x37, 0x3b
-};
-
-static const u8 T12[] = {
- 0xdc, 0xf5, 0x66, 0xff, 0x29, 0x1c, 0x25, 0xbb,
- 0xb8, 0x56, 0x8f, 0xc3, 0xd3, 0x76, 0xa6, 0xd9
-};
-
-/* Test Case 13 */
-static const u8 K13[32], *P13 = NULL, *A13 = NULL, IV13[12], *C13 = NULL;
-static const u8 T13[] = {
- 0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9,
- 0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b
-};
-
-/* Test Case 14 */
-# define K14 K13
-# define A14 A13
-static const u8 P14[16], IV14[12];
-static const u8 C14[] = {
- 0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60, 0x6b, 0x6e,
- 0x07, 0x4e, 0xc5, 0xd3, 0xba, 0xf3, 0x9d, 0x18
-};
-
-static const u8 T14[] = {
- 0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99, 0x6b, 0xf0,
- 0x26, 0x5b, 0x98, 0xb5, 0xd4, 0x8a, 0xb9, 0x19
-};
-
-/* Test Case 15 */
-# define A15 A14
-static const u8 K15[] = {
- 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
- 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
- 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
- 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
-};
-
-static const u8 P15[] = {
- 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
- 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
- 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
- 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
- 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
- 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
- 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
- 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
-};
-
-static const u8 IV15[] = {
- 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
- 0xde, 0xca, 0xf8, 0x88
-};
-
-static const u8 C15[] = {
- 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
- 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
- 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
- 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
- 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
- 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
- 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
- 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
-};
-
-static const u8 T15[] = {
- 0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34, 0x71, 0xbd,
- 0xec, 0x1a, 0x50, 0x22, 0x70, 0xe3, 0xcc, 0x6c
-};
-
-/* Test Case 16 */
-# define K16 K15
-# define IV16 IV15
-static const u8 P16[] = {
- 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
- 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
- 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
- 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
- 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
- 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
- 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
- 0xba, 0x63, 0x7b, 0x39
-};
-
-static const u8 A16[] = {
- 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
- 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
- 0xab, 0xad, 0xda, 0xd2
-};
-
-static const u8 C16[] = {
- 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
- 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
- 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
- 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
- 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
- 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
- 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
- 0xbc, 0xc9, 0xf6, 0x62
-};
-
-static const u8 T16[] = {
- 0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e, 0x17, 0x68,
- 0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b
-};
-
-/* Test Case 17 */
-# define K17 K16
-# define P17 P16
-# define A17 A16
-static const u8 IV17[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
-
-static const u8 C17[] = {
- 0xc3, 0x76, 0x2d, 0xf1, 0xca, 0x78, 0x7d, 0x32,
- 0xae, 0x47, 0xc1, 0x3b, 0xf1, 0x98, 0x44, 0xcb,
- 0xaf, 0x1a, 0xe1, 0x4d, 0x0b, 0x97, 0x6a, 0xfa,
- 0xc5, 0x2f, 0xf7, 0xd7, 0x9b, 0xba, 0x9d, 0xe0,
- 0xfe, 0xb5, 0x82, 0xd3, 0x39, 0x34, 0xa4, 0xf0,
- 0x95, 0x4c, 0xc2, 0x36, 0x3b, 0xc7, 0x3f, 0x78,
- 0x62, 0xac, 0x43, 0x0e, 0x64, 0xab, 0xe4, 0x99,
- 0xf4, 0x7c, 0x9b, 0x1f
-};
-
-static const u8 T17[] = {
- 0x3a, 0x33, 0x7d, 0xbf, 0x46, 0xa7, 0x92, 0xc4,
- 0x5e, 0x45, 0x49, 0x13, 0xfe, 0x2e, 0xa8, 0xf2
-};
-
-/* Test Case 18 */
-# define K18 K17
-# define P18 P17
-# define A18 A17
-static const u8 IV18[] = {
- 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
- 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
- 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
- 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
- 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
- 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
- 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
- 0xa6, 0x37, 0xb3, 0x9b
-};
-
-static const u8 C18[] = {
- 0x5a, 0x8d, 0xef, 0x2f, 0x0c, 0x9e, 0x53, 0xf1,
- 0xf7, 0x5d, 0x78, 0x53, 0x65, 0x9e, 0x2a, 0x20,
- 0xee, 0xb2, 0xb2, 0x2a, 0xaf, 0xde, 0x64, 0x19,
- 0xa0, 0x58, 0xab, 0x4f, 0x6f, 0x74, 0x6b, 0xf4,
- 0x0f, 0xc0, 0xc3, 0xb7, 0x80, 0xf2, 0x44, 0x45,
- 0x2d, 0xa3, 0xeb, 0xf1, 0xc5, 0xd8, 0x2c, 0xde,
- 0xa2, 0x41, 0x89, 0x97, 0x20, 0x0e, 0xf8, 0x2e,
- 0x44, 0xae, 0x7e, 0x3f
-};
-
-static const u8 T18[] = {
- 0xa4, 0x4a, 0x82, 0x66, 0xee, 0x1c, 0x8e, 0xb0,
- 0xc8, 0xb5, 0xd4, 0xcf, 0x5a, 0xe9, 0xf1, 0x9a
-};
-
-/* Test Case 19 */
-# define K19 K1
-# define P19 P1
-# define IV19 IV1
-# define C19 C1
-static const u8 A19[] = {
- 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
- 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
- 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
- 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
- 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
- 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
- 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
- 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55,
- 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
- 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
- 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
- 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
- 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
- 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
- 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
- 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
-};
-
-static const u8 T19[] = {
- 0x5f, 0xea, 0x79, 0x3a, 0x2d, 0x6f, 0x97, 0x4d,
- 0x37, 0xe6, 0x8e, 0x0c, 0xb8, 0xff, 0x94, 0x92
-};
-
-/* Test Case 20 */
-# define K20 K1
-# define A20 A1
-/* this results in 0xff in counter LSB */
-static const u8 IV20[64] = { 0xff, 0xff, 0xff, 0xff };
-
-static const u8 P20[288];
-static const u8 C20[] = {
- 0x56, 0xb3, 0x37, 0x3c, 0xa9, 0xef, 0x6e, 0x4a,
- 0x2b, 0x64, 0xfe, 0x1e, 0x9a, 0x17, 0xb6, 0x14,
- 0x25, 0xf1, 0x0d, 0x47, 0xa7, 0x5a, 0x5f, 0xce,
- 0x13, 0xef, 0xc6, 0xbc, 0x78, 0x4a, 0xf2, 0x4f,
- 0x41, 0x41, 0xbd, 0xd4, 0x8c, 0xf7, 0xc7, 0x70,
- 0x88, 0x7a, 0xfd, 0x57, 0x3c, 0xca, 0x54, 0x18,
- 0xa9, 0xae, 0xff, 0xcd, 0x7c, 0x5c, 0xed, 0xdf,
- 0xc6, 0xa7, 0x83, 0x97, 0xb9, 0xa8, 0x5b, 0x49,
- 0x9d, 0xa5, 0x58, 0x25, 0x72, 0x67, 0xca, 0xab,
- 0x2a, 0xd0, 0xb2, 0x3c, 0xa4, 0x76, 0xa5, 0x3c,
- 0xb1, 0x7f, 0xb4, 0x1c, 0x4b, 0x8b, 0x47, 0x5c,
- 0xb4, 0xf3, 0xf7, 0x16, 0x50, 0x94, 0xc2, 0x29,
- 0xc9, 0xe8, 0xc4, 0xdc, 0x0a, 0x2a, 0x5f, 0xf1,
- 0x90, 0x3e, 0x50, 0x15, 0x11, 0x22, 0x13, 0x76,
- 0xa1, 0xcd, 0xb8, 0x36, 0x4c, 0x50, 0x61, 0xa2,
- 0x0c, 0xae, 0x74, 0xbc, 0x4a, 0xcd, 0x76, 0xce,
- 0xb0, 0xab, 0xc9, 0xfd, 0x32, 0x17, 0xef, 0x9f,
- 0x8c, 0x90, 0xbe, 0x40, 0x2d, 0xdf, 0x6d, 0x86,
- 0x97, 0xf4, 0xf8, 0x80, 0xdf, 0xf1, 0x5b, 0xfb,
- 0x7a, 0x6b, 0x28, 0x24, 0x1e, 0xc8, 0xfe, 0x18,
- 0x3c, 0x2d, 0x59, 0xe3, 0xf9, 0xdf, 0xff, 0x65,
- 0x3c, 0x71, 0x26, 0xf0, 0xac, 0xb9, 0xe6, 0x42,
- 0x11, 0xf4, 0x2b, 0xae, 0x12, 0xaf, 0x46, 0x2b,
- 0x10, 0x70, 0xbe, 0xf1, 0xab, 0x5e, 0x36, 0x06,
- 0x87, 0x2c, 0xa1, 0x0d, 0xee, 0x15, 0xb3, 0x24,
- 0x9b, 0x1a, 0x1b, 0x95, 0x8f, 0x23, 0x13, 0x4c,
- 0x4b, 0xcc, 0xb7, 0xd0, 0x32, 0x00, 0xbc, 0xe4,
- 0x20, 0xa2, 0xf8, 0xeb, 0x66, 0xdc, 0xf3, 0x64,
- 0x4d, 0x14, 0x23, 0xc1, 0xb5, 0x69, 0x90, 0x03,
- 0xc1, 0x3e, 0xce, 0xf4, 0xbf, 0x38, 0xa3, 0xb6,
- 0x0e, 0xed, 0xc3, 0x40, 0x33, 0xba, 0xc1, 0x90,
- 0x27, 0x83, 0xdc, 0x6d, 0x89, 0xe2, 0xe7, 0x74,
- 0x18, 0x8a, 0x43, 0x9c, 0x7e, 0xbc, 0xc0, 0x67,
- 0x2d, 0xbd, 0xa4, 0xdd, 0xcf, 0xb2, 0x79, 0x46,
- 0x13, 0xb0, 0xbe, 0x41, 0x31, 0x5e, 0xf7, 0x78,
- 0x70, 0x8a, 0x70, 0xee, 0x7d, 0x75, 0x16, 0x5c
-};
-
-static const u8 T20[] = {
- 0x8b, 0x30, 0x7f, 0x6b, 0x33, 0x28, 0x6d, 0x0a,
- 0xb0, 0x26, 0xa9, 0xed, 0x3f, 0xe1, 0xe8, 0x5f
-};
-
-# define TEST_CASE(n) do { \
- u8 out[sizeof(P##n)]; \
- AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
- CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
- CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
- memset(out,0,sizeof(out)); \
- if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
- if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
- if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
- (C##n && memcmp(out,C##n,sizeof(out)))) \
- ret++, printf ("encrypt test#%d failed.\n",n); \
- CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
- memset(out,0,sizeof(out)); \
- if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
- if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
- if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
- (P##n && memcmp(out,P##n,sizeof(out)))) \
- ret++, printf ("decrypt test#%d failed.\n",n); \
- } while(0)
-
-int main()
-{
- GCM128_CONTEXT ctx;
- AES_KEY key;
- int ret = 0;
-
- TEST_CASE(1);
- TEST_CASE(2);
- TEST_CASE(3);
- TEST_CASE(4);
- TEST_CASE(5);
- TEST_CASE(6);
- TEST_CASE(7);
- TEST_CASE(8);
- TEST_CASE(9);
- TEST_CASE(10);
- TEST_CASE(11);
- TEST_CASE(12);
- TEST_CASE(13);
- TEST_CASE(14);
- TEST_CASE(15);
- TEST_CASE(16);
- TEST_CASE(17);
- TEST_CASE(18);
- TEST_CASE(19);
- TEST_CASE(20);
-
-# ifdef OPENSSL_CPUID_OBJ
- {
- size_t start, stop, gcm_t, ctr_t, OPENSSL_rdtsc();
- union {
- u64 u;
- u8 c[1024];
- } buf;
- int i;
-
- AES_set_encrypt_key(K1, sizeof(K1) * 8, &key);
- CRYPTO_gcm128_init(&ctx, &key, (block128_f) AES_encrypt);
- CRYPTO_gcm128_setiv(&ctx, IV1, sizeof(IV1));
-
- CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
- start = OPENSSL_rdtsc();
- CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
- gcm_t = OPENSSL_rdtsc() - start;
-
- CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
- &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
- (block128_f) AES_encrypt);
- start = OPENSSL_rdtsc();
- CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
- &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
- (block128_f) AES_encrypt);
- ctr_t = OPENSSL_rdtsc() - start;
-
- printf("%.2f-%.2f=%.2f\n",
- gcm_t / (double)sizeof(buf),
- ctr_t / (double)sizeof(buf),
- (gcm_t - ctr_t) / (double)sizeof(buf));
-# ifdef GHASH
- {
- void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
- const u8 *inp, size_t len) = ctx.ghash;
-
- GHASH((&ctx), buf.c, sizeof(buf));
- start = OPENSSL_rdtsc();
- for (i = 0; i < 100; ++i)
- GHASH((&ctx), buf.c, sizeof(buf));
- gcm_t = OPENSSL_rdtsc() - start;
- printf("%.2f\n", gcm_t / (double)sizeof(buf) / (double)i);
- }
-# endif
- }
-# endif
-
- return ret;
-}
-#endif
diff --git a/crypto/modes/modes.h b/crypto/modes/modes.h
deleted file mode 100644
index fd488499a0b4..000000000000
--- a/crypto/modes/modes.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/* ====================================================================
- * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
- *
- * Rights for redistribution and usage in source and binary
- * forms are granted according to the OpenSSL license.
- */
-
-#include <stddef.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-typedef void (*block128_f) (const unsigned char in[16],
- unsigned char out[16], const void *key);
-
-typedef void (*cbc128_f) (const unsigned char *in, unsigned char *out,
- size_t len, const void *key,
- unsigned char ivec[16], int enc);
-
-typedef void (*ctr128_f) (const unsigned char *in, unsigned char *out,
- size_t blocks, const void *key,
- const unsigned char ivec[16]);
-
-typedef void (*ccm128_f) (const unsigned char *in, unsigned char *out,
- size_t blocks, const void *key,
- const unsigned char ivec[16],
- unsigned char cmac[16]);
-
-void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out,
- size_t len, const void *key,
- unsigned char ivec[16], block128_f block);
-void CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out,
- size_t len, const void *key,
- unsigned char ivec[16], block128_f block);
-
-void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out,
- size_t len, const void *key,
- unsigned char ivec[16],
- unsigned char ecount_buf[16], unsigned int *num,
- block128_f block);
-
-void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out,
- size_t len, const void *key,
- unsigned char ivec[16],
- unsigned char ecount_buf[16],
- unsigned int *num, ctr128_f ctr);
-
-void CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out,
- size_t len, const void *key,
- unsigned char ivec[16], int *num,
- block128_f block);
-
-void CRYPTO_cfb128_encrypt(const unsigned char *in, unsigned char *out,
- size_t len, const void *key,
- unsigned char ivec[16], int *num,
- int enc, block128_f block);
-void CRYPTO_cfb128_8_encrypt(const unsigned char *in, unsigned char *out,
- size_t length, const void *key,
- unsigned char ivec[16], int *num,
- int enc, block128_f block);
-void CRYPTO_cfb128_1_encrypt(const unsigned char *in, unsigned char *out,
- size_t bits, const void *key,
- unsigned char ivec[16], int *num,
- int enc, block128_f block);
-
-size_t CRYPTO_cts128_encrypt_block(const unsigned char *in,
- unsigned char *out, size_t len,
- const void *key, unsigned char ivec[16],
- block128_f block);
-size_t CRYPTO_cts128_encrypt(const unsigned char *in, unsigned char *out,
- size_t len, const void *key,
- unsigned char ivec[16], cbc128_f cbc);
-size_t CRYPTO_cts128_decrypt_block(const unsigned char *in,
- unsigned char *out, size_t len,
- const void *key, unsigned char ivec[16],
- block128_f block);
-size_t CRYPTO_cts128_decrypt(const unsigned char *in, unsigned char *out,
- size_t len, const void *key,
- unsigned char ivec[16], cbc128_f cbc);
-
-size_t CRYPTO_nistcts128_encrypt_block(const unsigned char *in,
- unsigned char *out, size_t len,
- const void *key,
- unsigned char ivec[16],
- block128_f block);
-size_t CRYPTO_nistcts128_encrypt(const unsigned char *in, unsigned char *out,
- size_t len, const void *key,
- unsigned char ivec[16], cbc128_f cbc);
-size_t CRYPTO_nistcts128_decrypt_block(const unsigned char *in,
- unsigned char *out, size_t len,
- const void *key,
- unsigned char ivec[16],
- block128_f block);
-size_t CRYPTO_nistcts128_decrypt(const unsigned char *in, unsigned char *out,
- size_t len, const void *key,
- unsigned char ivec[16], cbc128_f cbc);
-
-typedef struct gcm128_context GCM128_CONTEXT;
-
-GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block);
-void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block);
-void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
- size_t len);
-int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
- size_t len);
-int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
- const unsigned char *in, unsigned char *out,
- size_t len);
-int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
- const unsigned char *in, unsigned char *out,
- size_t len);
-int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
- const unsigned char *in, unsigned char *out,
- size_t len, ctr128_f stream);
-int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
- const unsigned char *in, unsigned char *out,
- size_t len, ctr128_f stream);
-int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
- size_t len);
-void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len);
-void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx);
-
-typedef struct ccm128_context CCM128_CONTEXT;
-
-void CRYPTO_ccm128_init(CCM128_CONTEXT *ctx,
- unsigned int M, unsigned int L, void *key,
- block128_f block);
-int CRYPTO_ccm128_setiv(CCM128_CONTEXT *ctx, const unsigned char *nonce,
- size_t nlen, size_t mlen);
-void CRYPTO_ccm128_aad(CCM128_CONTEXT *ctx, const unsigned char *aad,
- size_t alen);
-int CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx, const unsigned char *inp,
- unsigned char *out, size_t len);
-int CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx, const unsigned char *inp,
- unsigned char *out, size_t len);
-int CRYPTO_ccm128_encrypt_ccm64(CCM128_CONTEXT *ctx, const unsigned char *inp,
- unsigned char *out, size_t len,
- ccm128_f stream);
-int CRYPTO_ccm128_decrypt_ccm64(CCM128_CONTEXT *ctx, const unsigned char *inp,
- unsigned char *out, size_t len,
- ccm128_f stream);
-size_t CRYPTO_ccm128_tag(CCM128_CONTEXT *ctx, unsigned char *tag, size_t len);
-
-typedef struct xts128_context XTS128_CONTEXT;
-
-int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx,
- const unsigned char iv[16],
- const unsigned char *inp, unsigned char *out,
- size_t len, int enc);
-
-size_t CRYPTO_128_wrap(void *key, const unsigned char *iv,
- unsigned char *out,
- const unsigned char *in, size_t inlen,
- block128_f block);
-
-size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv,
- unsigned char *out,
- const unsigned char *in, size_t inlen,
- block128_f block);
-
-#ifdef __cplusplus
-}
-#endif
diff --git a/crypto/modes/modes_lcl.h b/crypto/modes/modes_lcl.h
index fe14ec7002f0..f2ae01d11afd 100644
--- a/crypto/modes/modes_lcl.h
+++ b/crypto/modes/modes_lcl.h
@@ -1,8 +1,10 @@
-/* ====================================================================
- * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
+/*
+ * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
*
- * Redistribution and use is governed by OpenSSL license.
- * ====================================================================
+ * Licensed under the OpenSSL license (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
*/
#include <openssl/modes.h>
@@ -71,6 +73,7 @@ typedef unsigned char u8;
# endif
# elif defined(_MSC_VER)
# if _MSC_VER>=1300
+# include <stdlib.h>
# pragma intrinsic(_byteswap_uint64,_byteswap_ulong)
# define BSWAP8(x) _byteswap_uint64((u64)(x))
# define BSWAP4(x) _byteswap_ulong((u32)(x))
@@ -125,6 +128,9 @@ struct gcm128_context {
unsigned int mres, ares;
block128_f block;
void *key;
+#if !defined(OPENSSL_SMALL_FOOTPRINT)
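+ /*
+  * Deferred-GHASH staging buffer (see the gcm128.c hunks above): the
+  * running hash value and pending input bytes are collected here and
+  * hashed in whole-block batches instead of one GCM_MUL per block.
+  */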
+ unsigned char Xn[48];
+#endif
};
struct xts128_context {
@@ -141,3 +147,44 @@ struct ccm128_context {
block128_f block;
void *key;
};
+
+#ifndef OPENSSL_NO_OCB
+
+typedef union {
+ u64 a[2];
+ unsigned char c[16];
+} OCB_BLOCK;
+# define ocb_block16_xor(in1,in2,out) \
+ ( (out)->a[0]=(in1)->a[0]^(in2)->a[0], \
+ (out)->a[1]=(in1)->a[1]^(in2)->a[1] )
+# if STRICT_ALIGNMENT
+# define ocb_block16_xor_misaligned(in1,in2,out) \
+ ocb_block_xor((in1)->c,(in2)->c,16,(out)->c)
+# else
+# define ocb_block16_xor_misaligned ocb_block16_xor
+# endif
+
+struct ocb128_context {
+ /* Need both encrypt and decrypt key schedules for decryption */
+ block128_f encrypt;
+ block128_f decrypt;
+ void *keyenc;
+ void *keydec;
+ ocb128_f stream; /* direction dependent */
+ /* Key dependent variables. Can be reused if key remains the same */
+ size_t l_index;
+ size_t max_l_index;
+ OCB_BLOCK l_star;
+ OCB_BLOCK l_dollar;
+ OCB_BLOCK *l;
+ /* Must be reset for each session */
+ struct {
+ u64 blocks_hashed;
+ u64 blocks_processed;
+ OCB_BLOCK offset_aad;
+ OCB_BLOCK sum;
+ OCB_BLOCK offset;
+ OCB_BLOCK checksum;
+ } sess;
+};
+#endif /* OPENSSL_NO_OCB */
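
The Xn member added to gcm128_context above is what the new gcm128.c hunks
use to batch GHASH over data that arrives in sub-block pieces. A minimal
sketch of the staging rule (hypothetical names; gcm_batch stands in for the
real GHASH call, and this is not the library's exact code):

    #include <stddef.h>

    extern void gcm_batch(const unsigned char *inp, size_t len);

    /* Stage one keystream-XORed byte; flush once three blocks are full. */
    static unsigned int stage_byte(unsigned char buf[48], unsigned int mres,
                                   unsigned char c)
    {
        buf[mres++] = c;          /* buffer the cipher-/plaintext byte */
        if (mres == 48) {         /* 48 bytes == three whole blocks    */
            gcm_batch(buf, 48);   /* one batched hash, not three muls  */
            mres = 0;             /* buffer drained                    */
        }
        return mres;
    }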
diff --git a/crypto/modes/ocb128.c b/crypto/modes/ocb128.c
new file mode 100644
index 000000000000..713b9aaf19d5
--- /dev/null
+++ b/crypto/modes/ocb128.c
@@ -0,0 +1,562 @@
+/*
+ * Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include <string.h>
+#include <openssl/crypto.h>
+#include <openssl/err.h>
+#include "modes_lcl.h"
+
+#ifndef OPENSSL_NO_OCB
+
+/*
+ * Calculate the number of binary trailing zeros in any given number
+ */
+static u32 ocb_ntz(u64 n)
+{
+ u32 cnt = 0;
+
+ /*
+ * We do a right-to-left simple sequential search. This is surprisingly
+ * efficient as the distribution of trailing zeros is not uniform,
+ * e.g. the number of possible inputs with no trailing zeros is equal to
+ * the number with 1 or more; the number with exactly 1 is equal to the
+ * number with 2 or more, etc. Checking the last two bits covers 75% of
+ * all numbers. Checking the last three covers 87.5%.
+ */
+ while (!(n & 1)) {
+ n >>= 1;
+ cnt++;
+ }
+ return cnt;
+}
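+
+/*
+ * Illustrative only (a hypothetical helper, not used elsewhere): for
+ * i = 1..8, ocb_ntz() returns 0,1,0,2,0,1,0,3 - the "ruler" sequence
+ * in which ocb_lookup_l() below is asked for L table entries.
+ */
+static int ocb_ntz_demo(void)
+{
+    static const u32 expect[8] = { 0, 1, 0, 2, 0, 1, 0, 3 };
+    u64 i;
+
+    for (i = 1; i <= 8; i++)
+        if (ocb_ntz(i) != expect[i - 1])
+            return 0;
+    return 1;
+}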
+
+/*
+ * Shift a block of 16 bytes left by shift bits
+ */
+static void ocb_block_lshift(const unsigned char *in, size_t shift,
+ unsigned char *out)
+{
+ int i;
+ unsigned char carry = 0, carry_next;
+
+ for (i = 15; i >= 0; i--) {
+ carry_next = in[i] >> (8 - shift);
+ out[i] = (in[i] << shift) | carry;
+ carry = carry_next;
+ }
+}
+
+/*
+ * Perform a "double" operation as per OCB spec
+ */
+static void ocb_double(OCB_BLOCK *in, OCB_BLOCK *out)
+{
+ unsigned char mask;
+
+ /*
+ * Calculate the mask based on the most significant bit. There are more
+ * efficient ways to do this - but this way is constant time
+ */
+ mask = in->c[0] & 0x80;
+ mask >>= 7;
+ mask = (0 - mask) & 0x87;
+
+ ocb_block_lshift(in->c, 1, out->c);
+
+ out->c[15] ^= mask;
+}
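+
+/*
+ * A worked example of the doubling (nothing here beyond the code
+ * above): blocks are elements of GF(2^128) reduced modulo
+ * x^128 + x^7 + x^2 + x + 1, so a set top bit folds back in as 0x87:
+ *
+ *     80 00 ... 00  doubles to  00 00 ... 87
+ *     01 00 ... 00  doubles to  02 00 ... 00
+ */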
+
+/*
+ * Perform an xor on in1 and in2 - each of len bytes. Store result in out
+ */
+static void ocb_block_xor(const unsigned char *in1,
+ const unsigned char *in2, size_t len,
+ unsigned char *out)
+{
+ size_t i;
+ for (i = 0; i < len; i++) {
+ out[i] = in1[i] ^ in2[i];
+ }
+}
+
+/*
+ * Look up L_index in our lookup table. If we haven't already got it, we
+ * need to calculate it.
+ */
+static OCB_BLOCK *ocb_lookup_l(OCB128_CONTEXT *ctx, size_t idx)
+{
+ size_t l_index = ctx->l_index;
+
+ if (idx <= l_index) {
+ return ctx->l + idx;
+ }
+
+ /* We don't have it - so calculate it */
+ if (idx >= ctx->max_l_index) {
+ void *tmp_ptr;
+ /*
+ * Each additional entry allows to process almost double as
+ * much data, so that in linear world the table will need to
+ * be expanded with smaller and smaller increments. Originally
+ * it was doubling in size, which was a waste. Growing it
+ * linearly is not formally optimal, but is simpler to implement.
+ * We grow table by minimally required 4*n that would accommodate
+ * the index.
+ */
+ ctx->max_l_index += (idx - ctx->max_l_index + 4) & ~3;
+ tmp_ptr = OPENSSL_realloc(ctx->l, ctx->max_l_index * sizeof(OCB_BLOCK));
+ if (tmp_ptr == NULL) /* prevent ctx->l from being clobbered */
+ return NULL;
+ ctx->l = tmp_ptr;
+ }
+ while (l_index < idx) {
+ ocb_double(ctx->l + l_index, ctx->l + l_index + 1);
+ l_index++;
+ }
+ ctx->l_index = l_index;
+
+ return ctx->l + idx;
+}
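+
+/*
+ * The growth arithmetic above, worked through with example values:
+ * with max_l_index == 5, idx == 5 gives (5 - 5 + 4) & ~3 == 4, so the
+ * table grows to 9 entries; idx == 9 gives (9 - 5 + 4) & ~3 == 8 and
+ * a table of 13. The table always grows by the smallest multiple of 4
+ * that covers the requested index.
+ */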
+
+/*
+ * Create a new OCB128_CONTEXT
+ */
+OCB128_CONTEXT *CRYPTO_ocb128_new(void *keyenc, void *keydec,
+ block128_f encrypt, block128_f decrypt,
+ ocb128_f stream)
+{
+ OCB128_CONTEXT *octx;
+ int ret;
+
+ if ((octx = OPENSSL_malloc(sizeof(*octx))) != NULL) {
+ ret = CRYPTO_ocb128_init(octx, keyenc, keydec, encrypt, decrypt,
+ stream);
+ if (ret)
+ return octx;
+ OPENSSL_free(octx);
+ }
+
+ return NULL;
+}
+
+/*
+ * Initialise an existing OCB128_CONTEXT
+ */
+int CRYPTO_ocb128_init(OCB128_CONTEXT *ctx, void *keyenc, void *keydec,
+ block128_f encrypt, block128_f decrypt,
+ ocb128_f stream)
+{
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->l_index = 0;
+ ctx->max_l_index = 5;
+ if ((ctx->l = OPENSSL_malloc(ctx->max_l_index * 16)) == NULL) {
+ CRYPTOerr(CRYPTO_F_CRYPTO_OCB128_INIT, ERR_R_MALLOC_FAILURE);
+ return 0;
+ }
+
+ /*
+ * We set both the encryption and decryption key schedules; decryption
+ * needs both. The decryption schedule is not strictly needed when only
+ * encrypting, but taking it anyway simplifies things.
+ */
+ ctx->encrypt = encrypt;
+ ctx->decrypt = decrypt;
+ ctx->stream = stream;
+ ctx->keyenc = keyenc;
+ ctx->keydec = keydec;
+
+ /* L_* = ENCIPHER(K, zeros(128)) */
+ ctx->encrypt(ctx->l_star.c, ctx->l_star.c, ctx->keyenc);
+
+ /* L_$ = double(L_*) */
+ ocb_double(&ctx->l_star, &ctx->l_dollar);
+
+ /* L_0 = double(L_$) */
+ ocb_double(&ctx->l_dollar, ctx->l);
+
+ /* L_{i} = double(L_{i-1}) */
+ ocb_double(ctx->l, ctx->l+1);
+ ocb_double(ctx->l+1, ctx->l+2);
+ ocb_double(ctx->l+2, ctx->l+3);
+ ocb_double(ctx->l+3, ctx->l+4);
+ ctx->l_index = 4; /* enough to process up to 496 bytes */
+
+ return 1;
+}
+
+/*
+ * Copy an OCB128_CONTEXT object
+ */
+int CRYPTO_ocb128_copy_ctx(OCB128_CONTEXT *dest, OCB128_CONTEXT *src,
+ void *keyenc, void *keydec)
+{
+ memcpy(dest, src, sizeof(OCB128_CONTEXT));
+ if (keyenc)
+ dest->keyenc = keyenc;
+ if (keydec)
+ dest->keydec = keydec;
+ if (src->l) {
+ if ((dest->l = OPENSSL_malloc(src->max_l_index * 16)) == NULL) {
+ CRYPTOerr(CRYPTO_F_CRYPTO_OCB128_COPY_CTX, ERR_R_MALLOC_FAILURE);
+ return 0;
+ }
+ memcpy(dest->l, src->l, (src->l_index + 1) * 16);
+ }
+ return 1;
+}
+
+/*
+ * Set the IV to be used for this operation. It must be 1-15 bytes.
+ */
+int CRYPTO_ocb128_setiv(OCB128_CONTEXT *ctx, const unsigned char *iv,
+ size_t len, size_t taglen)
+{
+ unsigned char ktop[16], tmp[16], mask;
+ unsigned char stretch[24], nonce[16];
+ size_t bottom, shift;
+
+ /*
+ * The spec says the IV is 120 bits or fewer and allows non-byte-aligned
+ * lengths. We don't support that at this stage.
+ */
+ if ((len > 15) || (len < 1) || (taglen > 16) || (taglen < 1)) {
+ return -1;
+ }
+
+ /* Reset nonce-dependent variables */
+ memset(&ctx->sess, 0, sizeof(ctx->sess));
+
+ /* Nonce = num2str(TAGLEN mod 128,7) || zeros(120-bitlen(N)) || 1 || N */
+ nonce[0] = ((taglen * 8) % 128) << 1;
+ memset(nonce + 1, 0, 15);
+ memcpy(nonce + 16 - len, iv, len);
+ nonce[15 - len] |= 1;
+
+ /* Ktop = ENCIPHER(K, Nonce[1..122] || zeros(6)) */
+ memcpy(tmp, nonce, 16);
+ tmp[15] &= 0xc0;
+ ctx->encrypt(tmp, ktop, ctx->keyenc);
+
+ /* Stretch = Ktop || (Ktop[1..64] xor Ktop[9..72]) */
+ memcpy(stretch, ktop, 16);
+ ocb_block_xor(ktop, ktop + 1, 8, stretch + 16);
+
+ /* bottom = str2num(Nonce[123..128]) */
+ bottom = nonce[15] & 0x3f;
+
+ /* Offset_0 = Stretch[1+bottom..128+bottom] */
+ shift = bottom % 8;
+ ocb_block_lshift(stretch + (bottom / 8), shift, ctx->sess.offset.c);
+ mask = 0xff;
+ mask <<= 8 - shift;
+ ctx->sess.offset.c[15] |=
+ (*(stretch + (bottom / 8) + 16) & mask) >> (8 - shift);
+
+ return 1;
+}
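+
+/*
+ * Example of the nonce layout built above (illustrative values): for a
+ * 12-byte IV with taglen == 16, (16 * 8) % 128 == 0 and nonce[0] is 0;
+ * for taglen == 12 it is (96 << 1) == 0xc0. The low 6 bits of the last
+ * nonce byte become "bottom", the bit offset into Stretch from which
+ * the 128-bit Offset_0 is extracted.
+ */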
+
+/*
+ * Provide any AAD. This can be called multiple times. Only the final time can
+ * have a partial block.
+ */
+int CRYPTO_ocb128_aad(OCB128_CONTEXT *ctx, const unsigned char *aad,
+ size_t len)
+{
+ u64 i, all_num_blocks;
+ size_t num_blocks, last_len;
+ OCB_BLOCK tmp;
+
+ /* Calculate the number of blocks of AAD provided now, and so far */
+ num_blocks = len / 16;
+ all_num_blocks = num_blocks + ctx->sess.blocks_hashed;
+
+ /* Loop through all full blocks of AAD */
+ for (i = ctx->sess.blocks_hashed + 1; i <= all_num_blocks; i++) {
+ OCB_BLOCK *lookup;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ lookup = ocb_lookup_l(ctx, ocb_ntz(i));
+ if (lookup == NULL)
+ return 0;
+ ocb_block16_xor(&ctx->sess.offset_aad, lookup, &ctx->sess.offset_aad);
+
+ memcpy(tmp.c, aad, 16);
+ aad += 16;
+
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ ocb_block16_xor(&ctx->sess.offset_aad, &tmp, &tmp);
+ ctx->encrypt(tmp.c, tmp.c, ctx->keyenc);
+ ocb_block16_xor(&tmp, &ctx->sess.sum, &ctx->sess.sum);
+ }
+
+ /*
+ * Check if we have any partial blocks left over. This is only valid in the
+ * last call to this function
+ */
+ last_len = len % 16;
+
+ if (last_len > 0) {
+ /* Offset_* = Offset_m xor L_* */
+ ocb_block16_xor(&ctx->sess.offset_aad, &ctx->l_star,
+ &ctx->sess.offset_aad);
+
+ /* CipherInput = (A_* || 1 || zeros(127-bitlen(A_*))) xor Offset_* */
+ memset(tmp.c, 0, 16);
+ memcpy(tmp.c, aad, last_len);
+ tmp.c[last_len] = 0x80;
+ ocb_block16_xor(&ctx->sess.offset_aad, &tmp, &tmp);
+
+ /* Sum = Sum_m xor ENCIPHER(K, CipherInput) */
+ ctx->encrypt(tmp.c, tmp.c, ctx->keyenc);
+ ocb_block16_xor(&tmp, &ctx->sess.sum, &ctx->sess.sum);
+ }
+
+ ctx->sess.blocks_hashed = all_num_blocks;
+
+ return 1;
+}
+
+/*
+ * Provide any data to be encrypted. This can be called multiple times. Only
+ * the final time can have a partial block.
+ */
+int CRYPTO_ocb128_encrypt(OCB128_CONTEXT *ctx,
+ const unsigned char *in, unsigned char *out,
+ size_t len)
+{
+ u64 i, all_num_blocks;
+ size_t num_blocks, last_len;
+
+ /*
+ * Calculate the number of blocks of data to be encrypted provided now, and
+ * so far
+ */
+ num_blocks = len / 16;
+ all_num_blocks = num_blocks + ctx->sess.blocks_processed;
+
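+    /*
+     * Take the stream path only if a stream function was supplied and
+     * the 64-bit running block count still fits in a size_t (the cast
+     * comparison below rejects truncation on 32-bit targets).
+     */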
+ if (num_blocks && all_num_blocks == (size_t)all_num_blocks
+ && ctx->stream != NULL) {
+ size_t max_idx = 0, top = (size_t)all_num_blocks;
+
+ /*
+ * See how many L_{i} entries we need to process data at hand
+ * and pre-compute missing entries in the table [if any]...
+ */
+ while (top >>= 1)
+ max_idx++;
+ if (ocb_lookup_l(ctx, max_idx) == NULL)
+ return 0;
+
+ ctx->stream(in, out, num_blocks, ctx->keyenc,
+ (size_t)ctx->sess.blocks_processed + 1, ctx->sess.offset.c,
+ (const unsigned char (*)[16])ctx->l, ctx->sess.checksum.c);
+ } else {
+ /* Loop through all full blocks to be encrypted */
+ for (i = ctx->sess.blocks_processed + 1; i <= all_num_blocks; i++) {
+ OCB_BLOCK *lookup;
+ OCB_BLOCK tmp;
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ lookup = ocb_lookup_l(ctx, ocb_ntz(i));
+ if (lookup == NULL)
+ return 0;
+ ocb_block16_xor(&ctx->sess.offset, lookup, &ctx->sess.offset);
+
+ memcpy(tmp.c, in, 16);
+ in += 16;
+
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ocb_block16_xor(&tmp, &ctx->sess.checksum, &ctx->sess.checksum);
+
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ ocb_block16_xor(&ctx->sess.offset, &tmp, &tmp);
+ ctx->encrypt(tmp.c, tmp.c, ctx->keyenc);
+ ocb_block16_xor(&ctx->sess.offset, &tmp, &tmp);
+
+ memcpy(out, tmp.c, 16);
+ out += 16;
+ }
+ }
+
+ /*
+ * Check if we have any partial blocks left over. This is only valid in the
+ * last call to this function
+ */
+ last_len = len % 16;
+
+ if (last_len > 0) {
+ OCB_BLOCK pad;
+
+ /* Offset_* = Offset_m xor L_* */
+ ocb_block16_xor(&ctx->sess.offset, &ctx->l_star, &ctx->sess.offset);
+
+ /* Pad = ENCIPHER(K, Offset_*) */
+ ctx->encrypt(ctx->sess.offset.c, pad.c, ctx->keyenc);
+
+ /* C_* = P_* xor Pad[1..bitlen(P_*)] */
+ ocb_block_xor(in, pad.c, last_len, out);
+
+ /* Checksum_* = Checksum_m xor (P_* || 1 || zeros(127-bitlen(P_*))) */
+ memset(pad.c, 0, 16); /* borrow pad */
+ memcpy(pad.c, in, last_len);
+ pad.c[last_len] = 0x80;
+ ocb_block16_xor(&pad, &ctx->sess.checksum, &ctx->sess.checksum);
+ }
+
+ ctx->sess.blocks_processed = all_num_blocks;
+
+ return 1;
+}
+
+/*
+ * Provide any data to be decrypted. This can be called multiple times. Only
+ * the final time can have a partial block.
+ */
+int CRYPTO_ocb128_decrypt(OCB128_CONTEXT *ctx,
+ const unsigned char *in, unsigned char *out,
+ size_t len)
+{
+ u64 i, all_num_blocks;
+ size_t num_blocks, last_len;
+
+ /*
+ * Calculate the number of blocks of data to be decrypted provided now, and
+ * so far
+ */
+ num_blocks = len / 16;
+ all_num_blocks = num_blocks + ctx->sess.blocks_processed;
+
+ if (num_blocks && all_num_blocks == (size_t)all_num_blocks
+ && ctx->stream != NULL) {
+ size_t max_idx = 0, top = (size_t)all_num_blocks;
+
+ /*
+ * See how many L_{i} entries we need to process data at hand
+ * and pre-compute missing entries in the table [if any]...
+ */
+ while (top >>= 1)
+ max_idx++;
+ if (ocb_lookup_l(ctx, max_idx) == NULL)
+ return 0;
+
+ ctx->stream(in, out, num_blocks, ctx->keydec,
+ (size_t)ctx->sess.blocks_processed + 1, ctx->sess.offset.c,
+ (const unsigned char (*)[16])ctx->l, ctx->sess.checksum.c);
+ } else {
+ OCB_BLOCK tmp;
+
+ /* Loop through all full blocks to be decrypted */
+ for (i = ctx->sess.blocks_processed + 1; i <= all_num_blocks; i++) {
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ OCB_BLOCK *lookup = ocb_lookup_l(ctx, ocb_ntz(i));
+ if (lookup == NULL)
+ return 0;
+ ocb_block16_xor(&ctx->sess.offset, lookup, &ctx->sess.offset);
+
+ memcpy(tmp.c, in, 16);
+ in += 16;
+
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+ ocb_block16_xor(&ctx->sess.offset, &tmp, &tmp);
+ ctx->decrypt(tmp.c, tmp.c, ctx->keydec);
+ ocb_block16_xor(&ctx->sess.offset, &tmp, &tmp);
+
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ocb_block16_xor(&tmp, &ctx->sess.checksum, &ctx->sess.checksum);
+
+ memcpy(out, tmp.c, 16);
+ out += 16;
+ }
+ }
+
+ /*
+ * Check if we have any partial blocks left over. This is only valid in the
+ * last call to this function
+ */
+ last_len = len % 16;
+
+ if (last_len > 0) {
+ OCB_BLOCK pad;
+
+ /* Offset_* = Offset_m xor L_* */
+ ocb_block16_xor(&ctx->sess.offset, &ctx->l_star, &ctx->sess.offset);
+
+ /* Pad = ENCIPHER(K, Offset_*) */
+ ctx->encrypt(ctx->sess.offset.c, pad.c, ctx->keyenc);
+
+ /* P_* = C_* xor Pad[1..bitlen(C_*)] */
+ ocb_block_xor(in, pad.c, last_len, out);
+
+ /* Checksum_* = Checksum_m xor (P_* || 1 || zeros(127-bitlen(P_*))) */
+ memset(pad.c, 0, 16); /* borrow pad */
+ memcpy(pad.c, out, last_len);
+ pad.c[last_len] = 0x80;
+ ocb_block16_xor(&pad, &ctx->sess.checksum, &ctx->sess.checksum);
+ }
+
+ ctx->sess.blocks_processed = all_num_blocks;
+
+ return 1;
+}
+
+static int ocb_finish(OCB128_CONTEXT *ctx, unsigned char *tag, size_t len,
+ int write)
+{
+ OCB_BLOCK tmp;
+
+ if (len > 16 || len < 1) {
+ return -1;
+ }
+
+ /*
+ * Tag = ENCIPHER(K, Checksum_* xor Offset_* xor L_$) xor HASH(K,A)
+ */
+ ocb_block16_xor(&ctx->sess.checksum, &ctx->sess.offset, &tmp);
+ ocb_block16_xor(&ctx->l_dollar, &tmp, &tmp);
+ ctx->encrypt(tmp.c, tmp.c, ctx->keyenc);
+ ocb_block16_xor(&tmp, &ctx->sess.sum, &tmp);
+
+ if (write) {
+ memcpy(tag, &tmp, len);
+ return 1;
+ } else {
+ return CRYPTO_memcmp(&tmp, tag, len);
+ }
+}
+
+/*
+ * Calculate the tag and verify it against the supplied tag
+ */
+int CRYPTO_ocb128_finish(OCB128_CONTEXT *ctx, const unsigned char *tag,
+ size_t len)
+{
+ return ocb_finish(ctx, (unsigned char*)tag, len, 0);
+}
+
+/*
+ * Retrieve the calculated tag
+ */
+int CRYPTO_ocb128_tag(OCB128_CONTEXT *ctx, unsigned char *tag, size_t len)
+{
+ return ocb_finish(ctx, tag, len, 1);
+}
+
+/*
+ * Release all resources
+ */
+void CRYPTO_ocb128_cleanup(OCB128_CONTEXT *ctx)
+{
+ if (ctx) {
+ OPENSSL_clear_free(ctx->l, ctx->max_l_index * 16);
+ OPENSSL_cleanse(ctx, sizeof(*ctx));
+ }
+}
+
+#endif /* OPENSSL_NO_OCB */
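
A caller's-eye sketch of the API added above (illustrative fragment: enc_ks,
dec_ks, iv, aad, pt, ct and tag are assumed to exist, AES is the block
cipher, and error checking is elided):

    OCB128_CONTEXT *octx = CRYPTO_ocb128_new(&enc_ks, &dec_ks,
                                             (block128_f)AES_encrypt,
                                             (block128_f)AES_decrypt,
                                             NULL /* no stream fn */);

    CRYPTO_ocb128_setiv(octx, iv, 12, 16);     /* 12-byte IV, 16-byte tag */
    CRYPTO_ocb128_aad(octx, aad, sizeof(aad));
    CRYPTO_ocb128_encrypt(octx, pt, ct, sizeof(pt));
    CRYPTO_ocb128_tag(octx, tag, 16);          /* write the computed tag */

    CRYPTO_ocb128_cleanup(octx);               /* cleanses, does not free */
    OPENSSL_free(octx);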
diff --git a/crypto/modes/ofb128.c b/crypto/modes/ofb128.c
index 4dbaccd7a6ae..83092564c6b6 100644
--- a/crypto/modes/ofb128.c
+++ b/crypto/modes/ofb128.c
@@ -1,64 +1,16 @@
-/* ====================================================================
- * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. All advertising materials mentioning features or use of this
- * software must display the following acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
- *
- * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
- * endorse or promote products derived from this software without
- * prior written permission. For written permission, please contact
- * openssl-core@openssl.org.
- *
- * 5. Products derived from this software may not be called "OpenSSL"
- * nor may "OpenSSL" appear in their names without prior written
- * permission of the OpenSSL Project.
- *
- * 6. Redistributions of any form whatsoever must retain the following
- * acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
- *
- * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
- * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * ====================================================================
+/*
+ * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved.
*
+ * Licensed under the OpenSSL license (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
*/
#include <openssl/crypto.h>
#include "modes_lcl.h"
#include <string.h>
-#ifndef MODES_DEBUG
-# ifndef NDEBUG
-# define NDEBUG
-# endif
-#endif
-#include <assert.h>
-
/*
 * The input and output are encrypted as though 128-bit OFB mode is being used.
* The extra state information to record how much of the 128bit block we have
@@ -71,8 +23,6 @@ void CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out,
unsigned int n;
size_t l = 0;
- assert(in && out && key && ivec && num);
-
n = *num;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
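
The *num/ivec pair is what lets CRYPTO_ofb128_encrypt() be called incrementally: num records how many bytes of the current keystream block have been consumed, and ivec carries the evolving feedback block between calls. A usage sketch, assuming AES from <openssl/aes.h> supplies the block function (key and message bytes are placeholders):

#include <string.h>
#include <openssl/aes.h>
#include <openssl/modes.h>

int ofb_demo(void)
{
    static const unsigned char key[16] = { 0 };   /* placeholder key */
    unsigned char iv[16] = { 0 };                 /* working IV, updated in place */
    unsigned char msg[20] = "stream of 20 bytes.";
    unsigned char ct[20], pt[20];
    int num = 0;                                  /* keystream bytes already used */
    AES_KEY ks;

    AES_set_encrypt_key(key, 128, &ks);

    /* Encrypt in two pieces; 'num' and 'iv' carry the state across calls. */
    CRYPTO_ofb128_encrypt(msg, ct, 7, &ks, iv, &num, (block128_f)AES_encrypt);
    CRYPTO_ofb128_encrypt(msg + 7, ct + 7, 13, &ks, iv, &num,
                          (block128_f)AES_encrypt);

    /* OFB decryption is the same operation, restarted with a fresh IV/num. */
    memset(iv, 0, sizeof(iv));
    num = 0;
    CRYPTO_ofb128_encrypt(ct, pt, 20, &ks, iv, &num, (block128_f)AES_encrypt);
    return memcmp(pt, msg, 20) == 0;
}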
diff --git a/crypto/modes/wrap128.c b/crypto/modes/wrap128.c
index 384978371af2..d7e56cc260ad 100644
--- a/crypto/modes/wrap128.c
+++ b/crypto/modes/wrap128.c
@@ -1,70 +1,50 @@
-/* crypto/modes/wrap128.c */
/*
- * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
- * project.
- */
-/* ====================================================================
- * Copyright (c) 2013 The OpenSSL Project. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. All advertising materials mentioning features or use of this
- * software must display the following acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ * Copyright 2013-2018 The OpenSSL Project Authors. All Rights Reserved.
*
- * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
- * endorse or promote products derived from this software without
- * prior written permission. For written permission, please contact
- * licensing@OpenSSL.org.
- *
- * 5. Products derived from this software may not be called "OpenSSL"
- * nor may "OpenSSL" appear in their names without prior written
- * permission of the OpenSSL Project.
- *
- * 6. Redistributions of any form whatsoever must retain the following
- * acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ * Licensed under the OpenSSL license (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+/** Beware!
*
- * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
- * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * ====================================================================
+ * The following wrapping modes were designed for AES, but this
+ * implementation allows you to use them with any 128-bit block cipher.
*/
-#include "cryptlib.h"
+#include "internal/cryptlib.h"
#include <openssl/modes.h>
+/** RFC 3394 section 2.2.3.1 Default Initial Value */
static const unsigned char default_iv[] = {
0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6,
};
-/*
- * Input size limit: lower than maximum of standards but far larger than
- * anything that will be used in practice.
+/** RFC 5649 section 3 Alternative Initial Value 32-bit constant */
+static const unsigned char default_aiv[] = {
+ 0xA6, 0x59, 0x59, 0xA6
+};
+
+/** Input size limit: lower than the maximum allowed by the standards but
+ * far larger than anything that will be used in practice.
*/
#define CRYPTO128_WRAP_MAX (1UL << 31)
+/** Wrapping according to RFC 3394 section 2.2.1.
+ *
+ * @param[in] key Key value.
+ * @param[in] iv IV value. Length = 8 bytes. NULL = use default_iv.
+ * @param[in] in Plaintext as n 64-bit blocks, n >= 2.
+ * @param[in] inlen Length of in.
+ * @param[out] out Ciphertext. Minimal buffer length = (inlen + 8) bytes.
+ * Input and output buffers can overlap if block function
+ * supports that.
+ * @param[in] block Block processing function.
+ * @return 0 if inlen does not consist of n 64-bit blocks, n >= 2,
+ * or if inlen > CRYPTO128_WRAP_MAX.
+ * Output length if wrapping succeeded.
+ */
size_t CRYPTO_128_wrap(void *key, const unsigned char *iv,
unsigned char *out,
const unsigned char *in, size_t inlen,
@@ -72,7 +52,7 @@ size_t CRYPTO_128_wrap(void *key, const unsigned char *iv,
{
unsigned char *A, B[16], *R;
size_t i, j, t;
- if ((inlen & 0x7) || (inlen < 8) || (inlen > CRYPTO128_WRAP_MAX))
+ if ((inlen & 0x7) || (inlen < 16) || (inlen > CRYPTO128_WRAP_MAX))
return 0;
A = B;
t = 1;
@@ -100,10 +80,26 @@ size_t CRYPTO_128_wrap(void *key, const unsigned char *iv,
return inlen + 8;
}
-size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv,
- unsigned char *out,
- const unsigned char *in, size_t inlen,
- block128_f block)
+/** Unwrapping according to RFC 3394 section 2.2.2 steps 1-2.
+ * The IV check (step 3) is the responsibility of the caller.
+ *
+ * @param[in] key Key value.
+ * @param[out] iv Unchecked IV value. Minimal buffer length = 8 bytes.
+ * @param[out] out Plaintext without IV.
+ * Minimal buffer length = (inlen - 8) bytes.
+ * Input and output buffers can overlap if block function
+ * supports that.
+ * @param[in] in Ciphertext as n 64-bit blocks.
+ * @param[in] inlen Length of in.
+ * @param[in] block Block processing function.
+ * @return 0 if inlen is out of range [24, CRYPTO128_WRAP_MAX]
+ * or if inlen is not a multiple of 8.
+ * Output length otherwise.
+ */
+static size_t crypto_128_unwrap_raw(void *key, unsigned char *iv,
+ unsigned char *out,
+ const unsigned char *in, size_t inlen,
+ block128_f block)
{
unsigned char *A, B[16], *R;
size_t i, j, t;
@@ -128,11 +124,208 @@ size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv,
memcpy(R, B + 8, 8);
}
}
+ memcpy(iv, A, 8);
+ return inlen;
+}
+
+/** Unwrapping according to RFC 3394 section 2.2.2, including the IV check.
+ * The first block of plaintext has to match the supplied IV, otherwise an
+ * error is returned.
+ *
+ * @param[in] key Key value.
+ * @param[in] iv IV value to match against. Length = 8 bytes.
+ * NULL = use default_iv.
+ * @param[out] out Plaintext without IV.
+ * Minimal buffer length = (inlen - 8) bytes.
+ * Input and output buffers can overlap if block function
+ * supports that.
+ * @param[in] in Ciphertext as n 64-bit blocks.
+ * @param[in] inlen Length of in.
+ * @param[in] block Block processing function.
+ * @return 0 if inlen is out of range [24, CRYPTO128_WRAP_MAX]
+ * or if inlen is not a multiple of 8
+ * or if the IV doesn't match the expected value.
+ * Output length otherwise.
+ */
+size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv,
+ unsigned char *out, const unsigned char *in,
+ size_t inlen, block128_f block)
+{
+ size_t ret;
+ unsigned char got_iv[8];
+
+ ret = crypto_128_unwrap_raw(key, got_iv, out, in, inlen, block);
+ if (ret == 0)
+ return 0;
+
if (!iv)
iv = default_iv;
- if (memcmp(A, iv, 8)) {
+ if (CRYPTO_memcmp(got_iv, iv, 8)) {
+ OPENSSL_cleanse(out, ret);
+ return 0;
+ }
+ return ret;
+}
+
+/** Wrapping according to RFC 5649 section 4.1.
+ *
+ * @param[in] key Key value.
+ * @param[in] icv (Non-standard) IV, 4 bytes. NULL = use default_aiv.
+ * @param[out] out Ciphertext. Minimal buffer length = (inlen + 15) bytes.
+ * Input and output buffers can overlap if block function
+ * supports that.
+ * @param[in] in Plaintext of arbitrary length (zero-padded internally
+ * to a multiple of 8 bytes).
+ * @param[in] inlen Length of in.
+ * @param[in] block Block processing function.
+ * @return 0 if inlen is out of range [1, CRYPTO128_WRAP_MAX].
+ * Output length if wrapping succeeded.
+ */
+size_t CRYPTO_128_wrap_pad(void *key, const unsigned char *icv,
+ unsigned char *out,
+ const unsigned char *in, size_t inlen,
+ block128_f block)
+{
+ /* n: number of 64-bit blocks in the padded key data
+ *
+ * If the length of the plaintext is not a multiple of 8, pad the plaintext
+ * octet string on the right with octets of zeros, where the final length is
+ * the smallest multiple of 8 that is greater than the length of the
+ * plaintext. If the length of the plaintext is a multiple of 8, then there
+ * is no padding. */
+ const size_t blocks_padded = (inlen + 7) / 8; /* CEILING(m/8) */
+ const size_t padded_len = blocks_padded * 8;
+ const size_t padding_len = padded_len - inlen;
+ /* RFC 5649 section 3: Alternative Initial Value */
+ unsigned char aiv[8];
+ int ret;
+
+ /* Section 1: use 32-bit fixed field for plaintext octet length */
+ if (inlen == 0 || inlen >= CRYPTO128_WRAP_MAX)
+ return 0;
+
+ /* Section 3: Alternative Initial Value */
+ if (!icv)
+ memcpy(aiv, default_aiv, 4);
+ else
+ memcpy(aiv, icv, 4); /* The standard doesn't mention this. */
+
+ aiv[4] = (inlen >> 24) & 0xFF;
+ aiv[5] = (inlen >> 16) & 0xFF;
+ aiv[6] = (inlen >> 8) & 0xFF;
+ aiv[7] = inlen & 0xFF;
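+
+ /*
+ * Worked example (illustrative): for inlen = 20, blocks_padded = 3,
+ * padded_len = 24 and padding_len = 4; with the default ICV the AIV
+ * becomes A6 59 59 A6 00 00 00 14 (0x14 == 20).
+ */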
+
+ if (padded_len == 8) {
+ /*
+ * Section 4.1 - special case in step 2: If the padded plaintext
+ * contains exactly eight octets, then prepend the AIV and encrypt
+ * the resulting 128-bit block using AES in ECB mode.
+ */
+ memmove(out + 8, in, inlen);
+ memcpy(out, aiv, 8);
+ memset(out + 8 + inlen, 0, padding_len);
+ block(out, out, key);
+ ret = 16; /* AIV + padded input */
+ } else {
+ memmove(out, in, inlen);
+ memset(out + inlen, 0, padding_len); /* Section 4.1 step 1 */
+ ret = CRYPTO_128_wrap(key, aiv, out, out, padded_len, block);
+ }
+
+ return ret;
+}
+
+/** Unwrapping according to RFC 5649 section 4.2.
+ *
+ * @param[in] key Key value.
+ * @param[in] icv (Non-standard) IV, 4 bytes. NULL = use default_aiv.
+ * @param[out] out Plaintext. Minimal buffer length = (inlen - 8) bytes.
+ * Input and output buffers can overlap if block function
+ * supports that.
+ * @param[in] in Ciphertext as n 64-bit blocks.
+ * @param[in] inlen Length of in.
+ * @param[in] block Block processing function.
+ * @return 0 if inlen is out of range [16, CRYPTO128_WRAP_MAX],
+ * or if inlen is not a multiple of 8
+ * or if the IV and the message length indicator don't match.
+ * Output length if unwrapping succeeded and IV matches.
+ */
+size_t CRYPTO_128_unwrap_pad(void *key, const unsigned char *icv,
+ unsigned char *out,
+ const unsigned char *in, size_t inlen,
+ block128_f block)
+{
+ /* n: number of 64-bit blocks in the padded key data */
+ size_t n = inlen / 8 - 1;
+ size_t padded_len;
+ size_t padding_len;
+ size_t ptext_len;
+ /* RFC 5649 section 3: Alternative Initial Value */
+ unsigned char aiv[8];
+ static unsigned char zeros[8] = { 0x0 };
+ size_t ret;
+
+ /* Section 4.2: Ciphertext length has to be (n+1) 64-bit blocks. */
+ if ((inlen & 0x7) != 0 || inlen < 16 || inlen >= CRYPTO128_WRAP_MAX)
+ return 0;
+
+ if (inlen == 16) {
+ /*
+ * Section 4.2 - special case in step 1: When n=1, the ciphertext
+ * contains exactly two 64-bit blocks and they are decrypted as a
+ * single AES block using AES in ECB mode: AIV | P[1] = DEC(K, C[0] |
+ * C[1])
+ */
+ unsigned char buff[16];
+
+ block(in, buff, key);
+ memcpy(aiv, buff, 8);
+ /* Remove AIV */
+ memcpy(out, buff + 8, 8);
+ padded_len = 8;
+ OPENSSL_cleanse(buff, inlen);
+ } else {
+ padded_len = inlen - 8;
+ ret = crypto_128_unwrap_raw(key, aiv, out, in, inlen, block);
+ if (padded_len != ret) {
+ OPENSSL_cleanse(out, inlen);
+ return 0;
+ }
+ }
+
+ /*
+ * Section 3, AIV checks: check that MSB(32,A) = A65959A6. Optionally, a
+ * user-supplied value can be used (even though the standard doesn't
+ * mention this).
+ */
+ if ((!icv && CRYPTO_memcmp(aiv, default_aiv, 4))
+ || (icv && CRYPTO_memcmp(aiv, icv, 4))) {
OPENSSL_cleanse(out, inlen);
return 0;
}
- return inlen;
+
+ /*
+ * Check that 8*(n-1) < LSB(32,AIV) <= 8*n. If so, let ptext_len =
+ * LSB(32,AIV).
+ */
+
+ ptext_len = ((unsigned int)aiv[4] << 24)
+ | ((unsigned int)aiv[5] << 16)
+ | ((unsigned int)aiv[6] << 8)
+ | (unsigned int)aiv[7];
+ if (8 * (n - 1) >= ptext_len || ptext_len > 8 * n) {
+ OPENSSL_cleanse(out, inlen);
+ return 0;
+ }
+
+ /*
+ * Check that the rightmost padding_len octets of the output data are
+ * zero.
+ */
+ padding_len = padded_len - ptext_len;
+ if (CRYPTO_memcmp(out + ptext_len, zeros, padding_len) != 0) {
+ OPENSSL_cleanse(out, inlen);
+ return 0;
+ }
+
+ /* Section 4.2 step 3: Remove padding */
+ return ptext_len;
}
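
Taken together, the new API wraps keys with CRYPTO_128_wrap()/CRYPTO_128_unwrap() (RFC 3394, inputs a multiple of 8 bytes) and with CRYPTO_128_wrap_pad()/CRYPTO_128_unwrap_pad() (RFC 5649, arbitrary lengths). A self-contained sketch, assuming AES as the 128-bit block cipher (key bytes are placeholders):

#include <string.h>
#include <openssl/aes.h>
#include <openssl/modes.h>

int kw_demo(void)
{
    static const unsigned char kek[16] = { 0 };        /* placeholder KEK */
    static const unsigned char cek[24] = { 1, 2, 3 };  /* key to wrap */
    unsigned char wrapped[sizeof(cek) + 8];            /* inlen + 8 */
    unsigned char unwrapped[sizeof(cek)];
    AES_KEY ekey, dkey;
    size_t wlen, ulen;

    AES_set_encrypt_key(kek, 128, &ekey);
    AES_set_decrypt_key(kek, 128, &dkey);

    /* RFC 3394: inlen must be a multiple of 8 and at least 16.
     * A NULL iv selects default_iv (A6 A6 ... A6). */
    wlen = CRYPTO_128_wrap(&ekey, NULL, wrapped, cek, sizeof(cek),
                           (block128_f)AES_encrypt);
    if (wlen != sizeof(cek) + 8)
        return 0;

    /* Unwrapping runs the block cipher backwards, so it takes the
     * decryption key schedule; the IV check happens internally. */
    ulen = CRYPTO_128_unwrap(&dkey, NULL, unwrapped, wrapped, wlen,
                             (block128_f)AES_decrypt);
    if (ulen != sizeof(cek) || memcmp(unwrapped, cek, ulen) != 0)
        return 0;

    /* For lengths that are not a multiple of 8, use the RFC 5649 pair
     * CRYPTO_128_wrap_pad()/CRYPTO_128_unwrap_pad() instead; the output
     * grows to the next multiple of 8, plus 8 bytes for the AIV. */
    return 1;
}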
diff --git a/crypto/modes/xts128.c b/crypto/modes/xts128.c
index 8f2af588b26d..81b1eacd5920 100644
--- a/crypto/modes/xts128.c
+++ b/crypto/modes/xts128.c
@@ -1,63 +1,16 @@
-/* ====================================================================
- * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
+/*
+ * Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
*
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. All advertising materials mentioning features or use of this
- * software must display the following acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
- *
- * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
- * endorse or promote products derived from this software without
- * prior written permission. For written permission, please contact
- * openssl-core@openssl.org.
- *
- * 5. Products derived from this software may not be called "OpenSSL"
- * nor may "OpenSSL" appear in their names without prior written
- * permission of the OpenSSL Project.
- *
- * 6. Redistributions of any form whatsoever must retain the following
- * acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
- *
- * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
- * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * ====================================================================
+ * Licensed under the OpenSSL license (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
*/
#include <openssl/crypto.h>
#include "modes_lcl.h"
#include <string.h>
-#ifndef MODES_DEBUG
-# ifndef NDEBUG
-# define NDEBUG
-# endif
-#endif
-#include <assert.h>
-
int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx,
const unsigned char iv[16],
const unsigned char *inp, unsigned char *out,