author | Jung-uk Kim <jkim@FreeBSD.org> | 2018-09-13 19:18:07 +0000
---|---|---
committer | Jung-uk Kim <jkim@FreeBSD.org> | 2018-09-13 19:18:07 +0000
commit | a43ce912fc025d11e1395506111f75fc194d7ba5 (patch) |
tree | 9794cf7720d75938ed0ea4f499c0dcd4b6eacdda /crypto/modes |
parent | 02be298e504b8554caca6dc85af450e1ea44d19d (diff) |
download | src-a43ce912fc025d11e1395506111f75fc194d7ba5.tar.gz, src-a43ce912fc025d11e1395506111f75fc194d7ba5.zip |
Import OpenSSL 1.1.1. (tag: vendor/openssl/1.1.1)
Notes:
svn path=/vendor-crypto/openssl/dist/; revision=338658
svn path=/vendor-crypto/openssl/1.1.1/; revision=338659; tag=vendor/openssl/1.1.1
Diffstat (limited to 'crypto/modes')
-rw-r--r-- | crypto/modes/Makefile | 160
-rwxr-xr-x | crypto/modes/asm/aesni-gcm-x86_64.pl | 54
-rwxr-xr-x | crypto/modes/asm/ghash-armv4.pl | 101
-rwxr-xr-x | crypto/modes/asm/ghash-c64xplus.pl | 247
-rwxr-xr-x | crypto/modes/asm/ghash-ia64.pl | 13
-rwxr-xr-x | crypto/modes/asm/ghash-parisc.pl | 23
-rwxr-xr-x | crypto/modes/asm/ghash-s390x.pl | 36
-rwxr-xr-x | crypto/modes/asm/ghash-sparcv9.pl | 32
-rwxr-xr-x | crypto/modes/asm/ghash-x86.pl | 35
-rwxr-xr-x | crypto/modes/asm/ghash-x86_64.pl | 97
-rwxr-xr-x | crypto/modes/asm/ghashp8-ppc.pl | 481
-rwxr-xr-x | crypto/modes/asm/ghashv8-armx.pl | 404
-rw-r--r-- | crypto/modes/build.info | 30
-rw-r--r-- | crypto/modes/cbc128.c | 66
-rw-r--r-- | crypto/modes/ccm128.c | 59
-rw-r--r-- | crypto/modes/cfb128.c | 70
-rw-r--r-- | crypto/modes/ctr128.c | 70
-rw-r--r-- | crypto/modes/cts128.c | 226
-rw-r--r-- | crypto/modes/gcm128.c | 1273
-rw-r--r-- | crypto/modes/modes.h | 163
-rw-r--r-- | crypto/modes/modes_lcl.h | 55
-rw-r--r-- | crypto/modes/ocb128.c | 562
-rw-r--r-- | crypto/modes/ofb128.c | 62
-rw-r--r-- | crypto/modes/wrap128.c | 313
-rw-r--r-- | crypto/modes/xts128.c | 59
25 files changed, 2725 insertions, 1966 deletions
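Every ghash-*.pl module touched below generates platform-specific code for the same primitive: the GHASH universal hash, i.e. repeated multiplication by the key-dependent constant H in GF(2^128) modulo x^128 + x^7 + x^2 + x + 1. For orientation before the per-architecture diffs, here is a minimal portable sketch of one GHASH block step, the bit-at-a-time baseline that the commit comments measure their speedups against; the function name and layout are illustrative, not OpenSSL's actual gcm128.c code:

```c
#include <stdint.h>
#include <string.h>

/*
 * One GHASH step: Xi = (Xi ^ inp) * H in GF(2^128).
 * GCM's bit order makes the MSB of byte 0 the x^0 coefficient,
 * hence the right-shifting multiply and the 0xE1 reduction byte
 * (the reflected form of x^128 + x^7 + x^2 + x + 1).
 */
static void ghash_block(uint8_t Xi[16], const uint8_t H[16],
                        const uint8_t inp[16])
{
    uint8_t Z[16] = { 0 }, V[16];

    for (int i = 0; i < 16; i++)
        Xi[i] ^= inp[i];                  /* fold in the data block */
    memcpy(V, H, 16);

    for (int i = 0; i < 16; i++) {        /* scan Xi's 128 bits, MSB first */
        for (int j = 7; j >= 0; j--) {
            if ((Xi[i] >> j) & 1)         /* Z ^= V when this bit is set */
                for (int k = 0; k < 16; k++)
                    Z[k] ^= V[k];
            int carry = V[15] & 1;        /* V *= x: shift right ... */
            for (int k = 15; k > 0; k--)
                V[k] = (uint8_t)((V[k] >> 1) | (V[k - 1] << 7));
            V[0] >>= 1;
            if (carry)
                V[0] ^= 0xE1;             /* ... and reduce on overflow */
        }
    }
    memcpy(Xi, Z, 16);                    /* running hash for next block */
}
```

The assembly modules in this import replace that inner bit loop with carry-less multiply hardware (PCLMULQDQ on x86_64, PMULL on ARMv8, vpmsumd on POWER8, KIMD function 65 on s390x) and amortize the reduction over several blocks, which is what the "2x/4x aggregated reduction" remarks in the diffs below refer to.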
diff --git a/crypto/modes/Makefile b/crypto/modes/Makefile deleted file mode 100644 index 2528f4a1b9ca..000000000000 --- a/crypto/modes/Makefile +++ /dev/null @@ -1,160 +0,0 @@ -# -# OpenSSL/crypto/modes/Makefile -# - -DIR= modes -TOP= ../.. -CC= cc -INCLUDES= -I.. -I$(TOP) -I../../include -CFLAG=-g -MAKEFILE= Makefile -AR= ar r - -MODES_ASM_OBJ= - -CFLAGS= $(INCLUDES) $(CFLAG) -ASFLAGS= $(INCLUDES) $(ASFLAG) -AFLAGS= $(ASFLAGS) - -GENERAL=Makefile -TEST= -APPS= - -LIB=$(TOP)/libcrypto.a -LIBSRC= cbc128.c ctr128.c cts128.c cfb128.c ofb128.c gcm128.c \ - ccm128.c xts128.c wrap128.c -LIBOBJ= cbc128.o ctr128.o cts128.o cfb128.o ofb128.o gcm128.o \ - ccm128.o xts128.o wrap128.o $(MODES_ASM_OBJ) - -SRC= $(LIBSRC) - -#EXHEADER= store.h str_compat.h -EXHEADER= modes.h -HEADER= modes_lcl.h $(EXHEADER) - -ALL= $(GENERAL) $(SRC) $(HEADER) - -top: - (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all) - -all: lib - -lib: $(LIBOBJ) - $(AR) $(LIB) $(LIBOBJ) - $(RANLIB) $(LIB) || echo Never mind. - @touch lib - -ghash-ia64.s: asm/ghash-ia64.pl - $(PERL) asm/ghash-ia64.pl $@ $(CFLAGS) -ghash-x86.s: asm/ghash-x86.pl - $(PERL) asm/ghash-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ -ghash-x86_64.s: asm/ghash-x86_64.pl - $(PERL) asm/ghash-x86_64.pl $(PERLASM_SCHEME) > $@ -aesni-gcm-x86_64.s: asm/aesni-gcm-x86_64.pl - $(PERL) asm/aesni-gcm-x86_64.pl $(PERLASM_SCHEME) > $@ -ghash-sparcv9.s: asm/ghash-sparcv9.pl - $(PERL) asm/ghash-sparcv9.pl $@ $(CFLAGS) -ghash-alpha.s: asm/ghash-alpha.pl - (preproc=$$$$.$@.S; trap "rm $$preproc" INT; \ - $(PERL) asm/ghash-alpha.pl > $$preproc && \ - $(CC) -E -P $$preproc > $@ && rm $$preproc) -ghash-parisc.s: asm/ghash-parisc.pl - $(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@ -ghashv8-armx.S: asm/ghashv8-armx.pl - $(PERL) asm/ghashv8-armx.pl $(PERLASM_SCHEME) $@ -ghashp8-ppc.s: asm/ghashp8-ppc.pl - $(PERL) asm/ghashp8-ppc.pl $(PERLASM_SCHEME) $@ - -# GNU make "catch all" -ghash-%.S: asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@ - -ghash-armv4.o: ghash-armv4.S -ghashv8-armx.o: ghashv8-armx.S - -files: - $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO - -links: - @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) - @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST) - @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS) - -install: - @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile... - @headerlist="$(EXHEADER)"; for i in $$headerlist; \ - do \ - (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \ - chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ - done; - -tags: - ctags $(SRC) - -tests: - -lint: - lint -DLINT $(INCLUDES) $(SRC)>fluff - -update: depend - -depend: - @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... - $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) - -dclean: - $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new - mv -f Makefile.new $(MAKEFILE) - -clean: - rm -f *.s *.S *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff - -# DO NOT DELETE THIS LINE -- make depend depends on it. 
- -cbc128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h -cbc128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h -cbc128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -cbc128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -cbc128.o: ../../include/openssl/symhacks.h cbc128.c modes_lcl.h -ccm128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h -ccm128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h -ccm128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -ccm128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -ccm128.o: ../../include/openssl/symhacks.h ccm128.c modes_lcl.h -cfb128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h -cfb128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h -cfb128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -cfb128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -cfb128.o: ../../include/openssl/symhacks.h cfb128.c modes_lcl.h -ctr128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h -ctr128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h -ctr128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -ctr128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -ctr128.o: ../../include/openssl/symhacks.h ctr128.c modes_lcl.h -cts128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h -cts128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h -cts128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -cts128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -cts128.o: ../../include/openssl/symhacks.h cts128.c modes_lcl.h -gcm128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h -gcm128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h -gcm128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -gcm128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -gcm128.o: ../../include/openssl/symhacks.h gcm128.c modes_lcl.h -ofb128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h -ofb128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h -ofb128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -ofb128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -ofb128.o: ../../include/openssl/symhacks.h modes_lcl.h ofb128.c -wrap128.o: ../../e_os.h ../../include/openssl/bio.h -wrap128.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -wrap128.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -wrap128.o: ../../include/openssl/lhash.h ../../include/openssl/modes.h -wrap128.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h -wrap128.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h -wrap128.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -wrap128.o: ../cryptlib.h wrap128.c -xts128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h -xts128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h -xts128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -xts128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -xts128.o: ../../include/openssl/symhacks.h modes_lcl.h xts128.c diff --git a/crypto/modes/asm/aesni-gcm-x86_64.pl 
b/crypto/modes/asm/aesni-gcm-x86_64.pl index 980cfd23efe3..b42016101ebc 100755 --- a/crypto/modes/asm/aesni-gcm-x86_64.pl +++ b/crypto/modes/asm/aesni-gcm-x86_64.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -22,10 +29,13 @@ # [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max # Locktyukhin of Intel Corp. who verified that it reduces shuffles # pressure with notable relative improvement, achieving 1.0 cycle per -# byte processed with 128-bit key on Haswell processor, and 0.74 - -# on Broadwell. [Mentioned results are raw profiled measurements for -# favourable packet size, one divisible by 96. Applications using the -# EVP interface will observe a few percent worse performance.] +# byte processed with 128-bit key on Haswell processor, 0.74 - on +# Broadwell, 0.63 - on Skylake... [Mentioned results are raw profiled +# measurements for favourable packet size, one divisible by 96. +# Applications using the EVP interface will observe a few percent +# worse performance.] +# +# Knights Landing processes 1 byte in 1.25 cycles (measured with EVP). # # [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest # [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf @@ -60,7 +70,7 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([ $avx = ($2>=3.0) + ($2>3.0); } -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT=*OUT; if ($avx>1) {{{ @@ -399,17 +409,25 @@ $code.=<<___; .type aesni_gcm_decrypt,\@function,6 .align 32 aesni_gcm_decrypt: +.cfi_startproc xor $ret,$ret cmp \$0x60,$len # minimal accepted length jb .Lgcm_dec_abort lea (%rsp),%rax # save stack pointer +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -501,15 +519,23 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp # restore %rsp +.cfi_def_cfa_register %rsp .Lgcm_dec_abort: mov $ret,%rax # return value ret +.cfi_endproc .size aesni_gcm_decrypt,.-aesni_gcm_decrypt ___ @@ -609,17 +635,25 @@ _aesni_ctr32_6x: .type aesni_gcm_encrypt,\@function,6 .align 32 aesni_gcm_encrypt: +.cfi_startproc xor $ret,$ret cmp \$0x60*3,$len # minimal accepted length jb .Lgcm_enc_abort lea (%rsp),%rax # save stack pointer +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -882,15 +916,23 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 
mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp # restore %rsp +.cfi_def_cfa_register %rsp .Lgcm_enc_abort: mov $ret,%rax # return value ret +.cfi_endproc .size aesni_gcm_encrypt,.-aesni_gcm_encrypt ___ diff --git a/crypto/modes/asm/ghash-armv4.pl b/crypto/modes/asm/ghash-armv4.pl index 8ccc963ef297..dcc23f7d7dbe 100755 --- a/crypto/modes/asm/ghash-armv4.pl +++ b/crypto/modes/asm/ghash-armv4.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -42,12 +49,12 @@ # below and combine it with reduction algorithm from x86 module. # Performance improvement over previous version varies from 65% on # Snapdragon S4 to 110% on Cortex A9. In absolute terms Cortex A8 -# processes one byte in 8.45 cycles, A9 - in 10.2, Snapdragon S4 - -# in 9.33. +# processes one byte in 8.45 cycles, A9 - in 10.2, A15 - in 7.63, +# Snapdragon S4 - in 9.33. # # Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software # Polynomial Multiplication on ARM Processors using the NEON Engine. -# +# # http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf # ==================================================================== @@ -71,8 +78,20 @@ # *native* byte order on current platform. See gcm128.c for working # example... -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; +$flavour = shift; +if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} $Xi="r0"; # argument block $Htbl="r1"; @@ -124,11 +143,15 @@ $code=<<___; #include "arm_arch.h" .text +#if defined(__thumb2__) || defined(__clang__) +.syntax unified +#define ldrplb ldrbpl +#define ldrneb ldrbne +#endif +#if defined(__thumb2__) +.thumb +#else .code 32 - -#ifdef __clang__ -#define ldrplb ldrbpl -#define ldrneb ldrbne #endif .type rem_4bit,%object @@ -142,19 +165,27 @@ rem_4bit: .type rem_4bit_get,%function rem_4bit_get: - sub $rem_4bit,pc,#8 - sub $rem_4bit,$rem_4bit,#32 @ &rem_4bit +#if defined(__thumb2__) + adr $rem_4bit,rem_4bit +#else + sub $rem_4bit,pc,#8+32 @ &rem_4bit +#endif b .Lrem_4bit_got nop + nop .size rem_4bit_get,.-rem_4bit_get .global gcm_ghash_4bit .type gcm_ghash_4bit,%function +.align 4 gcm_ghash_4bit: - sub r12,pc,#8 +#if defined(__thumb2__) + adr r12,rem_4bit +#else + sub r12,pc,#8+48 @ &rem_4bit +#endif add $len,$inp,$len @ $len to point at the end stmdb sp!,{r3-r11,lr} @ save $len/end too - sub r12,r12,#48 @ &rem_4bit ldmia r12,{r4-r11} @ copy rem_4bit ... stmdb sp!,{r4-r11} @ ... 
to stack @@ -201,6 +232,9 @@ gcm_ghash_4bit: eor $Zlh,$Zlh,$Zhl,lsl#28 ldrh $Tll,[sp,$nlo] @ rem_4bit[rem] eor $Zhl,$Thl,$Zhl,lsr#4 +#ifdef __thumb2__ + it pl +#endif ldrplb $nlo,[$inp,$cnt] eor $Zhl,$Zhl,$Zhh,lsl#28 eor $Zhh,$Thh,$Zhh,lsr#4 @@ -211,6 +245,9 @@ gcm_ghash_4bit: add $nhi,$nhi,$nhi ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi] eor $Zll,$Tll,$Zll,lsr#4 +#ifdef __thumb2__ + it pl +#endif ldrplb $Tll,[$Xi,$cnt] eor $Zll,$Zll,$Zlh,lsl#28 eor $Zlh,$Tlh,$Zlh,lsr#4 @@ -218,8 +255,14 @@ gcm_ghash_4bit: eor $Zlh,$Zlh,$Zhl,lsl#28 eor $Zhl,$Thl,$Zhl,lsr#4 eor $Zhl,$Zhl,$Zhh,lsl#28 +#ifdef __thumb2__ + it pl +#endif eorpl $nlo,$nlo,$Tll eor $Zhh,$Thh,$Zhh,lsr#4 +#ifdef __thumb2__ + itt pl +#endif andpl $nhi,$nlo,#0xf0 andpl $nlo,$nlo,#0x0f eor $Zhh,$Zhh,$Tlh,lsl#16 @ ^= rem_4bit[rem] @@ -229,7 +272,11 @@ gcm_ghash_4bit: add $inp,$inp,#16 mov $nhi,$Zll ___ - &Zsmash("cmp\t$inp,$len","ldrneb\t$nlo,[$inp,#15]"); + &Zsmash("cmp\t$inp,$len","\n". + "#ifdef __thumb2__\n". + " it ne\n". + "#endif\n". + " ldrneb $nlo,[$inp,#15]"); $code.=<<___; bne .Louter @@ -287,6 +334,9 @@ gcm_gmult_4bit: eor $Zlh,$Zlh,$Zhl,lsl#28 ldrh $Tll,[$rem_4bit,$nlo] @ rem_4bit[rem] eor $Zhl,$Thl,$Zhl,lsr#4 +#ifdef __thumb2__ + it pl +#endif ldrplb $nlo,[$Xi,$cnt] eor $Zhl,$Zhl,$Zhh,lsl#28 eor $Zhh,$Thh,$Zhh,lsr#4 @@ -304,6 +354,9 @@ gcm_gmult_4bit: eor $Zhl,$Thl,$Zhl,lsr#4 eor $Zhl,$Zhl,$Zhh,lsl#28 eor $Zhh,$Thh,$Zhh,lsr#4 +#ifdef __thumb2__ + itt pl +#endif andpl $nhi,$nlo,#0xf0 andpl $nlo,$nlo,#0x0f eor $Zhh,$Zhh,$Tll,lsl#16 @ ^= rem_4bit[rem] @@ -378,9 +431,9 @@ $code.=<<___; .type gcm_init_neon,%function .align 4 gcm_init_neon: - vld1.64 $IN#hi,[r1,:64]! @ load H + vld1.64 $IN#hi,[r1]! @ load H vmov.i8 $t0,#0xe1 - vld1.64 $IN#lo,[r1,:64] + vld1.64 $IN#lo,[r1] vshl.i64 $t0#hi,#57 vshr.u64 $t0#lo,#63 @ t0=0xc2....01 vdup.8 $t1,$IN#hi[7] @@ -399,8 +452,8 @@ gcm_init_neon: .type gcm_gmult_neon,%function .align 4 gcm_gmult_neon: - vld1.64 $IN#hi,[$Xi,:64]! @ load Xi - vld1.64 $IN#lo,[$Xi,:64]! + vld1.64 $IN#hi,[$Xi]! @ load Xi + vld1.64 $IN#lo,[$Xi]! vmov.i64 $k48,#0x0000ffffffffffff vldmia $Htbl,{$Hlo-$Hhi} @ load twisted H vmov.i64 $k32,#0x00000000ffffffff @@ -417,8 +470,8 @@ gcm_gmult_neon: .type gcm_ghash_neon,%function .align 4 gcm_ghash_neon: - vld1.64 $Xl#hi,[$Xi,:64]! @ load Xi - vld1.64 $Xl#lo,[$Xi,:64]! + vld1.64 $Xl#hi,[$Xi]! @ load Xi + vld1.64 $Xl#lo,[$Xi]! vmov.i64 $k48,#0x0000ffffffffffff vldmia $Htbl,{$Hlo-$Hhi} @ load twisted H vmov.i64 $k32,#0x00000000ffffffff @@ -472,9 +525,9 @@ $code.=<<___; #ifdef __ARMEL__ vrev64.8 $Xl,$Xl #endif - sub $Xi,#16 - vst1.64 $Xl#hi,[$Xi,:64]! @ write out Xi - vst1.64 $Xl#lo,[$Xi,:64] + sub $Xi,#16 + vst1.64 $Xl#hi,[$Xi]! @ write out Xi + vst1.64 $Xl#lo,[$Xi] ret @ bx lr .size gcm_ghash_neon,.-gcm_ghash_neon diff --git a/crypto/modes/asm/ghash-c64xplus.pl b/crypto/modes/asm/ghash-c64xplus.pl new file mode 100755 index 000000000000..3cadda39945c --- /dev/null +++ b/crypto/modes/asm/ghash-c64xplus.pl @@ -0,0 +1,247 @@ +#! /usr/bin/env perl +# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + +# +# ==================================================================== +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# project. 
The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# December 2011 +# +# The module implements GCM GHASH function and underlying single +# multiplication operation in GF(2^128). Even though subroutines +# have _4bit suffix, they are not using any tables, but rely on +# hardware Galois Field Multiply support. Streamed GHASH processes +# byte in ~7 cycles, which is >6x faster than "4-bit" table-driven +# code compiled with TI's cl6x 6.0 with -mv6400+ -o2 flags. We are +# comparing apples vs. oranges, but compiler surely could have done +# better, because theoretical [though not necessarily achievable] +# estimate for "4-bit" table-driven implementation is ~12 cycles. + +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +($Xip,$Htable,$inp,$len)=("A4","B4","A6","B6"); # arguments + +($Z0,$Z1,$Z2,$Z3, $H0, $H1, $H2, $H3, + $H0x,$H1x,$H2x,$H3x)=map("A$_",(16..27)); +($H01u,$H01y,$H2u,$H3u, $H0y,$H1y,$H2y,$H3y, + $H0z,$H1z,$H2z,$H3z)=map("B$_",(16..27)); +($FF000000,$E10000)=("B30","B31"); +($xip,$x0,$x1,$xib)=map("B$_",(6..9)); # $xip zaps $len + $xia="A9"; +($rem,$res)=("B4","B5"); # $rem zaps $Htable + +$code.=<<___; + .text + + .if .ASSEMBLER_VERSION<7000000 + .asg 0,__TI_EABI__ + .endif + .if __TI_EABI__ + .asg gcm_gmult_1bit,_gcm_gmult_1bit + .asg gcm_gmult_4bit,_gcm_gmult_4bit + .asg gcm_ghash_4bit,_gcm_ghash_4bit + .endif + + .asg B3,RA + + .if 0 + .global _gcm_gmult_1bit +_gcm_gmult_1bit: + ADDAD $Htable,2,$Htable + .endif + .global _gcm_gmult_4bit +_gcm_gmult_4bit: + .asmfunc + LDDW *${Htable}[-1],$H1:$H0 ; H.lo + LDDW *${Htable}[-2],$H3:$H2 ; H.hi +|| MV $Xip,${xip} ; reassign Xi +|| MVK 15,B1 ; SPLOOPD constant + + MVK 0xE1,$E10000 +|| LDBU *++${xip}[15],$x1 ; Xi[15] + MVK 0xFF,$FF000000 +|| LDBU *--${xip},$x0 ; Xi[14] + SHL $E10000,16,$E10000 ; [pre-shifted] reduction polynomial + SHL $FF000000,24,$FF000000 ; upper byte mask +|| BNOP ghash_loop? 
+|| MVK 1,B0 ; take a single spin + + PACKH2 $H0,$H1,$xia ; pack H0' and H1's upper bytes + AND $H2,$FF000000,$H2u ; H2's upper byte + AND $H3,$FF000000,$H3u ; H3's upper byte +|| SHRU $H2u,8,$H2u + SHRU $H3u,8,$H3u +|| ZERO $Z1:$Z0 + SHRU2 $xia,8,$H01u +|| ZERO $Z3:$Z2 + .endasmfunc + + .global _gcm_ghash_4bit +_gcm_ghash_4bit: + .asmfunc + LDDW *${Htable}[-1],$H1:$H0 ; H.lo +|| SHRU $len,4,B0 ; reassign len + LDDW *${Htable}[-2],$H3:$H2 ; H.hi +|| MV $Xip,${xip} ; reassign Xi +|| MVK 15,B1 ; SPLOOPD constant + + MVK 0xE1,$E10000 +|| [B0] LDNDW *${inp}[1],$H1x:$H0x + MVK 0xFF,$FF000000 +|| [B0] LDNDW *${inp}++[2],$H3x:$H2x + SHL $E10000,16,$E10000 ; [pre-shifted] reduction polynomial +|| LDDW *${xip}[1],$Z1:$Z0 + SHL $FF000000,24,$FF000000 ; upper byte mask +|| LDDW *${xip}[0],$Z3:$Z2 + + PACKH2 $H0,$H1,$xia ; pack H0' and H1's upper bytes + AND $H2,$FF000000,$H2u ; H2's upper byte + AND $H3,$FF000000,$H3u ; H3's upper byte +|| SHRU $H2u,8,$H2u + SHRU $H3u,8,$H3u + SHRU2 $xia,8,$H01u + +|| [B0] XOR $H0x,$Z0,$Z0 ; Xi^=inp +|| [B0] XOR $H1x,$Z1,$Z1 + .if .LITTLE_ENDIAN + [B0] XOR $H2x,$Z2,$Z2 +|| [B0] XOR $H3x,$Z3,$Z3 +|| [B0] SHRU $Z1,24,$xia ; Xi[15], avoid cross-path stall + STDW $Z1:$Z0,*${xip}[1] +|| [B0] SHRU $Z1,16,$x0 ; Xi[14] +|| [B0] ZERO $Z1:$Z0 + .else + [B0] XOR $H2x,$Z2,$Z2 +|| [B0] XOR $H3x,$Z3,$Z3 +|| [B0] MV $Z0,$xia ; Xi[15], avoid cross-path stall + STDW $Z1:$Z0,*${xip}[1] +|| [B0] SHRU $Z0,8,$x0 ; Xi[14] +|| [B0] ZERO $Z1:$Z0 + .endif + STDW $Z3:$Z2,*${xip}[0] +|| [B0] ZERO $Z3:$Z2 +|| [B0] MV $xia,$x1 + [B0] ADDK 14,${xip} + +ghash_loop?: + SPLOOPD 6 ; 6*16+7 +|| MVC B1,ILC +|| [B0] SUB B0,1,B0 +|| ZERO A0 +|| ADD $x1,$x1,$xib ; SHL $x1,1,$xib +|| SHL $x1,1,$xia +___ + +########____________________________ +# 0 D2. M1 M2 | +# 1 M1 | +# 2 M1 M2 | +# 3 D1. M1 M2 | +# 4 S1. L1 | +# 5 S2 S1x L1 D2 L2 |____________________________ +# 6/0 L1 S1 L2 S2x |D2. M1 M2 | +# 7/1 L1 S1 D1x S2 M2 | M1 | +# 8/2 S1 L1x S2 | M1 M2 | +# 9/3 S1 L1x | D1. M1 M2 | +# 10/4 D1x | S1. L1 | +# 11/5 |S2 S1x L1 D2 L2 |____________ +# 12/6/0 D1x __| L1 S1 L2 S2x |D2. .... +# 7/1 L1 S1 D1x S2 M2 | .... +# 8/2 S1 L1x S2 | .... +#####... ................|............ +$code.=<<___; + XORMPY $H0,$xia,$H0x ; 0 ; H·(Xi[i]<<1) +|| XORMPY $H01u,$xib,$H01y +|| [A0] LDBU *--${xip},$x0 + XORMPY $H1,$xia,$H1x ; 1 + XORMPY $H2,$xia,$H2x ; 2 +|| XORMPY $H2u,$xib,$H2y + XORMPY $H3,$xia,$H3x ; 3 +|| XORMPY $H3u,$xib,$H3y +||[!A0] MVK.D 15,A0 ; *--${xip} counter + XOR.L $H0x,$Z0,$Z0 ; 4 ; Z^=H·(Xi[i]<<1) +|| [A0] SUB.S A0,1,A0 + XOR.L $H1x,$Z1,$Z1 ; 5 +|| AND.D $H01y,$FF000000,$H0z +|| SWAP2.L $H01y,$H1y ; ; SHL $H01y,16,$H1y +|| SHL $x0,1,$xib +|| SHL $x0,1,$xia + + XOR.L $H2x,$Z2,$Z2 ; 6/0 ; [0,0] in epilogue +|| SHL $Z0,1,$rem ; ; rem=Z<<1 +|| SHRMB.S $Z1,$Z0,$Z0 ; ; Z>>=8 +|| AND.L $H1y,$FF000000,$H1z + XOR.L $H3x,$Z3,$Z3 ; 7/1 +|| SHRMB.S $Z2,$Z1,$Z1 +|| XOR.D $H0z,$Z0,$Z0 ; merge upper byte products +|| AND.S $H2y,$FF000000,$H2z +|| XORMPY $E10000,$rem,$res ; ; implicit rem&0x1FE + XOR.L $H1z,$Z1,$Z1 ; 8/2 +|| SHRMB.S $Z3,$Z2,$Z2 +|| AND.S $H3y,$FF000000,$H3z + XOR.L $H2z,$Z2,$Z2 ; 9/3 +|| SHRU $Z3,8,$Z3 + XOR.D $H3z,$Z3,$Z3 ; 10/4 + NOP ; 11/5 + + SPKERNEL 0,2 +|| XOR.D $res,$Z3,$Z3 ; 12/6/0; Z^=res + + ; input pre-fetch is possible where D1 slot is available... 
+ [B0] LDNDW *${inp}[1],$H1x:$H0x ; 8/- + [B0] LDNDW *${inp}++[2],$H3x:$H2x ; 9/- + NOP ; 10/- + .if .LITTLE_ENDIAN + SWAP2 $Z0,$Z1 ; 11/- +|| SWAP4 $Z1,$Z0 + SWAP4 $Z1,$Z1 ; 12/- +|| SWAP2 $Z0,$Z0 + SWAP2 $Z2,$Z3 +|| SWAP4 $Z3,$Z2 +||[!B0] BNOP RA + SWAP4 $Z3,$Z3 +|| SWAP2 $Z2,$Z2 +|| [B0] BNOP ghash_loop? + [B0] XOR $H0x,$Z0,$Z0 ; Xi^=inp +|| [B0] XOR $H1x,$Z1,$Z1 + [B0] XOR $H2x,$Z2,$Z2 +|| [B0] XOR $H3x,$Z3,$Z3 +|| [B0] SHRU $Z1,24,$xia ; Xi[15], avoid cross-path stall + STDW $Z1:$Z0,*${xip}[1] +|| [B0] SHRU $Z1,16,$x0 ; Xi[14] +|| [B0] ZERO $Z1:$Z0 + .else + [!B0] BNOP RA ; 11/- + [B0] BNOP ghash_loop? ; 12/- + [B0] XOR $H0x,$Z0,$Z0 ; Xi^=inp +|| [B0] XOR $H1x,$Z1,$Z1 + [B0] XOR $H2x,$Z2,$Z2 +|| [B0] XOR $H3x,$Z3,$Z3 +|| [B0] MV $Z0,$xia ; Xi[15], avoid cross-path stall + STDW $Z1:$Z0,*${xip}[1] +|| [B0] SHRU $Z0,8,$x0 ; Xi[14] +|| [B0] ZERO $Z1:$Z0 + .endif + STDW $Z3:$Z2,*${xip}[0] +|| [B0] ZERO $Z3:$Z2 +|| [B0] MV $xia,$x1 + [B0] ADDK 14,${xip} + .endasmfunc + + .sect .const + .cstring "GHASH for C64x+, CRYPTOGAMS by <appro\@openssl.org>" + .align 4 +___ + +print $code; +close STDOUT; diff --git a/crypto/modes/asm/ghash-ia64.pl b/crypto/modes/asm/ghash-ia64.pl index 0354c9544485..eb9ded91e5b6 100755 --- a/crypto/modes/asm/ghash-ia64.pl +++ b/crypto/modes/asm/ghash-ia64.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -32,7 +39,7 @@ # Itanium performance should remain the same as the "256B" version, # i.e. ~8.5 cycles. -$output=shift and (open STDOUT,">$output" or die "can't open $output: $!"); +$output=pop and (open STDOUT,">$output" or die "can't open $output: $!"); if ($^O eq "hpux") { $ADDP="addp4"; @@ -149,7 +156,7 @@ $code.=<<___; ___ ###################################################################### -# "528B" (well, "512B" actualy) streamed GHASH +# "528B" (well, "512B" actually) streamed GHASH # $Xip="in0"; $Htbl="in1"; diff --git a/crypto/modes/asm/ghash-parisc.pl b/crypto/modes/asm/ghash-parisc.pl index d5ad96b40335..a614c99c22ce 100755 --- a/crypto/modes/asm/ghash-parisc.pl +++ b/crypto/modes/asm/ghash-parisc.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -698,7 +705,7 @@ my $depd = sub { my ($mod,$args) = @_; my $orig = "depd$mod\t$args"; - # I only have ",z" completer, it's impicitly encoded... + # I only have ",z" completer, it's implicitly encoded... if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 16 { my $opcode=(0x3c<<26)|($4<<21)|($1<<16); my $cpos=63-$2; @@ -717,6 +724,11 @@ sub assemble { ref($opcode) eq 'CODE' ? 
&$opcode($mod,$args) : "\t$mnemonic$mod\t$args"; } +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler/) { + $gnuas = 1; +} + foreach (split("\n",$code)) { s/\`([^\`]*)\`/eval $1/ge; if ($SIZE_T==4) { @@ -724,7 +736,12 @@ foreach (split("\n",$code)) { s/cmpb,\*/comb,/; s/,\*/,/; } - s/\bbv\b/bve/ if ($SIZE_T==8); + + s/(\.LEVEL\s+2\.0)W/$1w/ if ($gnuas && $SIZE_T==8); + s/\.SPACE\s+\$TEXT\$/.text/ if ($gnuas && $SIZE_T==8); + s/\.SUBSPA.*// if ($gnuas && $SIZE_T==8); + s/\bbv\b/bve/ if ($SIZE_T==8); + print $_,"\n"; } diff --git a/crypto/modes/asm/ghash-s390x.pl b/crypto/modes/asm/ghash-s390x.pl index be7d55f74876..17dc375053c5 100755 --- a/crypto/modes/asm/ghash-s390x.pl +++ b/crypto/modes/asm/ghash-s390x.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -47,7 +54,7 @@ if ($flavour =~ /3[12]/) { $g="g"; } -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; $softonly=0; @@ -73,6 +80,8 @@ $rem_4bit="%r14"; $sp="%r15"; $code.=<<___; +#include "s390x_arch.h" + .text .globl gcm_gmult_4bit @@ -81,16 +90,14 @@ gcm_gmult_4bit: ___ $code.=<<___ if(!$softonly && 0); # hardware is slow for single block... larl %r1,OPENSSL_s390xcap_P - lg %r0,0(%r1) - tmhl %r0,0x4000 # check for message-security-assist - jz .Lsoft_gmult lghi %r0,0 - lg %r1,24(%r1) # load second word of kimd capabilities vector + lg %r1,S390X_KIMD+8(%r1) # load second word of kimd capabilities + # vector tmhh %r1,0x4000 # check for function 65 jz .Lsoft_gmult stg %r0,16($sp) # arrange 16 bytes of zero input stg %r0,24($sp) - lghi %r0,65 # function 65 + lghi %r0,S390X_GHASH # function 65 la %r1,0($Xi) # H lies right after Xi in gcm128_context la $inp,16($sp) lghi $len,16 @@ -119,16 +126,11 @@ gcm_ghash_4bit: ___ $code.=<<___ if(!$softonly); larl %r1,OPENSSL_s390xcap_P - lg %r0,0(%r1) - tmhl %r0,0x4000 # check for message-security-assist - jz .Lsoft_ghash - lghi %r0,0 - la %r1,16($sp) - .long 0xb93e0004 # kimd %r0,%r4 - lg %r1,24($sp) - tmhh %r1,0x4000 # check for function 65 + lg %r0,S390X_KIMD+8(%r1) # load second word of kimd capabilities + # vector + tmhh %r0,0x4000 # check for function 65 jz .Lsoft_ghash - lghi %r0,65 # function 65 + lghi %r0,S390X_GHASH # function 65 la %r1,0($Xi) # H lies right after Xi in gcm128_context .long 0xb93e0004 # kimd %r0,$inp brc 1,.-4 # pay attention to "partial completion" @@ -151,7 +153,7 @@ $code.=<<___; lg $Zhi,0+1($Xi) lghi $tmp,0 .Louter: - xg $Zhi,0($inp) # Xi ^= inp + xg $Zhi,0($inp) # Xi ^= inp xg $Zlo,8($inp) xgr $Zhi,$tmp stg $Zlo,8+1($Xi) diff --git a/crypto/modes/asm/ghash-sparcv9.pl b/crypto/modes/asm/ghash-sparcv9.pl index b129ba706f0f..c4eb3b1f0206 100755 --- a/crypto/modes/asm/ghash-sparcv9.pl +++ b/crypto/modes/asm/ghash-sparcv9.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. 
You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -46,14 +53,12 @@ # saturates at ~15.5x single-process result on 8-core processor, # or ~20.5GBps per 2.85GHz socket. -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } -if ($bits==64) { $bias=2047; $frame=192; } -else { $bias=0; $frame=112; } - -$output=shift; +$output=pop; open STDOUT,">$output"; +$frame="STACK_FRAME"; +$bias="STACK_BIAS"; + $Zhi="%o0"; # 64-bit values $Zlo="%o1"; $Thi="%o2"; @@ -75,11 +80,14 @@ $Htbl="%i1"; $inp="%i2"; $len="%i3"; -$code.=<<___ if ($bits==64); +$code.=<<___; +#include "sparc_arch.h" + +#ifdef __arch64__ .register %g2,#scratch .register %g3,#scratch -___ -$code.=<<___; +#endif + .section ".text",#alloc,#execinstr .align 64 @@ -183,7 +191,7 @@ gcm_ghash_4bit: add $inp,16,$inp cmp $inp,$len - be,pn `$bits==64?"%xcc":"%icc"`,.Ldone + be,pn SIZE_T_CC,.Ldone and $Zlo,0xf,$remi ldx [$Htblo+$nhi],$Tlo @@ -532,7 +540,7 @@ ___ # Purpose of these subroutines is to explicitly encode VIS instructions, # so that one can compile the module without having to specify VIS -# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. +# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. # Idea is to reserve for option to produce "universal" binary and let # programmer detect if current CPU is VIS capable at run-time. sub unvis3 { diff --git a/crypto/modes/asm/ghash-x86.pl b/crypto/modes/asm/ghash-x86.pl index 0269169fa743..bcbe6e399d13 100755 --- a/crypto/modes/asm/ghash-x86.pl +++ b/crypto/modes/asm/ghash-x86.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -88,7 +95,7 @@ # where Tproc is time required for Karatsuba pre- and post-processing, # is more realistic estimate. In this case it gives ... 1.91 cycles. # Or in other words, depending on how well we can interleave reduction -# and one of the two multiplications the performance should be betwen +# and one of the two multiplications the performance should be between # 1.91 and 2.16. As already mentioned, this implementation processes # one byte out of 8KB buffer in 2.10 cycles, while x86_64 counterpart # - in 2.02. x86_64 performance is better, because larger register @@ -96,14 +103,13 @@ # # Does it make sense to increase Naggr? To start with it's virtually # impossible in 32-bit mode, because of limited register bank -# capacity. Otherwise improvement has to be weighed agiainst slower +# capacity. Otherwise improvement has to be weighed against slower # setup, as well as code size and complexity increase. As even # optimistic estimate doesn't promise 30% performance improvement, # there are currently no plans to increase Naggr. # -# Special thanks to David Woodhouse <dwmw2@infradead.org> for -# providing access to a Westmere-based system on behalf of Intel -# Open Source Technology Centre. 
+# Special thanks to David Woodhouse for providing access to a +# Westmere-based system on behalf of Intel Open Source Technology Centre. # January 2010 # @@ -129,7 +135,10 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; -&asm_init($ARGV[0],"ghash-x86.pl",$x86only = $ARGV[$#ARGV] eq "386"); +$output=pop; +open STDOUT,">$output"; + +&asm_init($ARGV[0],$x86only = $ARGV[$#ARGV] eq "386"); $sse2=0; for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } @@ -712,7 +721,7 @@ sub mmx_loop() { &pxor ($red[1],$red[1]); &pxor ($red[2],$red[2]); - # Just like in "May" verson modulo-schedule for critical path in + # Just like in "May" version modulo-schedule for critical path in # 'Z.hi ^= rem_8bit[Z.lo&0xff^((u8)H[nhi]<<4)]<<48'. Final 'pxor' # is scheduled so late that rem_8bit[] has to be shifted *right* # by 16, which is why last argument to pinsrw is 2, which @@ -801,7 +810,7 @@ sub mmx_loop() { &bswap ($dat); &pshufw ($Zhi,$Zhi,0b00011011); # 76543210 &bswap ("ebx"); - + &cmp ("ecx",&DWP(528+16+8,"esp")); # are we done? &jne (&label("outer")); } @@ -905,7 +914,7 @@ my ($Xhi,$Xi) = @_; &psllq ($Xi,57); # &movdqa ($T1,$Xi); # &pslldq ($Xi,8); - &psrldq ($T1,8); # + &psrldq ($T1,8); # &pxor ($Xi,$T2); &pxor ($Xhi,$T1); # @@ -1075,7 +1084,7 @@ my ($Xhi,$Xi) = @_; &psllq ($Xi,57); # &movdqa ($T1,$Xi); # &pslldq ($Xi,8); - &psrldq ($T1,8); # + &psrldq ($T1,8); # &pxor ($Xi,$T2); &pxor ($Xhi,$T1); # &pshufd ($T1,$Xhn,0b01001110); @@ -1138,7 +1147,7 @@ my ($Xhi,$Xi) = @_; &movdqu (&QWP(0,$Xip),$Xi); &function_end("gcm_ghash_clmul"); -} else { # Algorith 5. Kept for reference purposes. +} else { # Algorithm 5. Kept for reference purposes. sub reduction_alg5 { # 19/16 times faster than Intel version my ($Xhi,$Xi)=@_; @@ -1369,6 +1378,8 @@ my ($Xhi,$Xi)=@_; &asciz("GHASH for x86, CRYPTOGAMS by <appro\@openssl.org>"); &asm_finish(); +close STDOUT; + # A question was risen about choice of vanilla MMX. Or rather why wasn't # SSE2 chosen instead? In addition to the fact that MMX runs on legacy # CPUs such as PIII, "4-bit" MMX version was observed to provide better diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/modes/asm/ghash-x86_64.pl index f889f2018789..afc30c3e72a4 100755 --- a/crypto/modes/asm/ghash-x86_64.pl +++ b/crypto/modes/asm/ghash-x86_64.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -37,9 +44,8 @@ # See ghash-x86.pl for background information and details about coding # techniques. # -# Special thanks to David Woodhouse <dwmw2@infradead.org> for -# providing access to a Westmere-based system on behalf of Intel -# Open Source Technology Centre. +# Special thanks to David Woodhouse for providing access to a +# Westmere-based system on behalf of Intel Open Source Technology Centre. 
# December 2012 # @@ -64,8 +70,11 @@ # Ivy Bridge 1.80(+7%) # Haswell 0.55(+93%) (if system doesn't support AVX) # Broadwell 0.45(+110%)(if system doesn't support AVX) +# Skylake 0.44(+110%)(if system doesn't support AVX) # Bulldozer 1.49(+27%) # Silvermont 2.88(+13%) +# Knights L 2.12(-) (if system doesn't support AVX) +# Goldmont 1.08(+24%) # March 2013 # @@ -74,8 +83,10 @@ # CPUs such as Sandy and Ivy Bridge can execute it, the code performs # sub-optimally in comparison to above mentioned version. But thanks # to Ilya Albrekht and Max Locktyukhin of Intel Corp. we knew that -# it performs in 0.41 cycles per byte on Haswell processor, and in -# 0.29 on Broadwell. +# it performs in 0.41 cycles per byte on Haswell processor, in +# 0.29 on Broadwell, and in 0.36 on Skylake. +# +# Knights Landing achieves 1.09 cpb. # # [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest @@ -109,7 +120,7 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([ $avx = ($2>=3.0) + ($2>3.0); } -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT=*OUT; $do4xaggr=1; @@ -227,9 +238,21 @@ $code=<<___; .type gcm_gmult_4bit,\@function,2 .align 16 gcm_gmult_4bit: +.cfi_startproc push %rbx - push %rbp # %rbp and %r12 are pushed exclusively in +.cfi_push %rbx + push %rbp # %rbp and others are pushed exclusively in +.cfi_push %rbp push %r12 # order to reuse Win64 exception handler... +.cfi_push %r12 + push %r13 +.cfi_push %r13 + push %r14 +.cfi_push %r14 + push %r15 +.cfi_push %r15 + sub \$280,%rsp +.cfi_adjust_cfa_offset 280 .Lgmult_prologue: movzb 15($Xi),$Zlo @@ -240,10 +263,15 @@ $code.=<<___; mov $Zlo,8($Xi) mov $Zhi,($Xi) - mov 16(%rsp),%rbx - lea 24(%rsp),%rsp + lea 280+48(%rsp),%rsi +.cfi_def_cfa %rsi,8 + mov -8(%rsi),%rbx +.cfi_restore %rbx + lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lgmult_epilogue: ret +.cfi_endproc .size gcm_gmult_4bit,.-gcm_gmult_4bit ___ @@ -257,13 +285,21 @@ $code.=<<___; .type gcm_ghash_4bit,\@function,4 .align 16 gcm_ghash_4bit: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 sub \$280,%rsp +.cfi_adjust_cfa_offset 280 .Lghash_prologue: mov $inp,%r14 # reassign couple of args mov $len,%r15 @@ -391,16 +427,25 @@ $code.=<<___; mov $Zlo,8($Xi) mov $Zhi,($Xi) - lea 280(%rsp),%rsi - mov 0(%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp + lea 280+48(%rsp),%rsi +.cfi_def_cfa %rsi,8 + mov -48(%rsi),%r15 +.cfi_restore %r15 + mov -40(%rsi),%r14 +.cfi_restore %r14 + mov -32(%rsi),%r13 +.cfi_restore %r13 + mov -24(%rsi),%r12 +.cfi_restore %r12 + mov -16(%rsi),%rbp +.cfi_restore %rbp + mov -8(%rsi),%rbx +.cfi_restore %rbx + lea 0(%rsi),%rsp +.cfi_def_cfa_register %rsp .Lghash_epilogue: ret +.cfi_endproc .size gcm_ghash_4bit,.-gcm_ghash_4bit ___ @@ -460,7 +505,7 @@ $code.=<<___; psllq \$57,$Xi # movdqa $Xi,$T1 # pslldq \$8,$Xi - psrldq \$8,$T1 # + psrldq \$8,$T1 # pxor $T2,$Xi pxor $T1,$Xhi # @@ -574,7 +619,7 @@ ___ &clmul64x64_T2 ($Xhi,$Xi,$Hkey,$T2); $code.=<<___ if (0 || (&reduction_alg9($Xhi,$Xi)&&0)); # experimental alternative. special thing about is that there - # no dependency between the two multiplications... + # no dependency between the two multiplications... 
mov \$`0xE1<<1`,%eax mov \$0xA040608020C0E000,%r10 # ((7..0)·0xE0)&0xff mov \$0x07,%r11d @@ -749,7 +794,7 @@ $code.=<<___; movdqa $T2,$T1 # pslldq \$8,$T2 pclmulqdq \$0x00,$Hkey2,$Xln - psrldq \$8,$T1 # + psrldq \$8,$T1 # pxor $T2,$Xi pxor $T1,$Xhi # movdqu 0($inp),$T1 @@ -885,7 +930,7 @@ $code.=<<___; psllq \$57,$Xi # movdqa $Xi,$T1 # pslldq \$8,$Xi - psrldq \$8,$T1 # + psrldq \$8,$T1 # pxor $T2,$Xi pshufd \$0b01001110,$Xhn,$Xmn pxor $T1,$Xhi # @@ -1639,14 +1684,20 @@ se_handler: cmp %r10,%rbx # context->Rip>=epilogue label jae .Lin_prologue - lea 24(%rax),%rax # adjust "rsp" + lea 48+280(%rax),%rax # adjust "rsp" mov -8(%rax),%rbx mov -16(%rax),%rbp mov -24(%rax),%r12 + mov -32(%rax),%r13 + mov -40(%rax),%r14 + mov -48(%rax),%r15 mov %rbx,144($context) # restore context->Rbx mov %rbp,160($context) # restore context->Rbp mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R15 .Lin_prologue: mov 8(%rax),%rdi diff --git a/crypto/modes/asm/ghashp8-ppc.pl b/crypto/modes/asm/ghashp8-ppc.pl index 71457cf4fc59..6a2ac712950b 100755 --- a/crypto/modes/asm/ghashp8-ppc.pl +++ b/crypto/modes/asm/ghashp8-ppc.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -16,7 +23,14 @@ # Relative comparison is therefore more informative. This initial # version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x # faster than "4-bit" integer-only compiler-generated 64-bit code. -# "Initial version" means that there is room for futher improvement. +# "Initial version" means that there is room for further improvement. + +# May 2016 +# +# 2x aggregated reduction improves performance by 50% (resulting +# performance on POWER8 is 1 cycle per processed byte), and 4x +# aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb). +# POWER9 delivers 0.51 cpb. $flavour=shift; $output =shift; @@ -27,14 +41,21 @@ if ($flavour =~ /64/) { $STU="stdu"; $POP="ld"; $PUSH="std"; + $UCMP="cmpld"; + $SHRI="srdi"; } elsif ($flavour =~ /32/) { $SIZE_T=4; $LRSAVE=$SIZE_T; $STU="stwu"; $POP="lwz"; $PUSH="stw"; + $UCMP="cmplw"; + $SHRI="srwi"; } else { die "nonsense $flavour"; } +$sp="r1"; +$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or @@ -46,6 +67,7 @@ my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3)); my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12)); +my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19)); my $vrsave="r12"; $code=<<___; @@ -56,7 +78,7 @@ $code=<<___; .globl .gcm_init_p8 .align 5 .gcm_init_p8: - lis r0,0xfff0 + li r0,-4096 li r8,0x10 mfspr $vrsave,256 li r9,0x20 @@ -78,17 +100,103 @@ $code=<<___; vsl $H,$H,$t0 # H<<=1 vsrab $t1,$t1,$t2 # broadcast carry bit vand $t1,$t1,$xC2 - vxor $H,$H,$t1 # twisted H + vxor $IN,$H,$t1 # twisted H - vsldoi $H,$H,$H,8 # twist even more ... 
+ vsldoi $H,$IN,$IN,8 # twist even more ... vsldoi $xC2,$zero,$xC2,8 # 0xc2.0 vsldoi $Hl,$zero,$H,8 # ... and split vsldoi $Hh,$H,$zero,8 stvx_u $xC2,0,r3 # save pre-computed table stvx_u $Hl,r8,r3 + li r8,0x40 stvx_u $H, r9,r3 + li r9,0x50 stvx_u $Hh,r10,r3 + li r10,0x60 + + vpmsumd $Xl,$IN,$Hl # H.lo·H.lo + vpmsumd $Xm,$IN,$H # H.hi·H.lo+H.lo·H.hi + vpmsumd $Xh,$IN,$Hh # H.hi·H.hi + + vpmsumd $t2,$Xl,$xC2 # 1st reduction phase + + vsldoi $t0,$Xm,$zero,8 + vsldoi $t1,$zero,$Xm,8 + vxor $Xl,$Xl,$t0 + vxor $Xh,$Xh,$t1 + + vsldoi $Xl,$Xl,$Xl,8 + vxor $Xl,$Xl,$t2 + + vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase + vpmsumd $Xl,$Xl,$xC2 + vxor $t1,$t1,$Xh + vxor $IN1,$Xl,$t1 + + vsldoi $H2,$IN1,$IN1,8 + vsldoi $H2l,$zero,$H2,8 + vsldoi $H2h,$H2,$zero,8 + + stvx_u $H2l,r8,r3 # save H^2 + li r8,0x70 + stvx_u $H2,r9,r3 + li r9,0x80 + stvx_u $H2h,r10,r3 + li r10,0x90 +___ +{ +my ($t4,$t5,$t6) = ($Hl,$H,$Hh); +$code.=<<___; + vpmsumd $Xl,$IN,$H2l # H.lo·H^2.lo + vpmsumd $Xl1,$IN1,$H2l # H^2.lo·H^2.lo + vpmsumd $Xm,$IN,$H2 # H.hi·H^2.lo+H.lo·H^2.hi + vpmsumd $Xm1,$IN1,$H2 # H^2.hi·H^2.lo+H^2.lo·H^2.hi + vpmsumd $Xh,$IN,$H2h # H.hi·H^2.hi + vpmsumd $Xh1,$IN1,$H2h # H^2.hi·H^2.hi + + vpmsumd $t2,$Xl,$xC2 # 1st reduction phase + vpmsumd $t6,$Xl1,$xC2 # 1st reduction phase + + vsldoi $t0,$Xm,$zero,8 + vsldoi $t1,$zero,$Xm,8 + vsldoi $t4,$Xm1,$zero,8 + vsldoi $t5,$zero,$Xm1,8 + vxor $Xl,$Xl,$t0 + vxor $Xh,$Xh,$t1 + vxor $Xl1,$Xl1,$t4 + vxor $Xh1,$Xh1,$t5 + + vsldoi $Xl,$Xl,$Xl,8 + vsldoi $Xl1,$Xl1,$Xl1,8 + vxor $Xl,$Xl,$t2 + vxor $Xl1,$Xl1,$t6 + + vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase + vsldoi $t5,$Xl1,$Xl1,8 # 2nd reduction phase + vpmsumd $Xl,$Xl,$xC2 + vpmsumd $Xl1,$Xl1,$xC2 + vxor $t1,$t1,$Xh + vxor $t5,$t5,$Xh1 + vxor $Xl,$Xl,$t1 + vxor $Xl1,$Xl1,$t5 + + vsldoi $H,$Xl,$Xl,8 + vsldoi $H2,$Xl1,$Xl1,8 + vsldoi $Hl,$zero,$H,8 + vsldoi $Hh,$H,$zero,8 + vsldoi $H2l,$zero,$H2,8 + vsldoi $H2h,$H2,$zero,8 + + stvx_u $Hl,r8,r3 # save H^3 + li r8,0xa0 + stvx_u $H,r9,r3 + li r9,0xb0 + stvx_u $Hh,r10,r3 + li r10,0xc0 + stvx_u $H2l,r8,r3 # save H^4 + stvx_u $H2,r9,r3 + stvx_u $H2h,r10,r3 mtspr 256,$vrsave blr @@ -96,7 +204,9 @@ $code=<<___; .byte 0,12,0x14,0,0,0,2,0 .long 0 .size .gcm_init_p8,.-.gcm_init_p8 - +___ +} +$code.=<<___; .globl .gcm_gmult_p8 .align 5 .gcm_gmult_p8: @@ -122,7 +232,7 @@ $code=<<___; vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi - vpmsumd $t2,$Xl,$xC2 # 1st phase + vpmsumd $t2,$Xl,$xC2 # 1st reduction phase vsldoi $t0,$Xm,$zero,8 vsldoi $t1,$zero,$Xm,8 @@ -132,7 +242,7 @@ $code=<<___; vsldoi $Xl,$Xl,$Xl,8 vxor $Xl,$Xl,$t2 - vsldoi $t1,$Xl,$Xl,8 # 2nd phase + vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase vpmsumd $Xl,$Xl,$xC2 vxor $t1,$t1,$Xh vxor $Xl,$Xl,$t1 @@ -150,7 +260,7 @@ $code=<<___; .globl .gcm_ghash_p8 .align 5 .gcm_ghash_p8: - lis r0,0xfff8 + li r0,-4096 li r8,0x10 mfspr $vrsave,256 li r9,0x20 @@ -159,52 +269,99 @@ $code=<<___; lvx_u $Xl,0,$Xip # load Xi lvx_u $Hl,r8,$Htbl # load pre-computed table + li r8,0x40 le?lvsl $lemask,r0,r0 lvx_u $H, r9,$Htbl + li r9,0x50 le?vspltisb $t0,0x07 lvx_u $Hh,r10,$Htbl + li r10,0x60 le?vxor $lemask,$lemask,$t0 lvx_u $xC2,0,$Htbl le?vperm $Xl,$Xl,$Xl,$lemask vxor $zero,$zero,$zero + ${UCMP}i $len,64 + bge Lgcm_ghash_p8_4x + lvx_u $IN,0,$inp addi $inp,$inp,16 - subi $len,$len,16 + subic. 
$len,$len,16 le?vperm $IN,$IN,$IN,$lemask vxor $IN,$IN,$Xl - b Loop + beq Lshort + + lvx_u $H2l,r8,$Htbl # load H^2 + li r8,16 + lvx_u $H2, r9,$Htbl + add r9,$inp,$len # end of input + lvx_u $H2h,r10,$Htbl + be?b Loop_2x .align 5 -Loop: - subic $len,$len,16 - vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo - subfe. r0,r0,r0 # borrow?-1:0 - vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi +Loop_2x: + lvx_u $IN1,0,$inp + le?vperm $IN1,$IN1,$IN1,$lemask + + subic $len,$len,32 + vpmsumd $Xl,$IN,$H2l # H^2.lo·Xi.lo + vpmsumd $Xl1,$IN1,$Hl # H.lo·Xi+1.lo + subfe r0,r0,r0 # borrow?-1:0 + vpmsumd $Xm,$IN,$H2 # H^2.hi·Xi.lo+H^2.lo·Xi.hi + vpmsumd $Xm1,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+1.hi and r0,r0,$len - vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi + vpmsumd $Xh,$IN,$H2h # H^2.hi·Xi.hi + vpmsumd $Xh1,$IN1,$Hh # H.hi·Xi+1.hi add $inp,$inp,r0 - vpmsumd $t2,$Xl,$xC2 # 1st phase + vxor $Xl,$Xl,$Xl1 + vxor $Xm,$Xm,$Xm1 + + vpmsumd $t2,$Xl,$xC2 # 1st reduction phase vsldoi $t0,$Xm,$zero,8 vsldoi $t1,$zero,$Xm,8 + vxor $Xh,$Xh,$Xh1 vxor $Xl,$Xl,$t0 vxor $Xh,$Xh,$t1 vsldoi $Xl,$Xl,$Xl,8 vxor $Xl,$Xl,$t2 - lvx_u $IN,0,$inp - addi $inp,$inp,16 + lvx_u $IN,r8,$inp + addi $inp,$inp,32 - vsldoi $t1,$Xl,$Xl,8 # 2nd phase + vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase vpmsumd $Xl,$Xl,$xC2 le?vperm $IN,$IN,$IN,$lemask vxor $t1,$t1,$Xh vxor $IN,$IN,$t1 vxor $IN,$IN,$Xl - beq Loop # did $len-=16 borrow? + $UCMP r9,$inp + bgt Loop_2x # done yet? + + cmplwi $len,0 + bne Leven + +Lshort: + vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo + vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi + vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi + + vpmsumd $t2,$Xl,$xC2 # 1st reduction phase + + vsldoi $t0,$Xm,$zero,8 + vsldoi $t1,$zero,$Xm,8 + vxor $Xl,$Xl,$t0 + vxor $Xh,$Xh,$t1 + + vsldoi $Xl,$Xl,$Xl,8 + vxor $Xl,$Xl,$t2 + + vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase + vpmsumd $Xl,$Xl,$xC2 + vxor $t1,$t1,$Xh +Leven: vxor $Xl,$Xl,$t1 le?vperm $Xl,$Xl,$Xl,$lemask stvx_u $Xl,0,$Xip # write out Xi @@ -214,6 +371,284 @@ Loop: .long 0 .byte 0,12,0x14,0,0,0,4,0 .long 0 +___ +{ +my ($Xl3,$Xm2,$IN2,$H3l,$H3,$H3h, + $Xh3,$Xm3,$IN3,$H4l,$H4,$H4h) = map("v$_",(20..31)); +my $IN0=$IN; +my ($H21l,$H21h,$loperm,$hiperm) = ($Hl,$Hh,$H2l,$H2h); + +$code.=<<___; +.align 5 +.gcm_ghash_p8_4x: +Lgcm_ghash_p8_4x: + $STU $sp,-$FRAME($sp) + li r10,`15+6*$SIZE_T` + li r11,`31+6*$SIZE_T` + stvx v20,r10,$sp + addi r10,r10,32 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + li r10,0x60 + stvx v31,r11,$sp + li r0,-1 + stw $vrsave,`$FRAME-4`($sp) # save vrsave + mtspr 256,r0 # preserve all AltiVec registers + + lvsl $t0,0,r8 # 0x0001..0e0f + #lvx_u $H2l,r8,$Htbl # load H^2 + li r8,0x70 + lvx_u $H2, r9,$Htbl + li r9,0x80 + vspltisb $t1,8 # 0x0808..0808 + #lvx_u $H2h,r10,$Htbl + li r10,0x90 + lvx_u $H3l,r8,$Htbl # load H^3 + li r8,0xa0 + lvx_u $H3, r9,$Htbl + li r9,0xb0 + lvx_u $H3h,r10,$Htbl + li r10,0xc0 + lvx_u $H4l,r8,$Htbl # load H^4 + li r8,0x10 + lvx_u $H4, r9,$Htbl + li r9,0x20 + lvx_u $H4h,r10,$Htbl + li r10,0x30 + + vsldoi $t2,$zero,$t1,8 # 0x0000..0808 + vaddubm $hiperm,$t0,$t2 # 0x0001..1617 + vaddubm $loperm,$t1,$hiperm # 0x0809..1e1f + + $SHRI $len,$len,4 # this allows to use sign bit + # as carry + lvx_u $IN0,0,$inp # load input + lvx_u $IN1,r8,$inp + subic. 
$len,$len,8 + lvx_u $IN2,r9,$inp + lvx_u $IN3,r10,$inp + addi $inp,$inp,0x40 + le?vperm $IN0,$IN0,$IN0,$lemask + le?vperm $IN1,$IN1,$IN1,$lemask + le?vperm $IN2,$IN2,$IN2,$lemask + le?vperm $IN3,$IN3,$IN3,$lemask + + vxor $Xh,$IN0,$Xl + + vpmsumd $Xl1,$IN1,$H3l + vpmsumd $Xm1,$IN1,$H3 + vpmsumd $Xh1,$IN1,$H3h + + vperm $H21l,$H2,$H,$hiperm + vperm $t0,$IN2,$IN3,$loperm + vperm $H21h,$H2,$H,$loperm + vperm $t1,$IN2,$IN3,$hiperm + vpmsumd $Xm2,$IN2,$H2 # H^2.lo·Xi+2.hi+H^2.hi·Xi+2.lo + vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+2.lo+H.lo·Xi+3.lo + vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi + vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+2.hi+H.hi·Xi+3.hi + + vxor $Xm2,$Xm2,$Xm1 + vxor $Xl3,$Xl3,$Xl1 + vxor $Xm3,$Xm3,$Xm2 + vxor $Xh3,$Xh3,$Xh1 + + blt Ltail_4x + +Loop_4x: + lvx_u $IN0,0,$inp + lvx_u $IN1,r8,$inp + subic. $len,$len,4 + lvx_u $IN2,r9,$inp + lvx_u $IN3,r10,$inp + addi $inp,$inp,0x40 + le?vperm $IN1,$IN1,$IN1,$lemask + le?vperm $IN2,$IN2,$IN2,$lemask + le?vperm $IN3,$IN3,$IN3,$lemask + le?vperm $IN0,$IN0,$IN0,$lemask + + vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo + vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi + vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi + vpmsumd $Xl1,$IN1,$H3l + vpmsumd $Xm1,$IN1,$H3 + vpmsumd $Xh1,$IN1,$H3h + + vxor $Xl,$Xl,$Xl3 + vxor $Xm,$Xm,$Xm3 + vxor $Xh,$Xh,$Xh3 + vperm $t0,$IN2,$IN3,$loperm + vperm $t1,$IN2,$IN3,$hiperm + + vpmsumd $t2,$Xl,$xC2 # 1st reduction phase + vpmsumd $Xl3,$t0,$H21l # H.lo·Xi+3.lo +H^2.lo·Xi+2.lo + vpmsumd $Xh3,$t1,$H21h # H.hi·Xi+3.hi +H^2.hi·Xi+2.hi + + vsldoi $t0,$Xm,$zero,8 + vsldoi $t1,$zero,$Xm,8 + vxor $Xl,$Xl,$t0 + vxor $Xh,$Xh,$t1 + + vsldoi $Xl,$Xl,$Xl,8 + vxor $Xl,$Xl,$t2 + + vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase + vpmsumd $Xm2,$IN2,$H2 # H^2.hi·Xi+2.lo+H^2.lo·Xi+2.hi + vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi + vpmsumd $Xl,$Xl,$xC2 + + vxor $Xl3,$Xl3,$Xl1 + vxor $Xh3,$Xh3,$Xh1 + vxor $Xh,$Xh,$IN0 + vxor $Xm2,$Xm2,$Xm1 + vxor $Xh,$Xh,$t1 + vxor $Xm3,$Xm3,$Xm2 + vxor $Xh,$Xh,$Xl + bge Loop_4x + +Ltail_4x: + vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo + vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi + vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi + + vxor $Xl,$Xl,$Xl3 + vxor $Xm,$Xm,$Xm3 + + vpmsumd $t2,$Xl,$xC2 # 1st reduction phase + + vsldoi $t0,$Xm,$zero,8 + vsldoi $t1,$zero,$Xm,8 + vxor $Xh,$Xh,$Xh3 + vxor $Xl,$Xl,$t0 + vxor $Xh,$Xh,$t1 + + vsldoi $Xl,$Xl,$Xl,8 + vxor $Xl,$Xl,$t2 + + vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase + vpmsumd $Xl,$Xl,$xC2 + vxor $t1,$t1,$Xh + vxor $Xl,$Xl,$t1 + + addic. 
$len,$len,4 + beq Ldone_4x + + lvx_u $IN0,0,$inp + ${UCMP}i $len,2 + li $len,-4 + blt Lone + lvx_u $IN1,r8,$inp + beq Ltwo + +Lthree: + lvx_u $IN2,r9,$inp + le?vperm $IN0,$IN0,$IN0,$lemask + le?vperm $IN1,$IN1,$IN1,$lemask + le?vperm $IN2,$IN2,$IN2,$lemask + + vxor $Xh,$IN0,$Xl + vmr $H4l,$H3l + vmr $H4, $H3 + vmr $H4h,$H3h + + vperm $t0,$IN1,$IN2,$loperm + vperm $t1,$IN1,$IN2,$hiperm + vpmsumd $Xm2,$IN1,$H2 # H^2.lo·Xi+1.hi+H^2.hi·Xi+1.lo + vpmsumd $Xm3,$IN2,$H # H.hi·Xi+2.lo +H.lo·Xi+2.hi + vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+1.lo+H.lo·Xi+2.lo + vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+1.hi+H.hi·Xi+2.hi + + vxor $Xm3,$Xm3,$Xm2 + b Ltail_4x + +.align 4 +Ltwo: + le?vperm $IN0,$IN0,$IN0,$lemask + le?vperm $IN1,$IN1,$IN1,$lemask + + vxor $Xh,$IN0,$Xl + vperm $t0,$zero,$IN1,$loperm + vperm $t1,$zero,$IN1,$hiperm + + vsldoi $H4l,$zero,$H2,8 + vmr $H4, $H2 + vsldoi $H4h,$H2,$zero,8 + + vpmsumd $Xl3,$t0, $H21l # H.lo·Xi+1.lo + vpmsumd $Xm3,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+2.hi + vpmsumd $Xh3,$t1, $H21h # H.hi·Xi+1.hi + + b Ltail_4x + +.align 4 +Lone: + le?vperm $IN0,$IN0,$IN0,$lemask + + vsldoi $H4l,$zero,$H,8 + vmr $H4, $H + vsldoi $H4h,$H,$zero,8 + + vxor $Xh,$IN0,$Xl + vxor $Xl3,$Xl3,$Xl3 + vxor $Xm3,$Xm3,$Xm3 + vxor $Xh3,$Xh3,$Xh3 + + b Ltail_4x + +Ldone_4x: + le?vperm $Xl,$Xl,$Xl,$lemask + stvx_u $Xl,0,$Xip # write out Xi + + li r10,`15+6*$SIZE_T` + li r11,`31+6*$SIZE_T` + mtspr 256,$vrsave + lvx v20,r10,$sp + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + addi $sp,$sp,$FRAME + blr + .long 0 + .byte 0,12,0x04,0,0x80,0,4,0 + .long 0 +___ +} +$code.=<<___; .size .gcm_ghash_p8,.-.gcm_ghash_p8 .asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" @@ -221,6 +656,8 @@ Loop: ___ foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/geo; + if ($flavour =~ /le$/o) { # little-endian s/le\?//o or s/be\?/#be#/o; diff --git a/crypto/modes/asm/ghashv8-armx.pl b/crypto/modes/asm/ghashv8-armx.pl index 0886d2180702..47e882008069 100755 --- a/crypto/modes/asm/ghashv8-armx.pl +++ b/crypto/modes/asm/ghashv8-armx.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -12,26 +19,42 @@ # June 2014 # # Initial version was developed in tight cooperation with Ard -# Biesheuvel <ard.biesheuvel@linaro.org> from bits-n-pieces from -# other assembly modules. Just like aesv8-armx.pl this module -# supports both AArch32 and AArch64 execution modes. +# Biesheuvel of Linaro from bits-n-pieces from other assembly modules. +# Just like aesv8-armx.pl this module supports both AArch32 and +# AArch64 execution modes. # # July 2014 # # Implement 2x aggregated reduction [see ghash-x86.pl for background # information]. 
# +# November 2017 +# +# AArch64 register bank to "accommodate" 4x aggregated reduction and +# improve performance by 20-70% depending on processor. +# # Current performance in cycles per processed byte: # -# PMULL[2] 32-bit NEON(*) -# Apple A7 0.92 5.62 -# Cortex-A53 1.01 8.39 -# Cortex-A57 1.17 7.61 +# 64-bit PMULL 32-bit PMULL 32-bit NEON(*) +# Apple A7 0.58 0.92 5.62 +# Cortex-A53 0.85 1.01 8.39 +# Cortex-A57 0.73 1.17 7.61 +# Denver 0.51 0.65 6.02 +# Mongoose 0.65 1.10 8.06 +# Kryo 0.76 1.16 8.00 # # (*) presented for reference/comparison purposes; $flavour = shift; -open STDOUT,">".shift; +$output = shift; + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or +die "can't locate arm-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; $Xi="x0"; # argument block $Htbl="x1"; @@ -47,10 +70,15 @@ my ($t0,$t1,$t2,$xC2,$H,$Hhl,$H2)=map("q$_",(8..14)); $code=<<___; #include "arm_arch.h" +#if __ARM_MAX_ARCH__>=7 .text ___ $code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/); -$code.=".fpu neon\n.code 32\n" if ($flavour !~ /64/); +$code.=<<___ if ($flavour !~ /64/); +.fpu neon +.code 32 +#undef __thumb2__ +___ ################################################################################ # void gcm_init_v8(u128 Htable[16],const u64 H[2]); @@ -109,8 +137,56 @@ gcm_init_v8: vext.8 $t1,$H2,$H2,#8 @ Karatsuba pre-processing veor $t1,$t1,$H2 vext.8 $Hhl,$t0,$t1,#8 @ pack Karatsuba pre-processed - vst1.64 {$Hhl-$H2},[x0] @ store Htable[1..2] + vst1.64 {$Hhl-$H2},[x0],#32 @ store Htable[1..2] +___ +if ($flavour =~ /64/) { +my ($t3,$Yl,$Ym,$Yh) = map("q$_",(4..7)); + +$code.=<<___; + @ calculate H^3 and H^4 + vpmull.p64 $Xl,$H, $H2 + vpmull.p64 $Yl,$H2,$H2 + vpmull2.p64 $Xh,$H, $H2 + vpmull2.p64 $Yh,$H2,$H2 + vpmull.p64 $Xm,$t0,$t1 + vpmull.p64 $Ym,$t1,$t1 + + vext.8 $t0,$Xl,$Xh,#8 @ Karatsuba post-processing + vext.8 $t1,$Yl,$Yh,#8 + veor $t2,$Xl,$Xh + veor $Xm,$Xm,$t0 + veor $t3,$Yl,$Yh + veor $Ym,$Ym,$t1 + veor $Xm,$Xm,$t2 + vpmull.p64 $t2,$Xl,$xC2 @ 1st phase + veor $Ym,$Ym,$t3 + vpmull.p64 $t3,$Yl,$xC2 + + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Yh#lo,$Ym#hi + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl + vmov $Ym#hi,$Yl#lo + veor $Xl,$Xm,$t2 + veor $Yl,$Ym,$t3 + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase + vext.8 $t3,$Yl,$Yl,#8 + vpmull.p64 $Xl,$Xl,$xC2 + vpmull.p64 $Yl,$Yl,$xC2 + veor $t2,$t2,$Xh + veor $t3,$t3,$Yh + veor $H, $Xl,$t2 @ H^3 + veor $H2,$Yl,$t3 @ H^4 + vext.8 $t0,$H, $H,#8 @ Karatsuba pre-processing + vext.8 $t1,$H2,$H2,#8 + veor $t0,$t0,$H + veor $t1,$t1,$H2 + vext.8 $Hhl,$t0,$t1,#8 @ pack Karatsuba pre-processed + vst1.64 {$H-$H2},[x0] @ store Htable[3..5] +___ +} +$code.=<<___; ret .size gcm_init_v8,.-gcm_init_v8 ___ @@ -179,6 +255,10 @@ $code.=<<___; .align 4 gcm_ghash_v8: ___ +$code.=<<___ if ($flavour =~ /64/); + cmp $len,#64 + b.hs .Lgcm_ghash_v8_4x +___ $code.=<<___ if ($flavour !~ /64/); vstmdb sp!,{d8-d15} @ 32-bit ABI says so ___ @@ -188,13 +268,13 @@ $code.=<<___; @ loaded value would have @ to be rotated in order to @ make it appear as in - @ alorithm specification + @ algorithm specification subs $len,$len,#32 @ see if $len is 32 or larger mov $inc,#16 @ $inc is used as post- @ increment for input pointer; @ as loop is modulo-scheduled @ $inc is zeroed just in time - @ to preclude oversteping + @ to preclude overstepping @ inp[len], which means that @ last block[s] are actually @ loaded twice, but last @@ -326,10 +406,301 @@ $code.=<<___; ret 
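To feed that schedule, the gcm_init_v8 additions above compute H^3 and H^4 once per key (stored Karatsuba-pre-folded in Htable[3..5]) so the 64-bit 4x path never derives them on the fly. The idea, continuing the previous sketch with the same illustrative u128x/gf128_mul() names and ignoring the pre-folding:

    /* one-time per key: the powers of H the 4x bulk loop consumes */
    static void ghash_precompute(u128x Hpow[4], u128x H)
    {
        Hpow[0] = H;                            /* H   */
        Hpow[1] = gf128_mul(H, H);              /* H^2 */
        Hpow[2] = gf128_mul(Hpow[1], H);        /* H^3 */
        Hpow[3] = gf128_mul(Hpow[1], Hpow[1]);  /* H^4 */
    }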
.size gcm_ghash_v8,.-gcm_ghash_v8 ___ + +if ($flavour =~ /64/) { # 4x subroutine +my ($I0,$j1,$j2,$j3, + $I1,$I2,$I3,$H3,$H34,$H4,$Yl,$Ym,$Yh) = map("q$_",(4..7,15..23)); + +$code.=<<___; +.type gcm_ghash_v8_4x,%function +.align 4 +gcm_ghash_v8_4x: +.Lgcm_ghash_v8_4x: + vld1.64 {$Xl},[$Xi] @ load [rotated] Xi + vld1.64 {$H-$H2},[$Htbl],#48 @ load twisted H, ..., H^2 + vmov.i8 $xC2,#0xe1 + vld1.64 {$H3-$H4},[$Htbl] @ load twisted H^3, ..., H^4 + vshl.u64 $xC2,$xC2,#57 @ compose 0xc2.0 constant + + vld1.64 {$I0-$j3},[$inp],#64 +#ifndef __ARMEB__ + vrev64.8 $Xl,$Xl + vrev64.8 $j1,$j1 + vrev64.8 $j2,$j2 + vrev64.8 $j3,$j3 + vrev64.8 $I0,$I0 +#endif + vext.8 $I3,$j3,$j3,#8 + vext.8 $I2,$j2,$j2,#8 + vext.8 $I1,$j1,$j1,#8 + + vpmull.p64 $Yl,$H,$I3 @ H·Ii+3 + veor $j3,$j3,$I3 + vpmull2.p64 $Yh,$H,$I3 + vpmull.p64 $Ym,$Hhl,$j3 + + vpmull.p64 $t0,$H2,$I2 @ H^2·Ii+2 + veor $j2,$j2,$I2 + vpmull2.p64 $I2,$H2,$I2 + vpmull2.p64 $j2,$Hhl,$j2 + + veor $Yl,$Yl,$t0 + veor $Yh,$Yh,$I2 + veor $Ym,$Ym,$j2 + + vpmull.p64 $j3,$H3,$I1 @ H^3·Ii+1 + veor $j1,$j1,$I1 + vpmull2.p64 $I1,$H3,$I1 + vpmull.p64 $j1,$H34,$j1 + + veor $Yl,$Yl,$j3 + veor $Yh,$Yh,$I1 + veor $Ym,$Ym,$j1 + + subs $len,$len,#128 + b.lo .Ltail4x + + b .Loop4x + +.align 4 +.Loop4x: + veor $t0,$I0,$Xl + vld1.64 {$I0-$j3},[$inp],#64 + vext.8 $IN,$t0,$t0,#8 +#ifndef __ARMEB__ + vrev64.8 $j1,$j1 + vrev64.8 $j2,$j2 + vrev64.8 $j3,$j3 + vrev64.8 $I0,$I0 +#endif + + vpmull.p64 $Xl,$H4,$IN @ H^4·(Xi+Ii) + veor $t0,$t0,$IN + vpmull2.p64 $Xh,$H4,$IN + vext.8 $I3,$j3,$j3,#8 + vpmull2.p64 $Xm,$H34,$t0 + + veor $Xl,$Xl,$Yl + veor $Xh,$Xh,$Yh + vext.8 $I2,$j2,$j2,#8 + veor $Xm,$Xm,$Ym + vext.8 $I1,$j1,$j1,#8 + + vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing + veor $t2,$Xl,$Xh + vpmull.p64 $Yl,$H,$I3 @ H·Ii+3 + veor $j3,$j3,$I3 + veor $Xm,$Xm,$t1 + vpmull2.p64 $Yh,$H,$I3 + veor $Xm,$Xm,$t2 + vpmull.p64 $Ym,$Hhl,$j3 + + vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl + vpmull.p64 $t0,$H2,$I2 @ H^2·Ii+2 + veor $j2,$j2,$I2 + vpmull2.p64 $I2,$H2,$I2 + veor $Xl,$Xm,$t2 + vpmull2.p64 $j2,$Hhl,$j2 + + veor $Yl,$Yl,$t0 + veor $Yh,$Yh,$I2 + veor $Ym,$Ym,$j2 + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction + vpmull.p64 $Xl,$Xl,$xC2 + vpmull.p64 $j3,$H3,$I1 @ H^3·Ii+1 + veor $j1,$j1,$I1 + veor $t2,$t2,$Xh + vpmull2.p64 $I1,$H3,$I1 + vpmull.p64 $j1,$H34,$j1 + + veor $Xl,$Xl,$t2 + veor $Yl,$Yl,$j3 + veor $Yh,$Yh,$I1 + vext.8 $Xl,$Xl,$Xl,#8 + veor $Ym,$Ym,$j1 + + subs $len,$len,#64 + b.hs .Loop4x + +.Ltail4x: + veor $t0,$I0,$Xl + vext.8 $IN,$t0,$t0,#8 + + vpmull.p64 $Xl,$H4,$IN @ H^4·(Xi+Ii) + veor $t0,$t0,$IN + vpmull2.p64 $Xh,$H4,$IN + vpmull2.p64 $Xm,$H34,$t0 + + veor $Xl,$Xl,$Yl + veor $Xh,$Xh,$Yh + veor $Xm,$Xm,$Ym + + adds $len,$len,#64 + b.eq .Ldone4x + + cmp $len,#32 + b.lo .Lone + b.eq .Ltwo +.Lthree: + vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing + veor $t2,$Xl,$Xh + veor $Xm,$Xm,$t1 + vld1.64 {$I0-$j2},[$inp] + veor $Xm,$Xm,$t2 +#ifndef __ARMEB__ + vrev64.8 $j1,$j1 + vrev64.8 $j2,$j2 + vrev64.8 $I0,$I0 +#endif + + vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl + vext.8 $I2,$j2,$j2,#8 + vext.8 $I1,$j1,$j1,#8 + veor $Xl,$Xm,$t2 + + vpmull.p64 $Yl,$H,$I2 @ H·Ii+2 + veor $j2,$j2,$I2 + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction + vpmull.p64 $Xl,$Xl,$xC2 + veor $t2,$t2,$Xh + vpmull2.p64 $Yh,$H,$I2 + vpmull.p64 $Ym,$Hhl,$j2 + veor $Xl,$Xl,$t2 + vpmull.p64 $j3,$H2,$I1 @ H^2·Ii+1 + veor 
$j1,$j1,$I1 + vext.8 $Xl,$Xl,$Xl,#8 + + vpmull2.p64 $I1,$H2,$I1 + veor $t0,$I0,$Xl + vpmull2.p64 $j1,$Hhl,$j1 + vext.8 $IN,$t0,$t0,#8 + + veor $Yl,$Yl,$j3 + veor $Yh,$Yh,$I1 + veor $Ym,$Ym,$j1 + + vpmull.p64 $Xl,$H3,$IN @ H^3·(Xi+Ii) + veor $t0,$t0,$IN + vpmull2.p64 $Xh,$H3,$IN + vpmull.p64 $Xm,$H34,$t0 + + veor $Xl,$Xl,$Yl + veor $Xh,$Xh,$Yh + veor $Xm,$Xm,$Ym + b .Ldone4x + +.align 4 +.Ltwo: + vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing + veor $t2,$Xl,$Xh + veor $Xm,$Xm,$t1 + vld1.64 {$I0-$j1},[$inp] + veor $Xm,$Xm,$t2 +#ifndef __ARMEB__ + vrev64.8 $j1,$j1 + vrev64.8 $I0,$I0 +#endif + + vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl + vext.8 $I1,$j1,$j1,#8 + veor $Xl,$Xm,$t2 + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction + vpmull.p64 $Xl,$Xl,$xC2 + veor $t2,$t2,$Xh + veor $Xl,$Xl,$t2 + vext.8 $Xl,$Xl,$Xl,#8 + + vpmull.p64 $Yl,$H,$I1 @ H·Ii+1 + veor $j1,$j1,$I1 + + veor $t0,$I0,$Xl + vext.8 $IN,$t0,$t0,#8 + + vpmull2.p64 $Yh,$H,$I1 + vpmull.p64 $Ym,$Hhl,$j1 + + vpmull.p64 $Xl,$H2,$IN @ H^2·(Xi+Ii) + veor $t0,$t0,$IN + vpmull2.p64 $Xh,$H2,$IN + vpmull2.p64 $Xm,$Hhl,$t0 + + veor $Xl,$Xl,$Yl + veor $Xh,$Xh,$Yh + veor $Xm,$Xm,$Ym + b .Ldone4x + +.align 4 +.Lone: + vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing + veor $t2,$Xl,$Xh + veor $Xm,$Xm,$t1 + vld1.64 {$I0},[$inp] + veor $Xm,$Xm,$t2 +#ifndef __ARMEB__ + vrev64.8 $I0,$I0 +#endif + + vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl + veor $Xl,$Xm,$t2 + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction + vpmull.p64 $Xl,$Xl,$xC2 + veor $t2,$t2,$Xh + veor $Xl,$Xl,$t2 + vext.8 $Xl,$Xl,$Xl,#8 + + veor $t0,$I0,$Xl + vext.8 $IN,$t0,$t0,#8 + + vpmull.p64 $Xl,$H,$IN + veor $t0,$t0,$IN + vpmull2.p64 $Xh,$H,$IN + vpmull.p64 $Xm,$Hhl,$t0 + +.Ldone4x: + vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing + veor $t2,$Xl,$Xh + veor $Xm,$Xm,$t1 + veor $Xm,$Xm,$t2 + + vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl + veor $Xl,$Xm,$t2 + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction + vpmull.p64 $Xl,$Xl,$xC2 + veor $t2,$t2,$Xh + veor $Xl,$Xl,$t2 + vext.8 $Xl,$Xl,$Xl,#8 + +#ifndef __ARMEB__ + vrev64.8 $Xl,$Xl +#endif + vst1.64 {$Xl},[$Xi] @ write out Xi + + ret +.size gcm_ghash_v8_4x,.-gcm_ghash_v8_4x +___ + +} } + $code.=<<___; .asciz "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>" .align 2 +#endif ___ if ($flavour =~ /64/) { ######## 64-bit code @@ -337,7 +708,8 @@ if ($flavour =~ /64/) { ######## 64-bit code my $arg=shift; $arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o && - sprintf "ins v%d.d[%d],v%d.d[%d]",$1,($2 eq "lo")?0:1,$3,($4 eq "lo")?0:1; + sprintf "ins v%d.d[%d],v%d.d[%d]",$1<8?$1:$1+8,($2 eq "lo")?0:1, + $3<8?$3:$3+8,($4 eq "lo")?0:1; } foreach(split("\n",$code)) { s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o or @@ -352,7 +724,7 @@ if ($flavour =~ /64/) { ######## 64-bit code s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo; # old->new registers s/@\s/\/\//o; # old->new style commentary - # fix up remainig legacy suffixes + # fix up remaining legacy suffixes s/\.[ui]?8(\s)/$1/o; s/\.[uis]?32//o and s/\.16b/\.4s/go; m/\.p64/o and s/\.16b/\.1q/o; # 1st pmull argument @@ -392,7 +764,7 @@ if ($flavour =~ /64/) { ######## 64-bit code s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go; # new->old registers s/\/\/\s?/@ /o; # new->old style commentary - # fix up remainig 
new-style suffixes + # fix up remaining new-style suffixes s/\],#[0-9]+/]!/o; s/cclr\s+([^,]+),\s*([a-z]+)/mov$2 $1,#0/o or diff --git a/crypto/modes/build.info b/crypto/modes/build.info new file mode 100644 index 000000000000..821340eb909a --- /dev/null +++ b/crypto/modes/build.info @@ -0,0 +1,30 @@ +LIBS=../../libcrypto +SOURCE[../../libcrypto]=\ + cbc128.c ctr128.c cts128.c cfb128.c ofb128.c gcm128.c \ + ccm128.c xts128.c wrap128.c ocb128.c \ + {- $target{modes_asm_src} -} + +INCLUDE[gcm128.o]=.. + +GENERATE[ghash-ia64.s]=asm/ghash-ia64.pl $(LIB_CFLAGS) $(LIB_CPPFLAGS) +GENERATE[ghash-x86.s]=asm/ghash-x86.pl \ + $(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR) +GENERATE[ghash-x86_64.s]=asm/ghash-x86_64.pl $(PERLASM_SCHEME) +GENERATE[aesni-gcm-x86_64.s]=asm/aesni-gcm-x86_64.pl $(PERLASM_SCHEME) +GENERATE[ghash-sparcv9.S]=asm/ghash-sparcv9.pl $(PERLASM_SCHEME) +INCLUDE[ghash-sparcv9.o]=.. +GENERATE[ghash-alpha.S]=asm/ghash-alpha.pl $(PERLASM_SCHEME) +GENERATE[ghash-parisc.s]=asm/ghash-parisc.pl $(PERLASM_SCHEME) +GENERATE[ghashp8-ppc.s]=asm/ghashp8-ppc.pl $(PERLASM_SCHEME) +GENERATE[ghash-armv4.S]=asm/ghash-armv4.pl $(PERLASM_SCHEME) +INCLUDE[ghash-armv4.o]=.. +GENERATE[ghashv8-armx.S]=asm/ghashv8-armx.pl $(PERLASM_SCHEME) +INCLUDE[ghashv8-armx.o]=.. +GENERATE[ghash-s390x.S]=asm/ghash-s390x.pl $(PERLASM_SCHEME) +INCLUDE[ghash-s390x.o]=.. + +BEGINRAW[Makefile] +# GNU make "catch all" +{- $builddir -}/ghash-%.S: {- $sourcedir -}/asm/ghash-%.pl + CC="$(CC)" $(PERL) $< $(PERLASM_SCHEME) $@ +ENDRAW[Makefile] diff --git a/crypto/modes/cbc128.c b/crypto/modes/cbc128.c index c13caea5355b..4ce5eb2ae341 100644 --- a/crypto/modes/cbc128.c +++ b/crypto/modes/cbc128.c @@ -1,64 +1,16 @@ -/* ==================================================================== - * Copyright (c) 2008 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== +/* + * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #include <openssl/crypto.h> #include "modes_lcl.h" #include <string.h> -#ifndef MODES_DEBUG -# ifndef NDEBUG -# define NDEBUG -# endif -#endif -#include <assert.h> - #if !defined(STRICT_ALIGNMENT) && !defined(PEDANTIC) # define STRICT_ALIGNMENT 0 #endif @@ -70,7 +22,8 @@ void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out, size_t n; const unsigned char *iv = ivec; - assert(in && out && key && ivec); + if (len == 0) + return; #if !defined(OPENSSL_SMALL_FOOTPRINT) if (STRICT_ALIGNMENT && @@ -123,7 +76,8 @@ void CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out, unsigned char c[16]; } tmp; - assert(in && out && key && ivec); + if (len == 0) + return; #if !defined(OPENSSL_SMALL_FOOTPRINT) if (in != out) { diff --git a/crypto/modes/ccm128.c b/crypto/modes/ccm128.c index c1ded0f9148a..85ce84f10d80 100644 --- a/crypto/modes/ccm128.c +++ b/crypto/modes/ccm128.c @@ -1,63 +1,16 @@ -/* ==================================================================== - * Copyright (c) 2011 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" +/* + * Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved. 
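Beyond relicensing, the cbc128.c hunks above replace the assert(in && out && key && ivec) guards, which disappear entirely under NDEBUG, with an explicit early return for zero-length input that holds in every build. For orientation, a byte-wise rendering of the encrypt path being guarded (full blocks only; the real CRYPTO_cbc128_encrypt adds a word-aligned fast path and handles a trailing partial block):

    #include <stddef.h>
    #include <string.h>

    /* block128_f matches the callback type declared in modes.h */
    typedef void (*block128_f)(const unsigned char in[16],
                               unsigned char out[16], const void *key);

    static void cbc128_encrypt_sketch(const unsigned char *in,
                                      unsigned char *out, size_t len,
                                      const void *key,
                                      unsigned char ivec[16],
                                      block128_f block)
    {
        const unsigned char *iv = ivec;
        size_t n;

        if (len == 0)                     /* the new guard: no-op, no assert */
            return;

        while (len >= 16) {
            for (n = 0; n < 16; ++n)
                out[n] = in[n] ^ iv[n];   /* chain in previous ciphertext */
            (*block)(out, out, key);      /* C_i = E_k(P_i ^ C_{i-1}) */
            iv = out;
            in += 16;
            out += 16;
            len -= 16;
        }
        if (iv != ivec)
            memcpy(ivec, iv, 16);         /* return last block as next IV */
    }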
* - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #include <openssl/crypto.h> #include "modes_lcl.h" #include <string.h> -#ifndef MODES_DEBUG -# ifndef NDEBUG -# define NDEBUG -# endif -#endif -#include <assert.h> - /* * First you setup M and L parameters and pass the key schedule. This is * called once per session setup... diff --git a/crypto/modes/cfb128.c b/crypto/modes/cfb128.c index d4ecbd08eed1..e439567fe59f 100644 --- a/crypto/modes/cfb128.c +++ b/crypto/modes/cfb128.c @@ -1,64 +1,16 @@ -/* ==================================================================== - * Copyright (c) 2008 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
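The ccm128.c comment kept above, "First you setup M and L parameters and pass the key schedule. This is called once per session setup...", is the whole contract for starting a CCM session. A usage sketch against the public CRYPTO_ccm128_init() API (the parameter values here are illustrative: M is the tag length in bytes, L is the byte width of the length field, and the nonce supplied later must then be 15-L bytes; allocating the context is left to the caller):

    #include <openssl/aes.h>
    #include <openssl/modes.h>

    static int ccm_session_setup(CCM128_CONTEXT *ctx, AES_KEY *ks,
                                 const unsigned char key[16])
    {
        if (AES_set_encrypt_key(key, 128, ks) != 0)
            return -1;
        /* M = 12-byte tag, L = 3-byte length field -> 12-byte nonces */
        CRYPTO_ccm128_init(ctx, 12, 3, ks, (block128_f)AES_encrypt);
        return 0;
    }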
IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== +/* + * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #include <openssl/crypto.h> #include "modes_lcl.h" #include <string.h> -#ifndef MODES_DEBUG -# ifndef NDEBUG -# define NDEBUG -# endif -#endif -#include <assert.h> - /* * The input and output encrypted as though 128bit cfb mode is being used. * The extra state information to record how much of the 128bit block we have @@ -72,8 +24,6 @@ void CRYPTO_cfb128_encrypt(const unsigned char *in, unsigned char *out, unsigned int n; size_t l = 0; - assert(in && out && key && ivec && num); - n = *num; if (enc) { @@ -190,7 +140,7 @@ static void cfbr_encrypt_block(const unsigned char *in, unsigned char *out, block128_f block) { int n, rem, num; - unsigned char ovec[16 * 2 + 1]; /* +1 because we dererefence (but don't + unsigned char ovec[16 * 2 + 1]; /* +1 because we dereference (but don't * use) one byte off the end */ if (nbits <= 0 || nbits > 128) @@ -228,9 +178,6 @@ void CRYPTO_cfb128_1_encrypt(const unsigned char *in, unsigned char *out, size_t n; unsigned char c[1], d[1]; - assert(in && out && key && ivec && num); - assert(*num == 0); - for (n = 0; n < bits; ++n) { c[0] = (in[n / 8] & (1 << (7 - n % 8))) ? 0x80 : 0; cfbr_encrypt_block(c, d, 1, key, ivec, enc, block); @@ -246,9 +193,6 @@ void CRYPTO_cfb128_8_encrypt(const unsigned char *in, unsigned char *out, { size_t n; - assert(in && out && key && ivec && num); - assert(*num == 0); - for (n = 0; n < length; ++n) cfbr_encrypt_block(&in[n], &out[n], 8, key, ivec, enc, block); } diff --git a/crypto/modes/ctr128.c b/crypto/modes/ctr128.c index d4b22728e623..03920b447333 100644 --- a/crypto/modes/ctr128.c +++ b/crypto/modes/ctr128.c @@ -1,64 +1,16 @@ -/* ==================================================================== - * Copyright (c) 2008 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. 
The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== +/* + * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #include <openssl/crypto.h> #include "modes_lcl.h" #include <string.h> -#ifndef MODES_DEBUG -# ifndef NDEBUG -# define NDEBUG -# endif -#endif -#include <assert.h> - /* * NOTE: the IV/counter CTR mode is big-endian. The code itself is * endian-neutral. @@ -113,7 +65,7 @@ static void ctr128_inc_aligned(unsigned char *counter) * before the first call to CRYPTO_ctr128_encrypt(). This algorithm assumes * that the counter is in the x lower bits of the IV (ivec), and that the * application has full control over overflow and the rest of the IV. This - * implementation takes NO responsability for checking that the counter + * implementation takes NO responsibility for checking that the counter * doesn't overflow into the rest of the IV when incremented. */ void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out, @@ -125,9 +77,6 @@ void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out, unsigned int n; size_t l = 0; - assert(in && out && key && ecount_buf && num); - assert(*num < 16); - n = *num; #if !defined(OPENSSL_SMALL_FOOTPRINT) @@ -203,9 +152,6 @@ void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out, { unsigned int n, ctr32; - assert(in && out && key && ecount_buf && num); - assert(*num < 16); - n = *num; while (n && len) { @@ -238,7 +184,7 @@ void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out, (*func) (in, out, blocks, key, ivec); /* (*ctr) does not update ivec, caller does: */ PUTU32(ivec + 12, ctr32); - /* ... overflow was detected, propogate carry. */ + /* ... overflow was detected, propagate carry. 
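The comment corrected just above ("overflow was detected, propagate carry") marks the one subtlety of CRYPTO_ctr128_encrypt_ctr32(): the ctr128_f stream routine it drives increments only the low 32 bits of the counter block, so whenever that word wraps to zero the caller must ripple the carry into the remaining 96 bits of the IV itself. The helper that does so is shaped like this endian-neutral, byte-wise sketch (mirroring the file's static ctr96_inc()):

    static void ctr96_inc_sketch(unsigned char counter[12]) /* IV bytes 0..11 */
    {
        unsigned int n = 12, c = 1;

        do {
            --n;
            c += counter[n];
            counter[n] = (unsigned char)c;
            c >>= 8;                      /* carry ripples toward byte 0 */
        } while (n);
    }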
*/ if (ctr32 == 0) ctr96_inc(ivec); blocks *= 16; diff --git a/crypto/modes/cts128.c b/crypto/modes/cts128.c index 137be595a105..93826a1e2f06 100644 --- a/crypto/modes/cts128.c +++ b/crypto/modes/cts128.c @@ -1,21 +1,16 @@ -/* ==================================================================== - * Copyright (c) 2008 The OpenSSL Project. All rights reserved. +/* + * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved. * - * Rights for redistribution and usage in source and binary - * forms are granted according to the OpenSSL license. + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #include <openssl/crypto.h> #include "modes_lcl.h" #include <string.h> -#ifndef MODES_DEBUG -# ifndef NDEBUG -# define NDEBUG -# endif -#endif -#include <assert.h> - /* * Trouble with Ciphertext Stealing, CTS, mode is that there is no * common official specification, but couple of cipher/application @@ -36,8 +31,6 @@ size_t CRYPTO_cts128_encrypt_block(const unsigned char *in, { size_t residue, n; - assert(in && out && key && ivec); - if (len <= 16) return 0; @@ -68,8 +61,6 @@ size_t CRYPTO_nistcts128_encrypt_block(const unsigned char *in, { size_t residue, n; - assert(in && out && key && ivec); - if (len < 16) return 0; @@ -103,8 +94,6 @@ size_t CRYPTO_cts128_encrypt(const unsigned char *in, unsigned char *out, unsigned char c[16]; } tmp; - assert(in && out && key && ivec); - if (len <= 16) return 0; @@ -141,8 +130,6 @@ size_t CRYPTO_nistcts128_encrypt(const unsigned char *in, unsigned char *out, unsigned char c[16]; } tmp; - assert(in && out && key && ivec); - if (len < 16) return 0; @@ -179,8 +166,6 @@ size_t CRYPTO_cts128_decrypt_block(const unsigned char *in, unsigned char c[32]; } tmp; - assert(in && out && key && ivec); - if (len <= 16) return 0; @@ -224,8 +209,6 @@ size_t CRYPTO_nistcts128_decrypt_block(const unsigned char *in, unsigned char c[32]; } tmp; - assert(in && out && key && ivec); - if (len < 16) return 0; @@ -272,8 +255,6 @@ size_t CRYPTO_cts128_decrypt(const unsigned char *in, unsigned char *out, unsigned char c[32]; } tmp; - assert(in && out && key && ivec); - if (len <= 16) return 0; @@ -314,8 +295,6 @@ size_t CRYPTO_nistcts128_decrypt(const unsigned char *in, unsigned char *out, unsigned char c[32]; } tmp; - assert(in && out && key && ivec); - if (len < 16) return 0; @@ -349,196 +328,3 @@ size_t CRYPTO_nistcts128_decrypt(const unsigned char *in, unsigned char *out, #endif return 16 + len + residue; } - -#if defined(SELFTEST) -# include <stdio.h> -# include <openssl/aes.h> - -/* test vectors from RFC 3962 */ -static const unsigned char test_key[16] = "chicken teriyaki"; -static const unsigned char test_input[64] = - "I would like the" " General Gau's C" - "hicken, please, " "and wonton soup."; -static const unsigned char test_iv[16] = - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - -static const unsigned char vector_17[17] = { - 0xc6, 0x35, 0x35, 0x68, 0xf2, 0xbf, 0x8c, 0xb4, - 0xd8, 0xa5, 0x80, 0x36, 0x2d, 0xa7, 0xff, 0x7f, - 0x97 -}; - -static const unsigned char vector_31[31] = { - 0xfc, 0x00, 0x78, 0x3e, 0x0e, 0xfd, 0xb2, 0xc1, - 0xd4, 0x45, 0xd4, 0xc8, 0xef, 0xf7, 0xed, 0x22, - 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0, - 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5 -}; - -static const unsigned char vector_32[32] = { - 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 
0x62, 0xd5, - 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8, - 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0, - 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84 -}; - -static const unsigned char vector_47[47] = { - 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0, - 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84, - 0xb3, 0xff, 0xfd, 0x94, 0x0c, 0x16, 0xa1, 0x8c, - 0x1b, 0x55, 0x49, 0xd2, 0xf8, 0x38, 0x02, 0x9e, - 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5, - 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5 -}; - -static const unsigned char vector_48[48] = { - 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0, - 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84, - 0x9d, 0xad, 0x8b, 0xbb, 0x96, 0xc4, 0xcd, 0xc0, - 0x3b, 0xc1, 0x03, 0xe1, 0xa1, 0x94, 0xbb, 0xd8, - 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5, - 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8 -}; - -static const unsigned char vector_64[64] = { - 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0, - 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84, - 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5, - 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8, - 0x48, 0x07, 0xef, 0xe8, 0x36, 0xee, 0x89, 0xa5, - 0x26, 0x73, 0x0d, 0xbc, 0x2f, 0x7b, 0xc8, 0x40, - 0x9d, 0xad, 0x8b, 0xbb, 0x96, 0xc4, 0xcd, 0xc0, - 0x3b, 0xc1, 0x03, 0xe1, 0xa1, 0x94, 0xbb, 0xd8 -}; - -static AES_KEY encks, decks; - -void test_vector(const unsigned char *vector, size_t len) -{ - unsigned char iv[sizeof(test_iv)]; - unsigned char cleartext[64], ciphertext[64]; - size_t tail; - - printf("vector_%d\n", len); - fflush(stdout); - - if ((tail = len % 16) == 0) - tail = 16; - tail += 16; - - /* test block-based encryption */ - memcpy(iv, test_iv, sizeof(test_iv)); - CRYPTO_cts128_encrypt_block(test_input, ciphertext, len, &encks, iv, - (block128_f) AES_encrypt); - if (memcmp(ciphertext, vector, len)) - fprintf(stderr, "output_%d mismatch\n", len), exit(1); - if (memcmp(iv, vector + len - tail, sizeof(iv))) - fprintf(stderr, "iv_%d mismatch\n", len), exit(1); - - /* test block-based decryption */ - memcpy(iv, test_iv, sizeof(test_iv)); - CRYPTO_cts128_decrypt_block(ciphertext, cleartext, len, &decks, iv, - (block128_f) AES_decrypt); - if (memcmp(cleartext, test_input, len)) - fprintf(stderr, "input_%d mismatch\n", len), exit(2); - if (memcmp(iv, vector + len - tail, sizeof(iv))) - fprintf(stderr, "iv_%d mismatch\n", len), exit(2); - - /* test streamed encryption */ - memcpy(iv, test_iv, sizeof(test_iv)); - CRYPTO_cts128_encrypt(test_input, ciphertext, len, &encks, iv, - (cbc128_f) AES_cbc_encrypt); - if (memcmp(ciphertext, vector, len)) - fprintf(stderr, "output_%d mismatch\n", len), exit(3); - if (memcmp(iv, vector + len - tail, sizeof(iv))) - fprintf(stderr, "iv_%d mismatch\n", len), exit(3); - - /* test streamed decryption */ - memcpy(iv, test_iv, sizeof(test_iv)); - CRYPTO_cts128_decrypt(ciphertext, cleartext, len, &decks, iv, - (cbc128_f) AES_cbc_encrypt); - if (memcmp(cleartext, test_input, len)) - fprintf(stderr, "input_%d mismatch\n", len), exit(4); - if (memcmp(iv, vector + len - tail, sizeof(iv))) - fprintf(stderr, "iv_%d mismatch\n", len), exit(4); -} - -void test_nistvector(const unsigned char *vector, size_t len) -{ - unsigned char iv[sizeof(test_iv)]; - unsigned char cleartext[64], ciphertext[64], nistvector[64]; - size_t tail; - - printf("nistvector_%d\n", len); - fflush(stdout); - - if ((tail = len % 16) == 0) - tail = 16; - - len -= 16 + tail; - memcpy(nistvector, vector, len); - /* flip two last blocks */ - memcpy(nistvector + len, vector + len + 16, tail); - 
memcpy(nistvector + len + tail, vector + len, 16); - len += 16 + tail; - tail = 16; - - /* test block-based encryption */ - memcpy(iv, test_iv, sizeof(test_iv)); - CRYPTO_nistcts128_encrypt_block(test_input, ciphertext, len, &encks, iv, - (block128_f) AES_encrypt); - if (memcmp(ciphertext, nistvector, len)) - fprintf(stderr, "output_%d mismatch\n", len), exit(1); - if (memcmp(iv, nistvector + len - tail, sizeof(iv))) - fprintf(stderr, "iv_%d mismatch\n", len), exit(1); - - /* test block-based decryption */ - memcpy(iv, test_iv, sizeof(test_iv)); - CRYPTO_nistcts128_decrypt_block(ciphertext, cleartext, len, &decks, iv, - (block128_f) AES_decrypt); - if (memcmp(cleartext, test_input, len)) - fprintf(stderr, "input_%d mismatch\n", len), exit(2); - if (memcmp(iv, nistvector + len - tail, sizeof(iv))) - fprintf(stderr, "iv_%d mismatch\n", len), exit(2); - - /* test streamed encryption */ - memcpy(iv, test_iv, sizeof(test_iv)); - CRYPTO_nistcts128_encrypt(test_input, ciphertext, len, &encks, iv, - (cbc128_f) AES_cbc_encrypt); - if (memcmp(ciphertext, nistvector, len)) - fprintf(stderr, "output_%d mismatch\n", len), exit(3); - if (memcmp(iv, nistvector + len - tail, sizeof(iv))) - fprintf(stderr, "iv_%d mismatch\n", len), exit(3); - - /* test streamed decryption */ - memcpy(iv, test_iv, sizeof(test_iv)); - CRYPTO_nistcts128_decrypt(ciphertext, cleartext, len, &decks, iv, - (cbc128_f) AES_cbc_encrypt); - if (memcmp(cleartext, test_input, len)) - fprintf(stderr, "input_%d mismatch\n", len), exit(4); - if (memcmp(iv, nistvector + len - tail, sizeof(iv))) - fprintf(stderr, "iv_%d mismatch\n", len), exit(4); -} - -int main() -{ - AES_set_encrypt_key(test_key, 128, &encks); - AES_set_decrypt_key(test_key, 128, &decks); - - test_vector(vector_17, sizeof(vector_17)); - test_vector(vector_31, sizeof(vector_31)); - test_vector(vector_32, sizeof(vector_32)); - test_vector(vector_47, sizeof(vector_47)); - test_vector(vector_48, sizeof(vector_48)); - test_vector(vector_64, sizeof(vector_64)); - - test_nistvector(vector_17, sizeof(vector_17)); - test_nistvector(vector_31, sizeof(vector_31)); - test_nistvector(vector_32, sizeof(vector_32)); - test_nistvector(vector_47, sizeof(vector_47)); - test_nistvector(vector_48, sizeof(vector_48)); - test_nistvector(vector_64, sizeof(vector_64)); - - return 0; -} -#endif diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c index e299131c1382..15f76e3e86bc 100644 --- a/crypto/modes/gcm128.c +++ b/crypto/modes/gcm128.c @@ -1,65 +1,16 @@ -/* ==================================================================== - * Copyright (c) 2010 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. 
The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" +/* + * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved. * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#define OPENSSL_FIPSAPI - #include <openssl/crypto.h> #include "modes_lcl.h" #include <string.h> -#ifndef MODES_DEBUG -# ifndef NDEBUG -# define NDEBUG -# endif -#endif -#include <assert.h> - #if defined(BSWAP4) && defined(STRICT_ALIGNMENT) /* redefine, because alignment is ensured */ # undef GETU32 @@ -150,9 +101,7 @@ static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256]) const union { long one; char little; - } is_endian = { - 1 - }; + } is_endian = { 1 }; static const size_t rem_8bit[256] = { PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246), PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E), @@ -260,7 +209,7 @@ static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256]) } } -# define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable) +# define GCM_MUL(ctx) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable) #elif TABLE_BITS==4 @@ -321,9 +270,7 @@ static void gcm_init_4bit(u128 Htable[16], u64 H[2]) const union { long one; char little; - } is_endian = { - 1 - }; + } is_endian = { 1 }; if (is_endian.little) for (j = 0; j < 16; ++j) { @@ -356,9 +303,7 @@ static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]) const union { long one; char little; - } is_endian = { - 1 - }; + } is_endian = { 1 }; nlo = ((const u8 *)Xi)[15]; nhi = nlo >> 4; @@ -437,9 +382,7 @@ static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const union { long one; char little; - } is_endian = { - 1 - }; + } is_endian = { 1 }; # if 1 do { @@ -607,7 +550,7 @@ void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len); # endif -# define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable) +# define GCM_MUL(ctx) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable) # if defined(GHASH_ASM) 
|| !defined(OPENSSL_SMALL_FOOTPRINT) # define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len) /* @@ -629,9 +572,7 @@ static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2]) const union { long one; char little; - } is_endian = { - 1 - }; + } is_endian = { 1 }; V.hi = H[0]; /* H is in host byte order, no byte swapping */ V.lo = H[1]; @@ -683,7 +624,7 @@ static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2]) } } -# define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u) +# define GCM_MUL(ctx) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u) #endif @@ -762,7 +703,7 @@ void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp, #ifdef GCM_FUNCREF_4BIT # undef GCM_MUL -# define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable) +# define GCM_MUL(ctx) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable) # ifdef GHASH # undef GHASH # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len) @@ -774,9 +715,7 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) const union { long one; char little; - } is_endian = { - 1 - }; + } is_endian = { 1 }; memset(ctx, 0, sizeof(*ctx)); ctx->block = block; @@ -801,18 +740,22 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) #if TABLE_BITS==8 gcm_init_8bit(ctx->Htable, ctx->H.u); #elif TABLE_BITS==4 +# if defined(GHASH) +# define CTX__GHASH(f) (ctx->ghash = (f)) +# else +# define CTX__GHASH(f) (ctx->ghash = NULL) +# endif # if defined(GHASH_ASM_X86_OR_64) # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2) - if (OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */ - OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */ + if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */ if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */ gcm_init_avx(ctx->Htable, ctx->H.u); ctx->gmult = gcm_gmult_avx; - ctx->ghash = gcm_ghash_avx; + CTX__GHASH(gcm_ghash_avx); } else { gcm_init_clmul(ctx->Htable, ctx->H.u); ctx->gmult = gcm_gmult_clmul; - ctx->ghash = gcm_ghash_clmul; + CTX__GHASH(gcm_ghash_clmul); } return; } @@ -825,66 +768,59 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */ # endif ctx->gmult = gcm_gmult_4bit_mmx; - ctx->ghash = gcm_ghash_4bit_mmx; + CTX__GHASH(gcm_ghash_4bit_mmx); } else { ctx->gmult = gcm_gmult_4bit_x86; - ctx->ghash = gcm_ghash_4bit_x86; + CTX__GHASH(gcm_ghash_4bit_x86); } # else ctx->gmult = gcm_gmult_4bit; - ctx->ghash = gcm_ghash_4bit; + CTX__GHASH(gcm_ghash_4bit); # endif # elif defined(GHASH_ASM_ARM) # ifdef PMULL_CAPABLE if (PMULL_CAPABLE) { gcm_init_v8(ctx->Htable, ctx->H.u); ctx->gmult = gcm_gmult_v8; - ctx->ghash = gcm_ghash_v8; + CTX__GHASH(gcm_ghash_v8); } else # endif # ifdef NEON_CAPABLE if (NEON_CAPABLE) { gcm_init_neon(ctx->Htable, ctx->H.u); ctx->gmult = gcm_gmult_neon; - ctx->ghash = gcm_ghash_neon; + CTX__GHASH(gcm_ghash_neon); } else # endif { gcm_init_4bit(ctx->Htable, ctx->H.u); ctx->gmult = gcm_gmult_4bit; -# if defined(GHASH) - ctx->ghash = gcm_ghash_4bit; -# else - ctx->ghash = NULL; -# endif + CTX__GHASH(gcm_ghash_4bit); } # elif defined(GHASH_ASM_SPARC) if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) { gcm_init_vis3(ctx->Htable, ctx->H.u); ctx->gmult = gcm_gmult_vis3; - ctx->ghash = gcm_ghash_vis3; + CTX__GHASH(gcm_ghash_vis3); } else { gcm_init_4bit(ctx->Htable, ctx->H.u); ctx->gmult = gcm_gmult_4bit; - ctx->ghash = gcm_ghash_4bit; + CTX__GHASH(gcm_ghash_4bit); } # elif defined(GHASH_ASM_PPC) if (OPENSSL_ppccap_P & PPC_CRYPTO207) { 
gcm_init_p8(ctx->Htable, ctx->H.u); ctx->gmult = gcm_gmult_p8; - ctx->ghash = gcm_ghash_p8; + CTX__GHASH(gcm_ghash_p8); } else { gcm_init_4bit(ctx->Htable, ctx->H.u); ctx->gmult = gcm_gmult_4bit; -# if defined(GHASH) - ctx->ghash = gcm_ghash_4bit; -# else - ctx->ghash = NULL; -# endif + CTX__GHASH(gcm_ghash_4bit); } # else gcm_init_4bit(ctx->Htable, ctx->H.u); # endif +# undef CTX__GHASH #endif } @@ -894,18 +830,12 @@ void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, const union { long one; char little; - } is_endian = { - 1 - }; + } is_endian = { 1 }; unsigned int ctr; #ifdef GCM_FUNCREF_4BIT void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; #endif - ctx->Yi.u[0] = 0; - ctx->Yi.u[1] = 0; - ctx->Xi.u[0] = 0; - ctx->Xi.u[1] = 0; ctx->len.u[0] = 0; /* AAD length */ ctx->len.u[1] = 0; /* message length */ ctx->ares = 0; @@ -913,53 +843,68 @@ void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, if (len == 12) { memcpy(ctx->Yi.c, iv, 12); + ctx->Yi.c[12] = 0; + ctx->Yi.c[13] = 0; + ctx->Yi.c[14] = 0; ctx->Yi.c[15] = 1; ctr = 1; } else { size_t i; u64 len0 = len; + /* Borrow ctx->Xi to calculate initial Yi */ + ctx->Xi.u[0] = 0; + ctx->Xi.u[1] = 0; + while (len >= 16) { for (i = 0; i < 16; ++i) - ctx->Yi.c[i] ^= iv[i]; - GCM_MUL(ctx, Yi); + ctx->Xi.c[i] ^= iv[i]; + GCM_MUL(ctx); iv += 16; len -= 16; } if (len) { for (i = 0; i < len; ++i) - ctx->Yi.c[i] ^= iv[i]; - GCM_MUL(ctx, Yi); + ctx->Xi.c[i] ^= iv[i]; + GCM_MUL(ctx); } len0 <<= 3; if (is_endian.little) { #ifdef BSWAP8 - ctx->Yi.u[1] ^= BSWAP8(len0); + ctx->Xi.u[1] ^= BSWAP8(len0); #else - ctx->Yi.c[8] ^= (u8)(len0 >> 56); - ctx->Yi.c[9] ^= (u8)(len0 >> 48); - ctx->Yi.c[10] ^= (u8)(len0 >> 40); - ctx->Yi.c[11] ^= (u8)(len0 >> 32); - ctx->Yi.c[12] ^= (u8)(len0 >> 24); - ctx->Yi.c[13] ^= (u8)(len0 >> 16); - ctx->Yi.c[14] ^= (u8)(len0 >> 8); - ctx->Yi.c[15] ^= (u8)(len0); + ctx->Xi.c[8] ^= (u8)(len0 >> 56); + ctx->Xi.c[9] ^= (u8)(len0 >> 48); + ctx->Xi.c[10] ^= (u8)(len0 >> 40); + ctx->Xi.c[11] ^= (u8)(len0 >> 32); + ctx->Xi.c[12] ^= (u8)(len0 >> 24); + ctx->Xi.c[13] ^= (u8)(len0 >> 16); + ctx->Xi.c[14] ^= (u8)(len0 >> 8); + ctx->Xi.c[15] ^= (u8)(len0); #endif - } else - ctx->Yi.u[1] ^= len0; + } else { + ctx->Xi.u[1] ^= len0; + } - GCM_MUL(ctx, Yi); + GCM_MUL(ctx); if (is_endian.little) #ifdef BSWAP4 - ctr = BSWAP4(ctx->Yi.d[3]); + ctr = BSWAP4(ctx->Xi.d[3]); #else - ctr = GETU32(ctx->Yi.c + 12); + ctr = GETU32(ctx->Xi.c + 12); #endif else - ctr = ctx->Yi.d[3]; + ctr = ctx->Xi.d[3]; + + /* Copy borrowed Xi to Yi */ + ctx->Yi.u[0] = ctx->Xi.u[0]; + ctx->Yi.u[1] = ctx->Xi.u[1]; } + ctx->Xi.u[0] = 0; + ctx->Xi.u[1] = 0; + (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key); ++ctr; if (is_endian.little) @@ -1002,7 +947,7 @@ int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, n = (n + 1) % 16; } if (n == 0) - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); else { ctx->ares = n; return 0; @@ -1018,7 +963,7 @@ int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, while (len >= 16) { for (i = 0; i < 16; ++i) ctx->Xi.c[i] ^= aad[i]; - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); aad += 16; len -= 16; } @@ -1040,33 +985,42 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const union { long one; char little; - } is_endian = { - 1 - }; - unsigned int n, ctr; + } is_endian = { 1 }; + unsigned int n, ctr, mres; size_t i; u64 mlen = ctx->len.u[1]; block128_f block = ctx->block; void *key = ctx->key; #ifdef GCM_FUNCREF_4BIT void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; -# 
ifdef GHASH +# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len) = ctx->ghash; # endif #endif -#if 0 - n = (unsigned int)mlen % 16; /* alternative to ctx->mres */ -#endif mlen += len; if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) return -1; ctx->len.u[1] = mlen; + mres = ctx->mres; + if (ctx->ares) { /* First call to encrypt finalizes GHASH(AAD) */ - GCM_MUL(ctx, Xi); +#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) + if (len == 0) { + GCM_MUL(ctx); + ctx->ares = 0; + return 0; + } + memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); + ctx->Xi.u[0] = 0; + ctx->Xi.u[1] = 0; + mres = sizeof(ctx->Xi); +#else + GCM_MUL(ctx); +#endif ctx->ares = 0; } @@ -1079,28 +1033,49 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, else ctr = ctx->Yi.d[3]; - n = ctx->mres; + n = mres % 16; #if !defined(OPENSSL_SMALL_FOOTPRINT) if (16 % sizeof(size_t) == 0) { /* always true actually */ do { if (n) { +# if defined(GHASH) + while (n && len) { + ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n]; + --len; + n = (n + 1) % 16; + } + if (n == 0) { + GHASH(ctx, ctx->Xn, mres); + mres = 0; + } else { + ctx->mres = mres; + return 0; + } +# else while (n && len) { ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n]; --len; n = (n + 1) % 16; } - if (n == 0) - GCM_MUL(ctx, Xi); - else { + if (n == 0) { + GCM_MUL(ctx); + mres = 0; + } else { ctx->mres = n; return 0; } +# endif } # if defined(STRICT_ALIGNMENT) if (((size_t)in | (size_t)out) % sizeof(size_t) != 0) break; # endif -# if defined(GHASH) && defined(GHASH_CHUNK) +# if defined(GHASH) + if (len >= 16 && mres) { + GHASH(ctx, ctx->Xn, mres); + mres = 0; + } +# if defined(GHASH_CHUNK) while (len >= GHASH_CHUNK) { size_t j = GHASH_CHUNK; @@ -1111,11 +1086,11 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, (*block) (ctx->Yi.c, ctx->EKi.c, key); ++ctr; if (is_endian.little) -# ifdef BSWAP4 +# ifdef BSWAP4 ctx->Yi.d[3] = BSWAP4(ctr); -# else +# else PUTU32(ctx->Yi.c + 12, ctr); -# endif +# endif else ctx->Yi.d[3] = ctr; for (i = 0; i < 16 / sizeof(size_t); ++i) @@ -1127,6 +1102,7 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK); len -= GHASH_CHUNK; } +# endif if ((i = (len & (size_t)-16))) { size_t j = i; @@ -1169,7 +1145,7 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, ctx->Yi.d[3] = ctr; for (i = 0; i < 16 / sizeof(size_t); ++i) ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i]; - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); out += 16; in += 16; len -= 16; @@ -1186,13 +1162,21 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, # endif else ctx->Yi.d[3] = ctr; +# if defined(GHASH) + while (len--) { + ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n]; + ++n; + } +# else while (len--) { ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n]; ++n; } + mres = n; +# endif } - ctx->mres = n; + ctx->mres = mres; return 0; } while (0); } @@ -1210,13 +1194,22 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, else ctx->Yi.d[3] = ctr; } - ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n]; +#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) + ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n]; n = (n + 1) % 16; + if (mres == sizeof(ctx->Xn)) { + GHASH(ctx,ctx->Xn,sizeof(ctx->Xn)); + mres = 0; + } +#else + ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n]; + mres = n = (n + 1) % 16; if (n == 0) - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); +#endif } - ctx->mres = n; + ctx->mres = mres; return 0; } @@ -1227,17 +1220,15 @@ int 
CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const union { long one; char little; - } is_endian = { - 1 - }; - unsigned int n, ctr; + } is_endian = { 1 }; + unsigned int n, ctr, mres; size_t i; u64 mlen = ctx->len.u[1]; block128_f block = ctx->block; void *key = ctx->key; #ifdef GCM_FUNCREF_4BIT void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; -# ifdef GHASH +# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len) = ctx->ghash; # endif @@ -1248,9 +1239,23 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, return -1; ctx->len.u[1] = mlen; + mres = ctx->mres; + if (ctx->ares) { /* First call to decrypt finalizes GHASH(AAD) */ - GCM_MUL(ctx, Xi); +#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) + if (len == 0) { + GCM_MUL(ctx); + ctx->ares = 0; + return 0; + } + memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); + ctx->Xi.u[0] = 0; + ctx->Xi.u[1] = 0; + mres = sizeof(ctx->Xi); +#else + GCM_MUL(ctx); +#endif ctx->ares = 0; } @@ -1263,11 +1268,25 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, else ctr = ctx->Yi.d[3]; - n = ctx->mres; + n = mres % 16; #if !defined(OPENSSL_SMALL_FOOTPRINT) if (16 % sizeof(size_t) == 0) { /* always true actually */ do { if (n) { +# if defined(GHASH) + while (n && len) { + *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n]; + --len; + n = (n + 1) % 16; + } + if (n == 0) { + GHASH(ctx, ctx->Xn, mres); + mres = 0; + } else { + ctx->mres = mres; + return 0; + } +# else while (n && len) { u8 c = *(in++); *(out++) = c ^ ctx->EKi.c[n]; @@ -1275,18 +1294,25 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, --len; n = (n + 1) % 16; } - if (n == 0) - GCM_MUL(ctx, Xi); - else { + if (n == 0) { + GCM_MUL(ctx); + mres = 0; + } else { ctx->mres = n; return 0; } +# endif } # if defined(STRICT_ALIGNMENT) if (((size_t)in | (size_t)out) % sizeof(size_t) != 0) break; # endif -# if defined(GHASH) && defined(GHASH_CHUNK) +# if defined(GHASH) + if (len >= 16 && mres) { + GHASH(ctx, ctx->Xn, mres); + mres = 0; + } +# if defined(GHASH_CHUNK) while (len >= GHASH_CHUNK) { size_t j = GHASH_CHUNK; @@ -1298,11 +1324,11 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, (*block) (ctx->Yi.c, ctx->EKi.c, key); ++ctr; if (is_endian.little) -# ifdef BSWAP4 +# ifdef BSWAP4 ctx->Yi.d[3] = BSWAP4(ctr); -# else +# else PUTU32(ctx->Yi.c + 12, ctr); -# endif +# endif else ctx->Yi.d[3] = ctr; for (i = 0; i < 16 / sizeof(size_t); ++i) @@ -1313,6 +1339,7 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, } len -= GHASH_CHUNK; } +# endif if ((i = (len & (size_t)-16))) { GHASH(ctx, in, i); while (len >= 16) { @@ -1356,7 +1383,7 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, out[i] = c ^ ctx->EKi.t[i]; ctx->Xi.t[i] ^= c; } - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); out += 16; in += 16; len -= 16; @@ -1373,15 +1400,23 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, # endif else ctx->Yi.d[3] = ctr; +# if defined(GHASH) + while (len--) { + out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n]; + ++n; + } +# else while (len--) { u8 c = in[n]; ctx->Xi.c[n] ^= c; out[n] = c ^ ctx->EKi.c[n]; ++n; } + mres = n; +# endif } - ctx->mres = n; + ctx->mres = mres; return 0; } while (0); } @@ -1400,15 +1435,24 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, else ctx->Yi.d[3] = ctr; } +#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) + out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n]; + n = (n + 1) % 16; + if (mres == sizeof(ctx->Xn)) { + GHASH(ctx,ctx->Xn,sizeof(ctx->Xn)); + mres = 0; + } +#else c = in[i]; 
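    /*
     * A sketch of the pattern behind the ctx->Xn traffic in the
     * surrounding gcm128.c hunks: instead of folding every byte into Xi
     * and calling GCM_MUL() once per 16-byte block, leftover bytes are
     * staged in the ctx->Xn scratch buffer and flushed with one bulk
     * GHASH() call when the buffer fills or the data ends, so the
     * aggregated-reduction assembly sees long contiguous runs.  Names
     * below are illustrative; XN_CAP stands in for sizeof(ctx->Xn) and
     * bulk_ghash_f for the bound Xi/Htable hash call.
     */
    #include <stddef.h>

    #define XN_CAP 48                       /* illustrative capacity */

    typedef void (*bulk_ghash_f)(const unsigned char *inp, size_t len);

    static void stage_byte(unsigned char Xn[XN_CAP], unsigned int *mres,
                           unsigned char c, bulk_ghash_f ghash)
    {
        Xn[(*mres)++] = c;                  /* defer: no per-block GCM_MUL */
        if (*mres == XN_CAP) {              /* buffer full: one bulk hash */
            ghash(Xn, XN_CAP);
            *mres = 0;
        }
    }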
out[i] = c ^ ctx->EKi.c[n]; ctx->Xi.c[n] ^= c; - n = (n + 1) % 16; + mres = n = (n + 1) % 16; if (n == 0) - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); +#endif } - ctx->mres = n; + ctx->mres = mres; return 0; } @@ -1416,68 +1460,104 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const unsigned char *in, unsigned char *out, size_t len, ctr128_f stream) { +#if defined(OPENSSL_SMALL_FOOTPRINT) + return CRYPTO_gcm128_encrypt(ctx, in, out, len); +#else const union { long one; char little; - } is_endian = { - 1 - }; - unsigned int n, ctr; + } is_endian = { 1 }; + unsigned int n, ctr, mres; size_t i; u64 mlen = ctx->len.u[1]; void *key = ctx->key; -#ifdef GCM_FUNCREF_4BIT +# ifdef GCM_FUNCREF_4BIT void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; -# ifdef GHASH +# ifdef GHASH void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len) = ctx->ghash; +# endif # endif -#endif mlen += len; if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) return -1; ctx->len.u[1] = mlen; + mres = ctx->mres; + if (ctx->ares) { /* First call to encrypt finalizes GHASH(AAD) */ - GCM_MUL(ctx, Xi); +#if defined(GHASH) + if (len == 0) { + GCM_MUL(ctx); + ctx->ares = 0; + return 0; + } + memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); + ctx->Xi.u[0] = 0; + ctx->Xi.u[1] = 0; + mres = sizeof(ctx->Xi); +#else + GCM_MUL(ctx); +#endif ctx->ares = 0; } if (is_endian.little) -#ifdef BSWAP4 +# ifdef BSWAP4 ctr = BSWAP4(ctx->Yi.d[3]); -#else +# else ctr = GETU32(ctx->Yi.c + 12); -#endif +# endif else ctr = ctx->Yi.d[3]; - n = ctx->mres; + n = mres % 16; if (n) { +# if defined(GHASH) + while (n && len) { + ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n]; + --len; + n = (n + 1) % 16; + } + if (n == 0) { + GHASH(ctx, ctx->Xn, mres); + mres = 0; + } else { + ctx->mres = mres; + return 0; + } +# else while (n && len) { ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n]; --len; n = (n + 1) % 16; } - if (n == 0) - GCM_MUL(ctx, Xi); - else { + if (n == 0) { + GCM_MUL(ctx); + mres = 0; + } else { ctx->mres = n; return 0; } +# endif } -#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) +# if defined(GHASH) + if (len >= 16 && mres) { + GHASH(ctx, ctx->Xn, mres); + mres = 0; + } +# if defined(GHASH_CHUNK) while (len >= GHASH_CHUNK) { (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c); ctr += GHASH_CHUNK / 16; if (is_endian.little) -# ifdef BSWAP4 +# ifdef BSWAP4 ctx->Yi.d[3] = BSWAP4(ctr); -# else +# else PUTU32(ctx->Yi.c + 12, ctr); -# endif +# endif else ctx->Yi.d[3] = ctr; GHASH(ctx, out, GHASH_CHUNK); @@ -1485,99 +1565,134 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, in += GHASH_CHUNK; len -= GHASH_CHUNK; } -#endif +# endif +# endif if ((i = (len & (size_t)-16))) { size_t j = i / 16; (*stream) (in, out, j, key, ctx->Yi.c); ctr += (unsigned int)j; if (is_endian.little) -#ifdef BSWAP4 +# ifdef BSWAP4 ctx->Yi.d[3] = BSWAP4(ctr); -#else +# else PUTU32(ctx->Yi.c + 12, ctr); -#endif +# endif else ctx->Yi.d[3] = ctr; in += i; len -= i; -#if defined(GHASH) +# if defined(GHASH) GHASH(ctx, out, i); out += i; -#else +# else while (j--) { for (i = 0; i < 16; ++i) ctx->Xi.c[i] ^= out[i]; - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); out += 16; } -#endif +# endif } if (len) { (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key); ++ctr; if (is_endian.little) -#ifdef BSWAP4 +# ifdef BSWAP4 ctx->Yi.d[3] = BSWAP4(ctr); -#else +# else PUTU32(ctx->Yi.c + 12, ctr); -#endif +# endif else ctx->Yi.d[3] = ctr; while (len--) { - ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n]; +# if defined(GHASH) + 
ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n]; +# else + ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n]; +# endif ++n; } } - ctx->mres = n; + ctx->mres = mres; return 0; +#endif } int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const unsigned char *in, unsigned char *out, size_t len, ctr128_f stream) { +#if defined(OPENSSL_SMALL_FOOTPRINT) + return CRYPTO_gcm128_decrypt(ctx, in, out, len); +#else const union { long one; char little; - } is_endian = { - 1 - }; - unsigned int n, ctr; + } is_endian = { 1 }; + unsigned int n, ctr, mres; size_t i; u64 mlen = ctx->len.u[1]; void *key = ctx->key; -#ifdef GCM_FUNCREF_4BIT +# ifdef GCM_FUNCREF_4BIT void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; -# ifdef GHASH +# ifdef GHASH void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len) = ctx->ghash; +# endif # endif -#endif mlen += len; if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) return -1; ctx->len.u[1] = mlen; + mres = ctx->mres; + if (ctx->ares) { /* First call to decrypt finalizes GHASH(AAD) */ - GCM_MUL(ctx, Xi); +# if defined(GHASH) + if (len == 0) { + GCM_MUL(ctx); + ctx->ares = 0; + return 0; + } + memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); + ctx->Xi.u[0] = 0; + ctx->Xi.u[1] = 0; + mres = sizeof(ctx->Xi); +# else + GCM_MUL(ctx); +# endif ctx->ares = 0; } if (is_endian.little) -#ifdef BSWAP4 +# ifdef BSWAP4 ctr = BSWAP4(ctx->Yi.d[3]); -#else +# else ctr = GETU32(ctx->Yi.c + 12); -#endif +# endif else ctr = ctx->Yi.d[3]; - n = ctx->mres; + n = mres % 16; if (n) { +# if defined(GHASH) + while (n && len) { + *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n]; + --len; + n = (n + 1) % 16; + } + if (n == 0) { + GHASH(ctx, ctx->Xn, mres); + mres = 0; + } else { + ctx->mres = mres; + return 0; + } +# else while (n && len) { u8 c = *(in++); *(out++) = c ^ ctx->EKi.c[n]; @@ -1585,55 +1700,63 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, --len; n = (n + 1) % 16; } - if (n == 0) - GCM_MUL(ctx, Xi); - else { + if (n == 0) { + GCM_MUL(ctx); + mres = 0; + } else { ctx->mres = n; return 0; } +# endif } -#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) +# if defined(GHASH) + if (len >= 16 && mres) { + GHASH(ctx, ctx->Xn, mres); + mres = 0; + } +# if defined(GHASH_CHUNK) while (len >= GHASH_CHUNK) { GHASH(ctx, in, GHASH_CHUNK); (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c); ctr += GHASH_CHUNK / 16; if (is_endian.little) -# ifdef BSWAP4 +# ifdef BSWAP4 ctx->Yi.d[3] = BSWAP4(ctr); -# else +# else PUTU32(ctx->Yi.c + 12, ctr); -# endif +# endif else ctx->Yi.d[3] = ctr; out += GHASH_CHUNK; in += GHASH_CHUNK; len -= GHASH_CHUNK; } -#endif +# endif +# endif if ((i = (len & (size_t)-16))) { size_t j = i / 16; -#if defined(GHASH) +# if defined(GHASH) GHASH(ctx, in, i); -#else +# else while (j--) { size_t k; for (k = 0; k < 16; ++k) ctx->Xi.c[k] ^= in[k]; - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); in += 16; } j = i / 16; in -= i; -#endif +# endif (*stream) (in, out, j, key, ctx->Yi.c); ctr += (unsigned int)j; if (is_endian.little) -#ifdef BSWAP4 +# ifdef BSWAP4 ctx->Yi.d[3] = BSWAP4(ctr); -#else +# else PUTU32(ctx->Yi.c + 12, ctr); -#endif +# endif else ctx->Yi.d[3] = ctr; out += i; @@ -1644,23 +1767,28 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key); ++ctr; if (is_endian.little) -#ifdef BSWAP4 +# ifdef BSWAP4 ctx->Yi.d[3] = BSWAP4(ctr); -#else +# else PUTU32(ctx->Yi.c + 12, ctr); -#endif +# endif else ctx->Yi.d[3] = ctr; while (len--) { +# if defined(GHASH) + 
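One detail worth pulling out of these two loops: GHASH always runs over ciphertext, so the encrypt path hashes its output after the stream call, while the decrypt path above hashes its input before it. A stub-only sketch of the two orderings (the stubs just record call order; real code does the crypto):

#include <stdio.h>

static void ghash_update(const char *buf) { printf("ghash(%s)\n", buf); }
static void ctr_xor(const char *src, const char *dst)
{
    printf("ctr: %s -> %s\n", src, dst);
}

int main(void)
{
    ctr_xor("pt", "ct");    /* encrypt: keystream first...         */
    ghash_update("ct");     /* ...then hash the ciphertext         */

    ghash_update("ct");     /* decrypt: hash the ciphertext first  */
    ctr_xor("ct", "pt");    /* ...then strip the keystream         */
    return 0;
}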
out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n]; +# else u8 c = in[n]; - ctx->Xi.c[n] ^= c; + ctx->Xi.c[mres++] ^= c; out[n] = c ^ ctx->EKi.c[n]; +# endif ++n; } } - ctx->mres = n; + ctx->mres = mres; return 0; +#endif } int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag, @@ -1669,17 +1797,37 @@ int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag, const union { long one; char little; - } is_endian = { - 1 - }; + } is_endian = { 1 }; u64 alen = ctx->len.u[0] << 3; u64 clen = ctx->len.u[1] << 3; #ifdef GCM_FUNCREF_4BIT void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; +# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) + void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], + const u8 *inp, size_t len) = ctx->ghash; +# endif #endif +#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) + u128 bitlen; + unsigned int mres = ctx->mres; + + if (mres) { + unsigned blocks = (mres + 15) & -16; + + memset(ctx->Xn + mres, 0, blocks - mres); + mres = blocks; + if (mres == sizeof(ctx->Xn)) { + GHASH(ctx, ctx->Xn, mres); + mres = 0; + } + } else if (ctx->ares) { + GCM_MUL(ctx); + } +#else if (ctx->mres || ctx->ares) - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); +#endif if (is_endian.little) { #ifdef BSWAP8 @@ -1696,9 +1844,17 @@ int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag, #endif } +#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) + bitlen.hi = alen; + bitlen.lo = clen; + memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen)); + mres += sizeof(bitlen); + GHASH(ctx, ctx->Xn, mres); +#else ctx->Xi.u[0] ^= alen; ctx->Xi.u[1] ^= clen; - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); +#endif ctx->Xi.u[0] ^= ctx->EK0.u[0]; ctx->Xi.u[1] ^= ctx->EK0.u[1]; @@ -1720,7 +1876,7 @@ GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block) { GCM128_CONTEXT *ret; - if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT)))) + if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL) CRYPTO_gcm128_init(ret, key, block); return ret; @@ -1728,644 +1884,5 @@ GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block) void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) { - if (ctx) { - OPENSSL_cleanse(ctx, sizeof(*ctx)); - OPENSSL_free(ctx); - } + OPENSSL_clear_free(ctx, sizeof(*ctx)); } - -#if defined(SELFTEST) -# include <stdio.h> -# include <openssl/aes.h> - -/* Test Case 1 */ -static const u8 K1[16], *P1 = NULL, *A1 = NULL, IV1[12], *C1 = NULL; -static const u8 T1[] = { - 0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e, 0x30, 0x61, - 0x36, 0x7f, 0x1d, 0x57, 0xa4, 0xe7, 0x45, 0x5a -}; - -/* Test Case 2 */ -# define K2 K1 -# define A2 A1 -# define IV2 IV1 -static const u8 P2[16]; -static const u8 C2[] = { - 0x03, 0x88, 0xda, 0xce, 0x60, 0xb6, 0xa3, 0x92, - 0xf3, 0x28, 0xc2, 0xb9, 0x71, 0xb2, 0xfe, 0x78 -}; - -static const u8 T2[] = { - 0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec, 0x13, 0xbd, - 0xf5, 0x3a, 0x67, 0xb2, 0x12, 0x57, 0xbd, 0xdf -}; - -/* Test Case 3 */ -# define A3 A2 -static const u8 K3[] = { - 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c, - 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08 -}; - -static const u8 P3[] = { - 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, - 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a, - 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, - 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, - 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53, - 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, - 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57, - 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55 -}; - -static const u8 IV3[] = { - 
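Stepping back to the CRYPTO_gcm128_finish hunk above: the tag is completed by folding the bit lengths of AAD and ciphertext into the hash, doing one final multiply, and masking with E(K, Y0); the new GHASH path merely routes the same length block through Xn in one batched call. A self-contained restatement — gf_mult_H is an inert stand-in for the real table-driven multiply, and ct_memcmp mirrors what CRYPTO_memcmp provides:

#include <stddef.h>

typedef unsigned long long u64_t;   /* local stand-in for u64 */

static void put64be(unsigned char *p, u64_t v)
{
    int i;

    for (i = 7; i >= 0; i--) {
        p[i] = (unsigned char)v;
        v >>= 8;
    }
}

/* Inert stand-in: the real code multiplies Xi by H in GF(2^128). */
static void gf_mult_H(unsigned char Xi[16]) { (void)Xi; }

/* Constant-time compare, mirroring CRYPTO_memcmp's behaviour. */
static int ct_memcmp(const unsigned char *a, const unsigned char *b,
                     size_t n)
{
    unsigned char d = 0;

    while (n--)
        d |= *a++ ^ *b++;
    return d != 0;
}

/* Tag = E(K, Y0) xor GHASH(H, A, C): fold bit lengths, multiply,
 * mask with the encrypted initial counter block, compare. */
static int gcm_tag_check(unsigned char Xi[16],
                         const unsigned char EK0[16],
                         u64_t aad_bytes, u64_t ct_bytes,
                         const unsigned char *tag, size_t taglen)
{
    unsigned char lenblk[16];
    size_t i;

    put64be(lenblk, aad_bytes << 3);      /* lengths in bits */
    put64be(lenblk + 8, ct_bytes << 3);
    for (i = 0; i < 16; i++)
        Xi[i] ^= lenblk[i];
    gf_mult_H(Xi);
    for (i = 0; i < 16; i++)
        Xi[i] ^= EK0[i];
    return taglen <= 16 ? ct_memcmp(Xi, tag, taglen) : -1;
}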
0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad, - 0xde, 0xca, 0xf8, 0x88 -}; - -static const u8 C3[] = { - 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24, - 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c, - 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0, - 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e, - 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c, - 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05, - 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97, - 0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85 -}; - -static const u8 T3[] = { - 0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd, 0x64, 0xa6, - 0x2c, 0xf3, 0x5a, 0xbd, 0x2b, 0xa6, 0xfa, 0xb4 -}; - -/* Test Case 4 */ -# define K4 K3 -# define IV4 IV3 -static const u8 P4[] = { - 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, - 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a, - 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, - 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, - 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53, - 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, - 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57, - 0xba, 0x63, 0x7b, 0x39 -}; - -static const u8 A4[] = { - 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef, - 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef, - 0xab, 0xad, 0xda, 0xd2 -}; - -static const u8 C4[] = { - 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24, - 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c, - 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0, - 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e, - 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c, - 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05, - 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97, - 0x3d, 0x58, 0xe0, 0x91 -}; - -static const u8 T4[] = { - 0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21, 0xa5, 0xdb, - 0x94, 0xfa, 0xe9, 0x5a, 0xe7, 0x12, 0x1a, 0x47 -}; - -/* Test Case 5 */ -# define K5 K4 -# define P5 P4 -# define A5 A4 -static const u8 IV5[] = { - 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad -}; - -static const u8 C5[] = { - 0x61, 0x35, 0x3b, 0x4c, 0x28, 0x06, 0x93, 0x4a, - 0x77, 0x7f, 0xf5, 0x1f, 0xa2, 0x2a, 0x47, 0x55, - 0x69, 0x9b, 0x2a, 0x71, 0x4f, 0xcd, 0xc6, 0xf8, - 0x37, 0x66, 0xe5, 0xf9, 0x7b, 0x6c, 0x74, 0x23, - 0x73, 0x80, 0x69, 0x00, 0xe4, 0x9f, 0x24, 0xb2, - 0x2b, 0x09, 0x75, 0x44, 0xd4, 0x89, 0x6b, 0x42, - 0x49, 0x89, 0xb5, 0xe1, 0xeb, 0xac, 0x0f, 0x07, - 0xc2, 0x3f, 0x45, 0x98 -}; - -static const u8 T5[] = { - 0x36, 0x12, 0xd2, 0xe7, 0x9e, 0x3b, 0x07, 0x85, - 0x56, 0x1b, 0xe1, 0x4a, 0xac, 0xa2, 0xfc, 0xcb -}; - -/* Test Case 6 */ -# define K6 K5 -# define P6 P5 -# define A6 A5 -static const u8 IV6[] = { - 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5, - 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa, - 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1, - 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28, - 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39, - 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54, - 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57, - 0xa6, 0x37, 0xb3, 0x9b -}; - -static const u8 C6[] = { - 0x8c, 0xe2, 0x49, 0x98, 0x62, 0x56, 0x15, 0xb6, - 0x03, 0xa0, 0x33, 0xac, 0xa1, 0x3f, 0xb8, 0x94, - 0xbe, 0x91, 0x12, 0xa5, 0xc3, 0xa2, 0x11, 0xa8, - 0xba, 0x26, 0x2a, 0x3c, 0xca, 0x7e, 0x2c, 0xa7, - 0x01, 0xe4, 0xa9, 0xa4, 0xfb, 0xa4, 0x3c, 0x90, - 0xcc, 0xdc, 0xb2, 0x81, 0xd4, 0x8c, 0x7c, 0x6f, - 0xd6, 0x28, 0x75, 0xd2, 0xac, 0xa4, 0x17, 0x03, - 0x4c, 0x34, 0xae, 0xe5 -}; - -static const u8 T6[] = { - 0x61, 0x9c, 0xc5, 0xae, 0xff, 0xfe, 0x0b, 0xfa, - 0x46, 0x2a, 0xf4, 0x3c, 0x16, 0x99, 0xd0, 0x50 -}; - -/* Test Case 7 */ -static const u8 K7[24], *P7 = NULL, 
*A7 = NULL, IV7[12], *C7 = NULL; -static const u8 T7[] = { - 0xcd, 0x33, 0xb2, 0x8a, 0xc7, 0x73, 0xf7, 0x4b, - 0xa0, 0x0e, 0xd1, 0xf3, 0x12, 0x57, 0x24, 0x35 -}; - -/* Test Case 8 */ -# define K8 K7 -# define IV8 IV7 -# define A8 A7 -static const u8 P8[16]; -static const u8 C8[] = { - 0x98, 0xe7, 0x24, 0x7c, 0x07, 0xf0, 0xfe, 0x41, - 0x1c, 0x26, 0x7e, 0x43, 0x84, 0xb0, 0xf6, 0x00 -}; - -static const u8 T8[] = { - 0x2f, 0xf5, 0x8d, 0x80, 0x03, 0x39, 0x27, 0xab, - 0x8e, 0xf4, 0xd4, 0x58, 0x75, 0x14, 0xf0, 0xfb -}; - -/* Test Case 9 */ -# define A9 A8 -static const u8 K9[] = { - 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c, - 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08, - 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c -}; - -static const u8 P9[] = { - 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, - 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a, - 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, - 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, - 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53, - 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, - 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57, - 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55 -}; - -static const u8 IV9[] = { - 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad, - 0xde, 0xca, 0xf8, 0x88 -}; - -static const u8 C9[] = { - 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41, - 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57, - 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84, - 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c, - 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25, - 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47, - 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9, - 0xcc, 0xda, 0x27, 0x10, 0xac, 0xad, 0xe2, 0x56 -}; - -static const u8 T9[] = { - 0x99, 0x24, 0xa7, 0xc8, 0x58, 0x73, 0x36, 0xbf, - 0xb1, 0x18, 0x02, 0x4d, 0xb8, 0x67, 0x4a, 0x14 -}; - -/* Test Case 10 */ -# define K10 K9 -# define IV10 IV9 -static const u8 P10[] = { - 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, - 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a, - 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, - 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, - 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53, - 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, - 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57, - 0xba, 0x63, 0x7b, 0x39 -}; - -static const u8 A10[] = { - 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef, - 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef, - 0xab, 0xad, 0xda, 0xd2 -}; - -static const u8 C10[] = { - 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41, - 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57, - 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84, - 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c, - 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25, - 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47, - 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9, - 0xcc, 0xda, 0x27, 0x10 -}; - -static const u8 T10[] = { - 0x25, 0x19, 0x49, 0x8e, 0x80, 0xf1, 0x47, 0x8f, - 0x37, 0xba, 0x55, 0xbd, 0x6d, 0x27, 0x61, 0x8c -}; - -/* Test Case 11 */ -# define K11 K10 -# define P11 P10 -# define A11 A10 -static const u8 IV11[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad }; - -static const u8 C11[] = { - 0x0f, 0x10, 0xf5, 0x99, 0xae, 0x14, 0xa1, 0x54, - 0xed, 0x24, 0xb3, 0x6e, 0x25, 0x32, 0x4d, 0xb8, - 0xc5, 0x66, 0x63, 0x2e, 0xf2, 0xbb, 0xb3, 0x4f, - 0x83, 0x47, 0x28, 0x0f, 0xc4, 0x50, 0x70, 0x57, - 0xfd, 0xdc, 0x29, 0xdf, 0x9a, 0x47, 0x1f, 0x75, - 0xc6, 0x65, 0x41, 0xd4, 0xd4, 0xda, 0xd1, 0xc9, - 0xe9, 0x3a, 0x19, 0xa5, 0x8e, 0x8b, 0x47, 0x3f, - 0xa0, 0xf0, 0x62, 0xf7 
-}; - -static const u8 T11[] = { - 0x65, 0xdc, 0xc5, 0x7f, 0xcf, 0x62, 0x3a, 0x24, - 0x09, 0x4f, 0xcc, 0xa4, 0x0d, 0x35, 0x33, 0xf8 -}; - -/* Test Case 12 */ -# define K12 K11 -# define P12 P11 -# define A12 A11 -static const u8 IV12[] = { - 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5, - 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa, - 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1, - 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28, - 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39, - 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54, - 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57, - 0xa6, 0x37, 0xb3, 0x9b -}; - -static const u8 C12[] = { - 0xd2, 0x7e, 0x88, 0x68, 0x1c, 0xe3, 0x24, 0x3c, - 0x48, 0x30, 0x16, 0x5a, 0x8f, 0xdc, 0xf9, 0xff, - 0x1d, 0xe9, 0xa1, 0xd8, 0xe6, 0xb4, 0x47, 0xef, - 0x6e, 0xf7, 0xb7, 0x98, 0x28, 0x66, 0x6e, 0x45, - 0x81, 0xe7, 0x90, 0x12, 0xaf, 0x34, 0xdd, 0xd9, - 0xe2, 0xf0, 0x37, 0x58, 0x9b, 0x29, 0x2d, 0xb3, - 0xe6, 0x7c, 0x03, 0x67, 0x45, 0xfa, 0x22, 0xe7, - 0xe9, 0xb7, 0x37, 0x3b -}; - -static const u8 T12[] = { - 0xdc, 0xf5, 0x66, 0xff, 0x29, 0x1c, 0x25, 0xbb, - 0xb8, 0x56, 0x8f, 0xc3, 0xd3, 0x76, 0xa6, 0xd9 -}; - -/* Test Case 13 */ -static const u8 K13[32], *P13 = NULL, *A13 = NULL, IV13[12], *C13 = NULL; -static const u8 T13[] = { - 0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9, - 0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b -}; - -/* Test Case 14 */ -# define K14 K13 -# define A14 A13 -static const u8 P14[16], IV14[12]; -static const u8 C14[] = { - 0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60, 0x6b, 0x6e, - 0x07, 0x4e, 0xc5, 0xd3, 0xba, 0xf3, 0x9d, 0x18 -}; - -static const u8 T14[] = { - 0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99, 0x6b, 0xf0, - 0x26, 0x5b, 0x98, 0xb5, 0xd4, 0x8a, 0xb9, 0x19 -}; - -/* Test Case 15 */ -# define A15 A14 -static const u8 K15[] = { - 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c, - 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08, - 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c, - 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08 -}; - -static const u8 P15[] = { - 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, - 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a, - 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, - 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, - 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53, - 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, - 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57, - 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55 -}; - -static const u8 IV15[] = { - 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad, - 0xde, 0xca, 0xf8, 0x88 -}; - -static const u8 C15[] = { - 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07, - 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d, - 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9, - 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa, - 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d, - 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38, - 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a, - 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad -}; - -static const u8 T15[] = { - 0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34, 0x71, 0xbd, - 0xec, 0x1a, 0x50, 0x22, 0x70, 0xe3, 0xcc, 0x6c -}; - -/* Test Case 16 */ -# define K16 K15 -# define IV16 IV15 -static const u8 P16[] = { - 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, - 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a, - 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, - 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, - 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53, - 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, - 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57, 
- 0xba, 0x63, 0x7b, 0x39 -}; - -static const u8 A16[] = { - 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef, - 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef, - 0xab, 0xad, 0xda, 0xd2 -}; - -static const u8 C16[] = { - 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07, - 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d, - 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9, - 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa, - 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d, - 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38, - 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a, - 0xbc, 0xc9, 0xf6, 0x62 -}; - -static const u8 T16[] = { - 0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e, 0x17, 0x68, - 0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b -}; - -/* Test Case 17 */ -# define K17 K16 -# define P17 P16 -# define A17 A16 -static const u8 IV17[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad }; - -static const u8 C17[] = { - 0xc3, 0x76, 0x2d, 0xf1, 0xca, 0x78, 0x7d, 0x32, - 0xae, 0x47, 0xc1, 0x3b, 0xf1, 0x98, 0x44, 0xcb, - 0xaf, 0x1a, 0xe1, 0x4d, 0x0b, 0x97, 0x6a, 0xfa, - 0xc5, 0x2f, 0xf7, 0xd7, 0x9b, 0xba, 0x9d, 0xe0, - 0xfe, 0xb5, 0x82, 0xd3, 0x39, 0x34, 0xa4, 0xf0, - 0x95, 0x4c, 0xc2, 0x36, 0x3b, 0xc7, 0x3f, 0x78, - 0x62, 0xac, 0x43, 0x0e, 0x64, 0xab, 0xe4, 0x99, - 0xf4, 0x7c, 0x9b, 0x1f -}; - -static const u8 T17[] = { - 0x3a, 0x33, 0x7d, 0xbf, 0x46, 0xa7, 0x92, 0xc4, - 0x5e, 0x45, 0x49, 0x13, 0xfe, 0x2e, 0xa8, 0xf2 -}; - -/* Test Case 18 */ -# define K18 K17 -# define P18 P17 -# define A18 A17 -static const u8 IV18[] = { - 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5, - 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa, - 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1, - 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28, - 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39, - 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54, - 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57, - 0xa6, 0x37, 0xb3, 0x9b -}; - -static const u8 C18[] = { - 0x5a, 0x8d, 0xef, 0x2f, 0x0c, 0x9e, 0x53, 0xf1, - 0xf7, 0x5d, 0x78, 0x53, 0x65, 0x9e, 0x2a, 0x20, - 0xee, 0xb2, 0xb2, 0x2a, 0xaf, 0xde, 0x64, 0x19, - 0xa0, 0x58, 0xab, 0x4f, 0x6f, 0x74, 0x6b, 0xf4, - 0x0f, 0xc0, 0xc3, 0xb7, 0x80, 0xf2, 0x44, 0x45, - 0x2d, 0xa3, 0xeb, 0xf1, 0xc5, 0xd8, 0x2c, 0xde, - 0xa2, 0x41, 0x89, 0x97, 0x20, 0x0e, 0xf8, 0x2e, - 0x44, 0xae, 0x7e, 0x3f -}; - -static const u8 T18[] = { - 0xa4, 0x4a, 0x82, 0x66, 0xee, 0x1c, 0x8e, 0xb0, - 0xc8, 0xb5, 0xd4, 0xcf, 0x5a, 0xe9, 0xf1, 0x9a -}; - -/* Test Case 19 */ -# define K19 K1 -# define P19 P1 -# define IV19 IV1 -# define C19 C1 -static const u8 A19[] = { - 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, - 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a, - 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, - 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, - 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53, - 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, - 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57, - 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55, - 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07, - 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d, - 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9, - 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa, - 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d, - 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38, - 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a, - 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad -}; - -static const u8 T19[] = { - 0x5f, 0xea, 0x79, 0x3a, 0x2d, 0x6f, 0x97, 0x4d, - 0x37, 0xe6, 0x8e, 0x0c, 0xb8, 0xff, 0x94, 0x92 -}; - -/* Test Case 20 */ -# define K20 K1 -# define A20 A1 
-/* this results in 0xff in counter LSB */ -static const u8 IV20[64] = { 0xff, 0xff, 0xff, 0xff }; - -static const u8 P20[288]; -static const u8 C20[] = { - 0x56, 0xb3, 0x37, 0x3c, 0xa9, 0xef, 0x6e, 0x4a, - 0x2b, 0x64, 0xfe, 0x1e, 0x9a, 0x17, 0xb6, 0x14, - 0x25, 0xf1, 0x0d, 0x47, 0xa7, 0x5a, 0x5f, 0xce, - 0x13, 0xef, 0xc6, 0xbc, 0x78, 0x4a, 0xf2, 0x4f, - 0x41, 0x41, 0xbd, 0xd4, 0x8c, 0xf7, 0xc7, 0x70, - 0x88, 0x7a, 0xfd, 0x57, 0x3c, 0xca, 0x54, 0x18, - 0xa9, 0xae, 0xff, 0xcd, 0x7c, 0x5c, 0xed, 0xdf, - 0xc6, 0xa7, 0x83, 0x97, 0xb9, 0xa8, 0x5b, 0x49, - 0x9d, 0xa5, 0x58, 0x25, 0x72, 0x67, 0xca, 0xab, - 0x2a, 0xd0, 0xb2, 0x3c, 0xa4, 0x76, 0xa5, 0x3c, - 0xb1, 0x7f, 0xb4, 0x1c, 0x4b, 0x8b, 0x47, 0x5c, - 0xb4, 0xf3, 0xf7, 0x16, 0x50, 0x94, 0xc2, 0x29, - 0xc9, 0xe8, 0xc4, 0xdc, 0x0a, 0x2a, 0x5f, 0xf1, - 0x90, 0x3e, 0x50, 0x15, 0x11, 0x22, 0x13, 0x76, - 0xa1, 0xcd, 0xb8, 0x36, 0x4c, 0x50, 0x61, 0xa2, - 0x0c, 0xae, 0x74, 0xbc, 0x4a, 0xcd, 0x76, 0xce, - 0xb0, 0xab, 0xc9, 0xfd, 0x32, 0x17, 0xef, 0x9f, - 0x8c, 0x90, 0xbe, 0x40, 0x2d, 0xdf, 0x6d, 0x86, - 0x97, 0xf4, 0xf8, 0x80, 0xdf, 0xf1, 0x5b, 0xfb, - 0x7a, 0x6b, 0x28, 0x24, 0x1e, 0xc8, 0xfe, 0x18, - 0x3c, 0x2d, 0x59, 0xe3, 0xf9, 0xdf, 0xff, 0x65, - 0x3c, 0x71, 0x26, 0xf0, 0xac, 0xb9, 0xe6, 0x42, - 0x11, 0xf4, 0x2b, 0xae, 0x12, 0xaf, 0x46, 0x2b, - 0x10, 0x70, 0xbe, 0xf1, 0xab, 0x5e, 0x36, 0x06, - 0x87, 0x2c, 0xa1, 0x0d, 0xee, 0x15, 0xb3, 0x24, - 0x9b, 0x1a, 0x1b, 0x95, 0x8f, 0x23, 0x13, 0x4c, - 0x4b, 0xcc, 0xb7, 0xd0, 0x32, 0x00, 0xbc, 0xe4, - 0x20, 0xa2, 0xf8, 0xeb, 0x66, 0xdc, 0xf3, 0x64, - 0x4d, 0x14, 0x23, 0xc1, 0xb5, 0x69, 0x90, 0x03, - 0xc1, 0x3e, 0xce, 0xf4, 0xbf, 0x38, 0xa3, 0xb6, - 0x0e, 0xed, 0xc3, 0x40, 0x33, 0xba, 0xc1, 0x90, - 0x27, 0x83, 0xdc, 0x6d, 0x89, 0xe2, 0xe7, 0x74, - 0x18, 0x8a, 0x43, 0x9c, 0x7e, 0xbc, 0xc0, 0x67, - 0x2d, 0xbd, 0xa4, 0xdd, 0xcf, 0xb2, 0x79, 0x46, - 0x13, 0xb0, 0xbe, 0x41, 0x31, 0x5e, 0xf7, 0x78, - 0x70, 0x8a, 0x70, 0xee, 0x7d, 0x75, 0x16, 0x5c -}; - -static const u8 T20[] = { - 0x8b, 0x30, 0x7f, 0x6b, 0x33, 0x28, 0x6d, 0x0a, - 0xb0, 0x26, 0xa9, 0xed, 0x3f, 0xe1, 0xe8, 0x5f -}; - -# define TEST_CASE(n) do { \ - u8 out[sizeof(P##n)]; \ - AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \ - CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \ - CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \ - memset(out,0,sizeof(out)); \ - if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \ - if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \ - if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \ - (C##n && memcmp(out,C##n,sizeof(out)))) \ - ret++, printf ("encrypt test#%d failed.\n",n); \ - CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \ - memset(out,0,sizeof(out)); \ - if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \ - if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \ - if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \ - (P##n && memcmp(out,P##n,sizeof(out)))) \ - ret++, printf ("decrypt test#%d failed.\n",n); \ - } while(0) - -int main() -{ - GCM128_CONTEXT ctx; - AES_KEY key; - int ret = 0; - - TEST_CASE(1); - TEST_CASE(2); - TEST_CASE(3); - TEST_CASE(4); - TEST_CASE(5); - TEST_CASE(6); - TEST_CASE(7); - TEST_CASE(8); - TEST_CASE(9); - TEST_CASE(10); - TEST_CASE(11); - TEST_CASE(12); - TEST_CASE(13); - TEST_CASE(14); - TEST_CASE(15); - TEST_CASE(16); - TEST_CASE(17); - TEST_CASE(18); - TEST_CASE(19); - TEST_CASE(20); - -# ifdef OPENSSL_CPUID_OBJ - { - size_t start, stop, gcm_t, ctr_t, OPENSSL_rdtsc(); - union { - u64 u; - u8 c[1024]; - } buf; - int i; - - AES_set_encrypt_key(K1, sizeof(K1) * 8, &key); - 
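The TEST_CASE macro in the removed self-test doubles as a reference for the public calling sequence. Unrolled against the heap-based API, one seal-and-verify pass looks like the sketch below — AES hookup as in the old self-test, error checking elided, and the function name is mine:

#include <openssl/aes.h>
#include <openssl/modes.h>

/* One GCM seal-and-verify pass; returns 0 when the tag matches. */
static int gcm_seal_demo(const unsigned char key[16],
                         const unsigned char iv[12],
                         const unsigned char *aad, size_t aadlen,
                         const unsigned char *pt, unsigned char *ct,
                         size_t len, const unsigned char tag[16])
{
    AES_KEY ks;
    GCM128_CONTEXT *gcm;
    int rv;

    AES_set_encrypt_key(key, 128, &ks);
    gcm = CRYPTO_gcm128_new(&ks, (block128_f)AES_encrypt);
    if (gcm == NULL)
        return -1;
    CRYPTO_gcm128_setiv(gcm, iv, 12);
    if (aadlen)
        CRYPTO_gcm128_aad(gcm, aad, aadlen);
    CRYPTO_gcm128_encrypt(gcm, pt, ct, len);
    rv = CRYPTO_gcm128_finish(gcm, tag, 16);
    CRYPTO_gcm128_release(gcm);
    return rv;
}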
CRYPTO_gcm128_init(&ctx, &key, (block128_f) AES_encrypt); - CRYPTO_gcm128_setiv(&ctx, IV1, sizeof(IV1)); - - CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf)); - start = OPENSSL_rdtsc(); - CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf)); - gcm_t = OPENSSL_rdtsc() - start; - - CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf), - &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres, - (block128_f) AES_encrypt); - start = OPENSSL_rdtsc(); - CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf), - &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres, - (block128_f) AES_encrypt); - ctr_t = OPENSSL_rdtsc() - start; - - printf("%.2f-%.2f=%.2f\n", - gcm_t / (double)sizeof(buf), - ctr_t / (double)sizeof(buf), - (gcm_t - ctr_t) / (double)sizeof(buf)); -# ifdef GHASH - { - void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], - const u8 *inp, size_t len) = ctx.ghash; - - GHASH((&ctx), buf.c, sizeof(buf)); - start = OPENSSL_rdtsc(); - for (i = 0; i < 100; ++i) - GHASH((&ctx), buf.c, sizeof(buf)); - gcm_t = OPENSSL_rdtsc() - start; - printf("%.2f\n", gcm_t / (double)sizeof(buf) / (double)i); - } -# endif - } -# endif - - return ret; -} -#endif diff --git a/crypto/modes/modes.h b/crypto/modes/modes.h deleted file mode 100644 index fd488499a0b4..000000000000 --- a/crypto/modes/modes.h +++ /dev/null @@ -1,163 +0,0 @@ -/* ==================================================================== - * Copyright (c) 2008 The OpenSSL Project. All rights reserved. - * - * Rights for redistribution and usage in source and binary - * forms are granted according to the OpenSSL license. - */ - -#include <stddef.h> - -#ifdef __cplusplus -extern "C" { -#endif -typedef void (*block128_f) (const unsigned char in[16], - unsigned char out[16], const void *key); - -typedef void (*cbc128_f) (const unsigned char *in, unsigned char *out, - size_t len, const void *key, - unsigned char ivec[16], int enc); - -typedef void (*ctr128_f) (const unsigned char *in, unsigned char *out, - size_t blocks, const void *key, - const unsigned char ivec[16]); - -typedef void (*ccm128_f) (const unsigned char *in, unsigned char *out, - size_t blocks, const void *key, - const unsigned char ivec[16], - unsigned char cmac[16]); - -void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out, - size_t len, const void *key, - unsigned char ivec[16], block128_f block); -void CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out, - size_t len, const void *key, - unsigned char ivec[16], block128_f block); - -void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out, - size_t len, const void *key, - unsigned char ivec[16], - unsigned char ecount_buf[16], unsigned int *num, - block128_f block); - -void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out, - size_t len, const void *key, - unsigned char ivec[16], - unsigned char ecount_buf[16], - unsigned int *num, ctr128_f ctr); - -void CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out, - size_t len, const void *key, - unsigned char ivec[16], int *num, - block128_f block); - -void CRYPTO_cfb128_encrypt(const unsigned char *in, unsigned char *out, - size_t len, const void *key, - unsigned char ivec[16], int *num, - int enc, block128_f block); -void CRYPTO_cfb128_8_encrypt(const unsigned char *in, unsigned char *out, - size_t length, const void *key, - unsigned char ivec[16], int *num, - int enc, block128_f block); -void CRYPTO_cfb128_1_encrypt(const unsigned char *in, unsigned char *out, - size_t bits, const void *key, - unsigned char ivec[16], int *num, - int enc, 
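The typedefs opening the removed modes.h are the whole interface contract: every mode is parameterized over one of these function-pointer shapes, which is why the self-test could pass (block128_f)AES_encrypt directly. For anyone uneasy with that cast, an explicit shim of the same shape (toy name):

#include <openssl/aes.h>
#include <openssl/modes.h>

/* Explicit shim matching block128_f, avoiding the function-pointer
 * cast on AES_encrypt's (const AES_KEY *) parameter. */
static void aes_block_shim(const unsigned char in[16],
                           unsigned char out[16], const void *key)
{
    AES_encrypt(in, out, (const AES_KEY *)key);
}

static block128_f cipher = aes_block_shim;   /* shapes line up */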
block128_f block); - -size_t CRYPTO_cts128_encrypt_block(const unsigned char *in, - unsigned char *out, size_t len, - const void *key, unsigned char ivec[16], - block128_f block); -size_t CRYPTO_cts128_encrypt(const unsigned char *in, unsigned char *out, - size_t len, const void *key, - unsigned char ivec[16], cbc128_f cbc); -size_t CRYPTO_cts128_decrypt_block(const unsigned char *in, - unsigned char *out, size_t len, - const void *key, unsigned char ivec[16], - block128_f block); -size_t CRYPTO_cts128_decrypt(const unsigned char *in, unsigned char *out, - size_t len, const void *key, - unsigned char ivec[16], cbc128_f cbc); - -size_t CRYPTO_nistcts128_encrypt_block(const unsigned char *in, - unsigned char *out, size_t len, - const void *key, - unsigned char ivec[16], - block128_f block); -size_t CRYPTO_nistcts128_encrypt(const unsigned char *in, unsigned char *out, - size_t len, const void *key, - unsigned char ivec[16], cbc128_f cbc); -size_t CRYPTO_nistcts128_decrypt_block(const unsigned char *in, - unsigned char *out, size_t len, - const void *key, - unsigned char ivec[16], - block128_f block); -size_t CRYPTO_nistcts128_decrypt(const unsigned char *in, unsigned char *out, - size_t len, const void *key, - unsigned char ivec[16], cbc128_f cbc); - -typedef struct gcm128_context GCM128_CONTEXT; - -GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block); -void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block); -void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, - size_t len); -int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, - size_t len); -int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, - const unsigned char *in, unsigned char *out, - size_t len); -int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, - const unsigned char *in, unsigned char *out, - size_t len); -int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, - const unsigned char *in, unsigned char *out, - size_t len, ctr128_f stream); -int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, - const unsigned char *in, unsigned char *out, - size_t len, ctr128_f stream); -int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag, - size_t len); -void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len); -void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx); - -typedef struct ccm128_context CCM128_CONTEXT; - -void CRYPTO_ccm128_init(CCM128_CONTEXT *ctx, - unsigned int M, unsigned int L, void *key, - block128_f block); -int CRYPTO_ccm128_setiv(CCM128_CONTEXT *ctx, const unsigned char *nonce, - size_t nlen, size_t mlen); -void CRYPTO_ccm128_aad(CCM128_CONTEXT *ctx, const unsigned char *aad, - size_t alen); -int CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx, const unsigned char *inp, - unsigned char *out, size_t len); -int CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx, const unsigned char *inp, - unsigned char *out, size_t len); -int CRYPTO_ccm128_encrypt_ccm64(CCM128_CONTEXT *ctx, const unsigned char *inp, - unsigned char *out, size_t len, - ccm128_f stream); -int CRYPTO_ccm128_decrypt_ccm64(CCM128_CONTEXT *ctx, const unsigned char *inp, - unsigned char *out, size_t len, - ccm128_f stream); -size_t CRYPTO_ccm128_tag(CCM128_CONTEXT *ctx, unsigned char *tag, size_t len); - -typedef struct xts128_context XTS128_CONTEXT; - -int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, - const unsigned char iv[16], - const unsigned char *inp, unsigned char *out, - size_t len, int enc); - -size_t CRYPTO_128_wrap(void *key, const unsigned char *iv, - unsigned char *out, - const 
unsigned char *in, size_t inlen, - block128_f block); - -size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv, - unsigned char *out, - const unsigned char *in, size_t inlen, - block128_f block); - -#ifdef __cplusplus -} -#endif diff --git a/crypto/modes/modes_lcl.h b/crypto/modes/modes_lcl.h index fe14ec7002f0..f2ae01d11afd 100644 --- a/crypto/modes/modes_lcl.h +++ b/crypto/modes/modes_lcl.h @@ -1,8 +1,10 @@ -/* ==================================================================== - * Copyright (c) 2010 The OpenSSL Project. All rights reserved. +/* + * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved. * - * Redistribution and use is governed by OpenSSL license. - * ==================================================================== + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #include <openssl/modes.h> @@ -71,6 +73,7 @@ typedef unsigned char u8; # endif # elif defined(_MSC_VER) # if _MSC_VER>=1300 +# include <stdlib.h> # pragma intrinsic(_byteswap_uint64,_byteswap_ulong) # define BSWAP8(x) _byteswap_uint64((u64)(x)) # define BSWAP4(x) _byteswap_ulong((u32)(x)) @@ -125,6 +128,9 @@ struct gcm128_context { unsigned int mres, ares; block128_f block; void *key; +#if !defined(OPENSSL_SMALL_FOOTPRINT) + unsigned char Xn[48]; +#endif }; struct xts128_context { @@ -141,3 +147,44 @@ struct ccm128_context { block128_f block; void *key; }; + +#ifndef OPENSSL_NO_OCB + +typedef union { + u64 a[2]; + unsigned char c[16]; +} OCB_BLOCK; +# define ocb_block16_xor(in1,in2,out) \ + ( (out)->a[0]=(in1)->a[0]^(in2)->a[0], \ + (out)->a[1]=(in1)->a[1]^(in2)->a[1] ) +# if STRICT_ALIGNMENT +# define ocb_block16_xor_misaligned(in1,in2,out) \ + ocb_block_xor((in1)->c,(in2)->c,16,(out)->c) +# else +# define ocb_block16_xor_misaligned ocb_block16_xor +# endif + +struct ocb128_context { + /* Need both encrypt and decrypt key schedules for decryption */ + block128_f encrypt; + block128_f decrypt; + void *keyenc; + void *keydec; + ocb128_f stream; /* direction dependent */ + /* Key dependent variables. Can be reused if key remains the same */ + size_t l_index; + size_t max_l_index; + OCB_BLOCK l_star; + OCB_BLOCK l_dollar; + OCB_BLOCK *l; + /* Must be reset for each session */ + struct { + u64 blocks_hashed; + u64 blocks_processed; + OCB_BLOCK offset_aad; + OCB_BLOCK sum; + OCB_BLOCK offset; + OCB_BLOCK checksum; + } sess; +}; +#endif /* OPENSSL_NO_OCB */ diff --git a/crypto/modes/ocb128.c b/crypto/modes/ocb128.c new file mode 100644 index 000000000000..713b9aaf19d5 --- /dev/null +++ b/crypto/modes/ocb128.c @@ -0,0 +1,562 @@ +/* + * Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include <string.h> +#include <openssl/crypto.h> +#include <openssl/err.h> +#include "modes_lcl.h" + +#ifndef OPENSSL_NO_OCB + +/* + * Calculate the number of binary trailing zero's in any given number + */ +static u32 ocb_ntz(u64 n) +{ + u32 cnt = 0; + + /* + * We do a right-to-left simple sequential search. This is surprisingly + * efficient as the distribution of trailing zeros is not uniform, + * e.g. 
the number of possible inputs with no trailing zeros is equal to + * the number with 1 or more; the number with exactly 1 is equal to the + * number with 2 or more, etc. Checking the last two bits covers 75% of + * all numbers. Checking the last three covers 87.5% + */ + while (!(n & 1)) { + n >>= 1; + cnt++; + } + return cnt; +} + +/* + * Shift a block of 16 bytes left by shift bits + */ +static void ocb_block_lshift(const unsigned char *in, size_t shift, + unsigned char *out) +{ + int i; + unsigned char carry = 0, carry_next; + + for (i = 15; i >= 0; i--) { + carry_next = in[i] >> (8 - shift); + out[i] = (in[i] << shift) | carry; + carry = carry_next; + } +} + +/* + * Perform a "double" operation as per OCB spec + */ +static void ocb_double(OCB_BLOCK *in, OCB_BLOCK *out) +{ + unsigned char mask; + + /* + * Calculate the mask based on the most significant bit. There are more + * efficient ways to do this - but this way is constant time + */ + mask = in->c[0] & 0x80; + mask >>= 7; + mask = (0 - mask) & 0x87; + + ocb_block_lshift(in->c, 1, out->c); + + out->c[15] ^= mask; +} + +/* + * Perform an xor on in1 and in2 - each of len bytes. Store result in out + */ +static void ocb_block_xor(const unsigned char *in1, + const unsigned char *in2, size_t len, + unsigned char *out) +{ + size_t i; + for (i = 0; i < len; i++) { + out[i] = in1[i] ^ in2[i]; + } +} + +/* + * Lookup L_index in our lookup table. If we haven't already got it we need to + * calculate it + */ +static OCB_BLOCK *ocb_lookup_l(OCB128_CONTEXT *ctx, size_t idx) +{ + size_t l_index = ctx->l_index; + + if (idx <= l_index) { + return ctx->l + idx; + } + + /* We don't have it - so calculate it */ + if (idx >= ctx->max_l_index) { + void *tmp_ptr; + /* + * Each additional entry allows to process almost double as + * much data, so that in linear world the table will need to + * be expanded with smaller and smaller increments. Originally + * it was doubling in size, which was a waste. Growing it + * linearly is not formally optimal, but is simpler to implement. + * We grow table by minimally required 4*n that would accommodate + * the index. + */ + ctx->max_l_index += (idx - ctx->max_l_index + 4) & ~3; + tmp_ptr = OPENSSL_realloc(ctx->l, ctx->max_l_index * sizeof(OCB_BLOCK)); + if (tmp_ptr == NULL) /* prevent ctx->l from being clobbered */ + return NULL; + ctx->l = tmp_ptr; + } + while (l_index < idx) { + ocb_double(ctx->l + l_index, ctx->l + l_index + 1); + l_index++; + } + ctx->l_index = l_index; + + return ctx->l + idx; +} + +/* + * Create a new OCB128_CONTEXT + */ +OCB128_CONTEXT *CRYPTO_ocb128_new(void *keyenc, void *keydec, + block128_f encrypt, block128_f decrypt, + ocb128_f stream) +{ + OCB128_CONTEXT *octx; + int ret; + + if ((octx = OPENSSL_malloc(sizeof(*octx))) != NULL) { + ret = CRYPTO_ocb128_init(octx, keyenc, keydec, encrypt, decrypt, + stream); + if (ret) + return octx; + OPENSSL_free(octx); + } + + return NULL; +} + +/* + * Initialise an existing OCB128_CONTEXT + */ +int CRYPTO_ocb128_init(OCB128_CONTEXT *ctx, void *keyenc, void *keydec, + block128_f encrypt, block128_f decrypt, + ocb128_f stream) +{ + memset(ctx, 0, sizeof(*ctx)); + ctx->l_index = 0; + ctx->max_l_index = 5; + if ((ctx->l = OPENSSL_malloc(ctx->max_l_index * 16)) == NULL) { + CRYPTOerr(CRYPTO_F_CRYPTO_OCB128_INIT, ERR_R_MALLOC_FAILURE); + return 0; + } + + /* + * We set both the encryption and decryption key schedules - decryption + * needs both. 
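ocb_double above is doubling in GF(2^128) with reduction polynomial x^128 + x^7 + x^2 + x + 1; 0x87 is that polynomial's low byte, and the (0 - mask) trick applies it without branching on secret data. A standalone restatement with a quick sanity check — my own names, not the library code:

#include <stdio.h>

/* Constant-time double() over GF(2^128), poly x^128+x^7+x^2+x+1. */
static void gf128_double(const unsigned char in[16], unsigned char out[16])
{
    unsigned char carry = in[0] >> 7;   /* MSB decides reduction   */
    int i;

    for (i = 0; i < 15; i++)
        out[i] = (unsigned char)((in[i] << 1) | (in[i + 1] >> 7));
    out[15] = (unsigned char)((in[15] << 1) ^ ((0 - carry) & 0x87));
}

int main(void)
{
    unsigned char b[16] = { 0x80 };     /* MSB set: reduction fires */
    unsigned char d[16];

    gf128_double(b, d);
    printf("%02x ... %02x\n", d[0], d[15]);   /* prints "00 ... 87" */
    return 0;
}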
Don't really need decryption schedule if only doing + * encryption - but it simplifies things to take it anyway + */ + ctx->encrypt = encrypt; + ctx->decrypt = decrypt; + ctx->stream = stream; + ctx->keyenc = keyenc; + ctx->keydec = keydec; + + /* L_* = ENCIPHER(K, zeros(128)) */ + ctx->encrypt(ctx->l_star.c, ctx->l_star.c, ctx->keyenc); + + /* L_$ = double(L_*) */ + ocb_double(&ctx->l_star, &ctx->l_dollar); + + /* L_0 = double(L_$) */ + ocb_double(&ctx->l_dollar, ctx->l); + + /* L_{i} = double(L_{i-1}) */ + ocb_double(ctx->l, ctx->l+1); + ocb_double(ctx->l+1, ctx->l+2); + ocb_double(ctx->l+2, ctx->l+3); + ocb_double(ctx->l+3, ctx->l+4); + ctx->l_index = 4; /* enough to process up to 496 bytes */ + + return 1; +} + +/* + * Copy an OCB128_CONTEXT object + */ +int CRYPTO_ocb128_copy_ctx(OCB128_CONTEXT *dest, OCB128_CONTEXT *src, + void *keyenc, void *keydec) +{ + memcpy(dest, src, sizeof(OCB128_CONTEXT)); + if (keyenc) + dest->keyenc = keyenc; + if (keydec) + dest->keydec = keydec; + if (src->l) { + if ((dest->l = OPENSSL_malloc(src->max_l_index * 16)) == NULL) { + CRYPTOerr(CRYPTO_F_CRYPTO_OCB128_COPY_CTX, ERR_R_MALLOC_FAILURE); + return 0; + } + memcpy(dest->l, src->l, (src->l_index + 1) * 16); + } + return 1; +} + +/* + * Set the IV to be used for this operation. Must be 1 - 15 bytes. + */ +int CRYPTO_ocb128_setiv(OCB128_CONTEXT *ctx, const unsigned char *iv, + size_t len, size_t taglen) +{ + unsigned char ktop[16], tmp[16], mask; + unsigned char stretch[24], nonce[16]; + size_t bottom, shift; + + /* + * Spec says IV is 120 bits or fewer - it allows non byte aligned lengths. + * We don't support this at this stage + */ + if ((len > 15) || (len < 1) || (taglen > 16) || (taglen < 1)) { + return -1; + } + + /* Reset nonce-dependent variables */ + memset(&ctx->sess, 0, sizeof(ctx->sess)); + + /* Nonce = num2str(TAGLEN mod 128,7) || zeros(120-bitlen(N)) || 1 || N */ + nonce[0] = ((taglen * 8) % 128) << 1; + memset(nonce + 1, 0, 15); + memcpy(nonce + 16 - len, iv, len); + nonce[15 - len] |= 1; + + /* Ktop = ENCIPHER(K, Nonce[1..122] || zeros(6)) */ + memcpy(tmp, nonce, 16); + tmp[15] &= 0xc0; + ctx->encrypt(tmp, ktop, ctx->keyenc); + + /* Stretch = Ktop || (Ktop[1..64] xor Ktop[9..72]) */ + memcpy(stretch, ktop, 16); + ocb_block_xor(ktop, ktop + 1, 8, stretch + 16); + + /* bottom = str2num(Nonce[123..128]) */ + bottom = nonce[15] & 0x3f; + + /* Offset_0 = Stretch[1+bottom..128+bottom] */ + shift = bottom % 8; + ocb_block_lshift(stretch + (bottom / 8), shift, ctx->sess.offset.c); + mask = 0xff; + mask <<= 8 - shift; + ctx->sess.offset.c[15] |= + (*(stretch + (bottom / 8) + 16) & mask) >> (8 - shift); + + return 1; +} + +/* + * Provide any AAD. This can be called multiple times. 
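At the API level the flow these functions implement is: new/init once per key schedule pair, then setiv, aad, encrypt/decrypt, and tag or finish per message. A sketch of sealing one message with the functions added in this file — AES hookup and names assumed, return checks elided; note that cleanup cleanses but does not free a heap context, hence the explicit OPENSSL_free:

#include <openssl/aes.h>
#include <openssl/crypto.h>
#include <openssl/modes.h>

/* Seal one message with OCB-AES; writes a 16-byte tag. */
static int ocb_seal_demo(AES_KEY *enc, AES_KEY *dec,
                         const unsigned char *iv, size_t ivlen,
                         const unsigned char *aad, size_t aadlen,
                         const unsigned char *pt, unsigned char *ct,
                         size_t len, unsigned char tag[16])
{
    OCB128_CONTEXT *octx = CRYPTO_ocb128_new(enc, dec,
                                             (block128_f)AES_encrypt,
                                             (block128_f)AES_decrypt,
                                             NULL /* no stream fn */);

    if (octx == NULL)
        return 0;
    CRYPTO_ocb128_setiv(octx, iv, ivlen, 16);    /* taglen = 16  */
    if (aadlen)
        CRYPTO_ocb128_aad(octx, aad, aadlen);
    CRYPTO_ocb128_encrypt(octx, pt, ct, len);
    CRYPTO_ocb128_tag(octx, tag, 16);
    CRYPTO_ocb128_cleanup(octx);                 /* cleanses...  */
    OPENSSL_free(octx);                          /* ...then free */
    return 1;
}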
Only the final time can + * have a partial block + */ +int CRYPTO_ocb128_aad(OCB128_CONTEXT *ctx, const unsigned char *aad, + size_t len) +{ + u64 i, all_num_blocks; + size_t num_blocks, last_len; + OCB_BLOCK tmp; + + /* Calculate the number of blocks of AAD provided now, and so far */ + num_blocks = len / 16; + all_num_blocks = num_blocks + ctx->sess.blocks_hashed; + + /* Loop through all full blocks of AAD */ + for (i = ctx->sess.blocks_hashed + 1; i <= all_num_blocks; i++) { + OCB_BLOCK *lookup; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + lookup = ocb_lookup_l(ctx, ocb_ntz(i)); + if (lookup == NULL) + return 0; + ocb_block16_xor(&ctx->sess.offset_aad, lookup, &ctx->sess.offset_aad); + + memcpy(tmp.c, aad, 16); + aad += 16; + + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + ocb_block16_xor(&ctx->sess.offset_aad, &tmp, &tmp); + ctx->encrypt(tmp.c, tmp.c, ctx->keyenc); + ocb_block16_xor(&tmp, &ctx->sess.sum, &ctx->sess.sum); + } + + /* + * Check if we have any partial blocks left over. This is only valid in the + * last call to this function + */ + last_len = len % 16; + + if (last_len > 0) { + /* Offset_* = Offset_m xor L_* */ + ocb_block16_xor(&ctx->sess.offset_aad, &ctx->l_star, + &ctx->sess.offset_aad); + + /* CipherInput = (A_* || 1 || zeros(127-bitlen(A_*))) xor Offset_* */ + memset(tmp.c, 0, 16); + memcpy(tmp.c, aad, last_len); + tmp.c[last_len] = 0x80; + ocb_block16_xor(&ctx->sess.offset_aad, &tmp, &tmp); + + /* Sum = Sum_m xor ENCIPHER(K, CipherInput) */ + ctx->encrypt(tmp.c, tmp.c, ctx->keyenc); + ocb_block16_xor(&tmp, &ctx->sess.sum, &ctx->sess.sum); + } + + ctx->sess.blocks_hashed = all_num_blocks; + + return 1; +} + +/* + * Provide any data to be encrypted. This can be called multiple times. Only + * the final time can have a partial block + */ +int CRYPTO_ocb128_encrypt(OCB128_CONTEXT *ctx, + const unsigned char *in, unsigned char *out, + size_t len) +{ + u64 i, all_num_blocks; + size_t num_blocks, last_len; + + /* + * Calculate the number of blocks of data to be encrypted provided now, and + * so far + */ + num_blocks = len / 16; + all_num_blocks = num_blocks + ctx->sess.blocks_processed; + + if (num_blocks && all_num_blocks == (size_t)all_num_blocks + && ctx->stream != NULL) { + size_t max_idx = 0, top = (size_t)all_num_blocks; + + /* + * See how many L_{i} entries we need to process data at hand + * and pre-compute missing entries in the table [if any]... + */ + while (top >>= 1) + max_idx++; + if (ocb_lookup_l(ctx, max_idx) == NULL) + return 0; + + ctx->stream(in, out, num_blocks, ctx->keyenc, + (size_t)ctx->sess.blocks_processed + 1, ctx->sess.offset.c, + (const unsigned char (*)[16])ctx->l, ctx->sess.checksum.c); + } else { + /* Loop through all full blocks to be encrypted */ + for (i = ctx->sess.blocks_processed + 1; i <= all_num_blocks; i++) { + OCB_BLOCK *lookup; + OCB_BLOCK tmp; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + lookup = ocb_lookup_l(ctx, ocb_ntz(i)); + if (lookup == NULL) + return 0; + ocb_block16_xor(&ctx->sess.offset, lookup, &ctx->sess.offset); + + memcpy(tmp.c, in, 16); + in += 16; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + ocb_block16_xor(&tmp, &ctx->sess.checksum, &ctx->sess.checksum); + + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + ocb_block16_xor(&ctx->sess.offset, &tmp, &tmp); + ctx->encrypt(tmp.c, tmp.c, ctx->keyenc); + ocb_block16_xor(&ctx->sess.offset, &tmp, &tmp); + + memcpy(out, tmp.c, 16); + out += 16; + } + } + + /* + * Check if we have any partial blocks left over. 
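Both the AAD and data loops advance the offset as Offset_i = Offset_{i-1} xor L_{ntz(i)}. Because ntz(i) follows the ruler sequence, the low table entries absorb almost all lookups, which is why init's precomputed L_0..L_4 ("up to 496 bytes", i.e. 31 blocks) lasts until ocb_lookup_l first has to grow the table at block 32. A standalone illustration of the sequence:

#include <stdio.h>

static unsigned ntz(unsigned long long n)
{
    unsigned c = 0;

    while (!(n & 1)) {
        n >>= 1;
        c++;
    }
    return c;
}

int main(void)
{
    unsigned long long i;

    /* L-table index used for blocks 1..16: the "ruler" sequence. */
    for (i = 1; i <= 16; i++)
        printf("block %2llu -> L_%u\n", i, ntz(i));
    return 0;   /* prints 0 1 0 2 0 1 0 3 0 1 0 2 0 1 0 4 */
}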
This is only valid in the + * last call to this function + */ + last_len = len % 16; + + if (last_len > 0) { + OCB_BLOCK pad; + + /* Offset_* = Offset_m xor L_* */ + ocb_block16_xor(&ctx->sess.offset, &ctx->l_star, &ctx->sess.offset); + + /* Pad = ENCIPHER(K, Offset_*) */ + ctx->encrypt(ctx->sess.offset.c, pad.c, ctx->keyenc); + + /* C_* = P_* xor Pad[1..bitlen(P_*)] */ + ocb_block_xor(in, pad.c, last_len, out); + + /* Checksum_* = Checksum_m xor (P_* || 1 || zeros(127-bitlen(P_*))) */ + memset(pad.c, 0, 16); /* borrow pad */ + memcpy(pad.c, in, last_len); + pad.c[last_len] = 0x80; + ocb_block16_xor(&pad, &ctx->sess.checksum, &ctx->sess.checksum); + } + + ctx->sess.blocks_processed = all_num_blocks; + + return 1; +} + +/* + * Provide any data to be decrypted. This can be called multiple times. Only + * the final time can have a partial block + */ +int CRYPTO_ocb128_decrypt(OCB128_CONTEXT *ctx, + const unsigned char *in, unsigned char *out, + size_t len) +{ + u64 i, all_num_blocks; + size_t num_blocks, last_len; + + /* + * Calculate the number of blocks of data to be decrypted provided now, and + * so far + */ + num_blocks = len / 16; + all_num_blocks = num_blocks + ctx->sess.blocks_processed; + + if (num_blocks && all_num_blocks == (size_t)all_num_blocks + && ctx->stream != NULL) { + size_t max_idx = 0, top = (size_t)all_num_blocks; + + /* + * See how many L_{i} entries we need to process data at hand + * and pre-compute missing entries in the table [if any]... + */ + while (top >>= 1) + max_idx++; + if (ocb_lookup_l(ctx, max_idx) == NULL) + return 0; + + ctx->stream(in, out, num_blocks, ctx->keydec, + (size_t)ctx->sess.blocks_processed + 1, ctx->sess.offset.c, + (const unsigned char (*)[16])ctx->l, ctx->sess.checksum.c); + } else { + OCB_BLOCK tmp; + + /* Loop through all full blocks to be decrypted */ + for (i = ctx->sess.blocks_processed + 1; i <= all_num_blocks; i++) { + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + OCB_BLOCK *lookup = ocb_lookup_l(ctx, ocb_ntz(i)); + if (lookup == NULL) + return 0; + ocb_block16_xor(&ctx->sess.offset, lookup, &ctx->sess.offset); + + memcpy(tmp.c, in, 16); + in += 16; + + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + ocb_block16_xor(&ctx->sess.offset, &tmp, &tmp); + ctx->decrypt(tmp.c, tmp.c, ctx->keydec); + ocb_block16_xor(&ctx->sess.offset, &tmp, &tmp); + + /* Checksum_i = Checksum_{i-1} xor P_i */ + ocb_block16_xor(&tmp, &ctx->sess.checksum, &ctx->sess.checksum); + + memcpy(out, tmp.c, 16); + out += 16; + } + } + + /* + * Check if we have any partial blocks left over. 
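The trailing-fragment handling in both directions pads the fragment as P_* || 1 || zeros(...) before XORing it into the checksum, and even the decrypt side generates Pad with the encryption schedule (Pad = ENCIPHER(K, Offset_*)), because Pad is used as keystream rather than pushed through the inverse cipher. The padding step in isolation, with assumed names:

#include <stddef.h>
#include <string.h>

/* Fold a trailing fragment into the OCB checksum: (P_* || 1 || 0...) */
static void checksum_add_partial(unsigned char checksum[16],
                                 const unsigned char *frag, size_t fraglen)
{
    unsigned char padded[16];
    size_t i;

    memset(padded, 0, sizeof(padded));
    memcpy(padded, frag, fraglen);      /* fraglen must be < 16 */
    padded[fraglen] = 0x80;             /* the single "1" bit   */
    for (i = 0; i < 16; i++)
        checksum[i] ^= padded[i];
}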
This is only valid in the + * last call to this function + */ + last_len = len % 16; + + if (last_len > 0) { + OCB_BLOCK pad; + + /* Offset_* = Offset_m xor L_* */ + ocb_block16_xor(&ctx->sess.offset, &ctx->l_star, &ctx->sess.offset); + + /* Pad = ENCIPHER(K, Offset_*) */ + ctx->encrypt(ctx->sess.offset.c, pad.c, ctx->keyenc); + + /* P_* = C_* xor Pad[1..bitlen(C_*)] */ + ocb_block_xor(in, pad.c, last_len, out); + + /* Checksum_* = Checksum_m xor (P_* || 1 || zeros(127-bitlen(P_*))) */ + memset(pad.c, 0, 16); /* borrow pad */ + memcpy(pad.c, out, last_len); + pad.c[last_len] = 0x80; + ocb_block16_xor(&pad, &ctx->sess.checksum, &ctx->sess.checksum); + } + + ctx->sess.blocks_processed = all_num_blocks; + + return 1; +} + +static int ocb_finish(OCB128_CONTEXT *ctx, unsigned char *tag, size_t len, + int write) +{ + OCB_BLOCK tmp; + + if (len > 16 || len < 1) { + return -1; + } + + /* + * Tag = ENCIPHER(K, Checksum_* xor Offset_* xor L_$) xor HASH(K,A) + */ + ocb_block16_xor(&ctx->sess.checksum, &ctx->sess.offset, &tmp); + ocb_block16_xor(&ctx->l_dollar, &tmp, &tmp); + ctx->encrypt(tmp.c, tmp.c, ctx->keyenc); + ocb_block16_xor(&tmp, &ctx->sess.sum, &tmp); + + if (write) { + memcpy(tag, &tmp, len); + return 1; + } else { + return CRYPTO_memcmp(&tmp, tag, len); + } +} + +/* + * Calculate the tag and verify it against the supplied tag + */ +int CRYPTO_ocb128_finish(OCB128_CONTEXT *ctx, const unsigned char *tag, + size_t len) +{ + return ocb_finish(ctx, (unsigned char*)tag, len, 0); +} + +/* + * Retrieve the calculated tag + */ +int CRYPTO_ocb128_tag(OCB128_CONTEXT *ctx, unsigned char *tag, size_t len) +{ + return ocb_finish(ctx, tag, len, 1); +} + +/* + * Release all resources + */ +void CRYPTO_ocb128_cleanup(OCB128_CONTEXT *ctx) +{ + if (ctx) { + OPENSSL_clear_free(ctx->l, ctx->max_l_index * 16); + OPENSSL_cleanse(ctx, sizeof(*ctx)); + } +} + +#endif /* OPENSSL_NO_OCB */ diff --git a/crypto/modes/ofb128.c b/crypto/modes/ofb128.c index 4dbaccd7a6ae..83092564c6b6 100644 --- a/crypto/modes/ofb128.c +++ b/crypto/modes/ofb128.c @@ -1,64 +1,16 @@ -/* ==================================================================== - * Copyright (c) 2008 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. 
Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== +/* + * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #include <openssl/crypto.h> #include "modes_lcl.h" #include <string.h> -#ifndef MODES_DEBUG -# ifndef NDEBUG -# define NDEBUG -# endif -#endif -#include <assert.h> - /* * The input and output encrypted as though 128bit ofb mode is being used. * The extra state information to record how much of the 128bit block we have @@ -71,8 +23,6 @@ void CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out, unsigned int n; size_t l = 0; - assert(in && out && key && ivec && num); - n = *num; #if !defined(OPENSSL_SMALL_FOOTPRINT) diff --git a/crypto/modes/wrap128.c b/crypto/modes/wrap128.c index 384978371af2..d7e56cc260ad 100644 --- a/crypto/modes/wrap128.c +++ b/crypto/modes/wrap128.c @@ -1,70 +1,50 @@ -/* crypto/modes/wrap128.c */ /* - * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL - * project. - */ -/* ==================================================================== - * Copyright (c) 2013 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * Copyright 2013-2018 The OpenSSL Project Authors. All Rights Reserved. * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * licensing@OpenSSL.org. - * - * 5. 
Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +/** Beware! * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== + * Following wrapping modes were designed for AES but this implementation + * allows you to use them for any 128 bit block cipher. */ -#include "cryptlib.h" +#include "internal/cryptlib.h" #include <openssl/modes.h> +/** RFC 3394 section 2.2.3.1 Default Initial Value */ static const unsigned char default_iv[] = { 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, }; -/* - * Input size limit: lower than maximum of standards but far larger than - * anything that will be used in practice. +/** RFC 5649 section 3 Alternative Initial Value 32-bit constant */ +static const unsigned char default_aiv[] = { + 0xA6, 0x59, 0x59, 0xA6 +}; + +/** Input size limit: lower than maximum of standards but far larger than + * anything that will be used in practice. */ #define CRYPTO128_WRAP_MAX (1UL << 31) +/** Wrapping according to RFC 3394 section 2.2.1. + * + * @param[in] key Key value. + * @param[in] iv IV value. Length = 8 bytes. NULL = use default_iv. + * @param[in] in Plaintext as n 64-bit blocks, n >= 2. + * @param[in] inlen Length of in. + * @param[out] out Ciphertext. Minimal buffer length = (inlen + 8) bytes. + * Input and output buffers can overlap if block function + * supports that. + * @param[in] block Block processing function. + * @return 0 if inlen does not consist of n 64-bit blocks, n >= 2. + * or if inlen > CRYPTO128_WRAP_MAX. + * Output length if wrapping succeeded. 
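Given the documented contract above, calling CRYPTO_128_wrap is a one-liner; a sketch wrapping a 32-byte key blob under an AES KEK (names mine, error handling elided). Note the body below also tightens the minimum input from 8 to 16 bytes, matching the "n >= 2" requirement in the comment:

#include <openssl/aes.h>
#include <openssl/modes.h>

/* Wrap a 32-byte key blob: output is input + 8 bytes of integrity. */
static size_t wrap_demo(AES_KEY *kek, const unsigned char in[32],
                        unsigned char out[40])
{
    /* NULL iv selects the RFC 3394 default 0xA6A6A6A6A6A6A6A6. */
    return CRYPTO_128_wrap(kek, NULL, out, in, 32,
                           (block128_f)AES_encrypt);
}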
+/** Wrapping according to RFC 3394 section 2.2.1.
+ *
+ *  @param[in]  key    Key value.
+ *  @param[in]  iv     IV value. Length = 8 bytes. NULL = use default_iv.
+ *  @param[in]  in     Plaintext as n 64-bit blocks, n >= 2.
+ *  @param[in]  inlen  Length of in.
+ *  @param[out] out    Ciphertext. Minimal buffer length = (inlen + 8) bytes.
+ *                     Input and output buffers can overlap if block function
+ *                     supports that.
+ *  @param[in]  block  Block processing function.
+ *  @return            0 if inlen does not consist of n 64-bit blocks, n >= 2.
+ *                     or if inlen > CRYPTO128_WRAP_MAX.
+ *                     Output length if wrapping succeeded.
+ */
 size_t CRYPTO_128_wrap(void *key, const unsigned char *iv,
                        unsigned char *out,
                        const unsigned char *in, size_t inlen,
@@ -72,7 +52,7 @@ size_t CRYPTO_128_wrap(void *key, const unsigned char *iv,
 {
     unsigned char *A, B[16], *R;
     size_t i, j, t;
-    if ((inlen & 0x7) || (inlen < 8) || (inlen > CRYPTO128_WRAP_MAX))
+    if ((inlen & 0x7) || (inlen < 16) || (inlen > CRYPTO128_WRAP_MAX))
         return 0;
     A = B;
     t = 1;
@@ -100,10 +80,26 @@ size_t CRYPTO_128_wrap(void *key, const unsigned char *iv,
     return inlen + 8;
 }
 
-size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv,
-                         unsigned char *out,
-                         const unsigned char *in, size_t inlen,
-                         block128_f block)
+/** Unwrapping according to RFC 3394 section 2.2.2 steps 1-2.
+ *  The IV check (step 3) is responsibility of the caller.
+ *
+ *  @param[in]  key    Key value.
+ *  @param[out] iv     Unchecked IV value. Minimal buffer length = 8 bytes.
+ *  @param[out] out    Plaintext without IV.
+ *                     Minimal buffer length = (inlen - 8) bytes.
+ *                     Input and output buffers can overlap if block function
+ *                     supports that.
+ *  @param[in]  in     Ciphertext as n 64-bit blocks.
+ *  @param[in]  inlen  Length of in.
+ *  @param[in]  block  Block processing function.
+ *  @return            0 if inlen is out of range [24, CRYPTO128_WRAP_MAX]
+ *                     or if inlen is not a multiple of 8.
+ *                     Output length otherwise.
+ */
+static size_t crypto_128_unwrap_raw(void *key, unsigned char *iv,
+                                    unsigned char *out,
+                                    const unsigned char *in, size_t inlen,
+                                    block128_f block)
 {
     unsigned char *A, B[16], *R;
     size_t i, j, t;
@@ -128,11 +124,208 @@ size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv,
             memcpy(R, B + 8, 8);
         }
     }
+    memcpy(iv, A, 8);
+    return inlen;
+}
+
+/** Unwrapping according to RFC 3394 section 2.2.2, including the IV check.
+ *  The first block of plaintext has to match the supplied IV, otherwise an
+ *  error is returned.
+ *
+ *  @param[in]  key    Key value.
+ *  @param[out] iv     IV value to match against. Length = 8 bytes.
+ *                     NULL = use default_iv.
+ *  @param[out] out    Plaintext without IV.
+ *                     Minimal buffer length = (inlen - 8) bytes.
+ *                     Input and output buffers can overlap if block function
+ *                     supports that.
+ *  @param[in]  in     Ciphertext as n 64-bit blocks.
+ *  @param[in]  inlen  Length of in.
+ *  @param[in]  block  Block processing function.
+ *  @return            0 if inlen is out of range [24, CRYPTO128_WRAP_MAX]
+ *                     or if inlen is not a multiple of 8
+ *                     or if IV doesn't match expected value.
+ *                     Output length otherwise.
+ */
+size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv,
+                         unsigned char *out, const unsigned char *in,
+                         size_t inlen, block128_f block)
+{
+    size_t ret;
+    unsigned char got_iv[8];
+
+    ret = crypto_128_unwrap_raw(key, got_iv, out, in, inlen, block);
+    if (ret == 0)
+        return 0;
+
     if (!iv)
         iv = default_iv;
-    if (memcmp(A, iv, 8)) {
+    if (CRYPTO_memcmp(got_iv, iv, 8)) {
+        OPENSSL_cleanse(out, ret);
+        return 0;
+    }
+    return ret;
+}
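Note two behavioral changes in the RFC 3394 pair above: the wrap now rejects inputs shorter than 16 bytes (two 64-bit blocks, as the standard requires), and the IV check moved from plain memcmp to constant-time CRYPTO_memcmp with the plaintext cleansed on mismatch. A hedged usage sketch (not part of this commit; the helper name is hypothetical) exercising both functions with AES:

    #include <openssl/aes.h>
    #include <openssl/modes.h>

    /* ptlen must be >= 16 and a multiple of 8; wrapped needs ptlen + 8 bytes,
     * unwrapped needs ptlen bytes. Returns 1 on a successful round trip. */
    static int kw_roundtrip(const unsigned char kek[16],
                            const unsigned char *pt, size_t ptlen,
                            unsigned char *wrapped, unsigned char *unwrapped)
    {
        AES_KEY ek, dk;
        size_t wlen, ulen;

        AES_set_encrypt_key(kek, 128, &ek);
        AES_set_decrypt_key(kek, 128, &dk);

        /* NULL iv selects the RFC 3394 default IV A6A6A6A6A6A6A6A6. */
        wlen = CRYPTO_128_wrap(&ek, NULL, wrapped, pt, ptlen,
                               (block128_f)AES_encrypt);
        if (wlen == 0)
            return 0;   /* ptlen < 16, not a multiple of 8, or too large */

        ulen = CRYPTO_128_unwrap(&dk, NULL, unwrapped, wrapped, wlen,
                                 (block128_f)AES_decrypt);
        return ulen == ptlen;   /* 0 also covers an IV (integrity) mismatch */
    }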
+/** Wrapping according to RFC 5649 section 4.1.
+ *
+ *  @param[in]  key    Key value.
+ *  @param[in]  icv    (Non-standard) IV, 4 bytes. NULL = use default_aiv.
+ *  @param[out] out    Ciphertext. Minimal buffer length = (inlen + 15) bytes.
+ *                     Input and output buffers can overlap if block function
+ *                     supports that.
+ *  @param[in]  in     Plaintext as n 64-bit blocks, n >= 2.
+ *  @param[in]  inlen  Length of in.
+ *  @param[in]  block  Block processing function.
+ *  @return            0 if inlen is out of range [1, CRYPTO128_WRAP_MAX].
+ *                     Output length if wrapping succeeded.
+ */
+size_t CRYPTO_128_wrap_pad(void *key, const unsigned char *icv,
+                           unsigned char *out,
+                           const unsigned char *in, size_t inlen,
+                           block128_f block)
+{
+    /* n: number of 64-bit blocks in the padded key data
+     *
+     * If length of plain text is not a multiple of 8, pad the plain text octet
+     * string on the right with octets of zeros, where final length is the
+     * smallest multiple of 8 that is greater than length of plain text.
+     * If length of plain text is a multiple of 8, then there is no padding. */
+    const size_t blocks_padded = (inlen + 7) / 8; /* CEILING(m/8) */
+    const size_t padded_len = blocks_padded * 8;
+    const size_t padding_len = padded_len - inlen;
+    /* RFC 5649 section 3: Alternative Initial Value */
+    unsigned char aiv[8];
+    int ret;
+
+    /* Section 1: use 32-bit fixed field for plaintext octet length */
+    if (inlen == 0 || inlen >= CRYPTO128_WRAP_MAX)
+        return 0;
+
+    /* Section 3: Alternative Initial Value */
+    if (!icv)
+        memcpy(aiv, default_aiv, 4);
+    else
+        memcpy(aiv, icv, 4);    /* Standard doesn't mention this. */
+
+    aiv[4] = (inlen >> 24) & 0xFF;
+    aiv[5] = (inlen >> 16) & 0xFF;
+    aiv[6] = (inlen >> 8) & 0xFF;
+    aiv[7] = inlen & 0xFF;
+
+    if (padded_len == 8) {
+        /*
+         * Section 4.1 - special case in step 2: If the padded plaintext
+         * contains exactly eight octets, then prepend the AIV and encrypt
+         * the resulting 128-bit block using AES in ECB mode.
+         */
+        memmove(out + 8, in, inlen);
+        memcpy(out, aiv, 8);
+        memset(out + 8 + inlen, 0, padding_len);
+        block(out, out, key);
+        ret = 16;               /* AIV + padded input */
+    } else {
+        memmove(out, in, inlen);
+        memset(out + inlen, 0, padding_len); /* Section 4.1 step 1 */
+        ret = CRYPTO_128_wrap(key, aiv, out, out, padded_len, block);
+    }
+
+    return ret;
+}
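The AIV built above is the 4-byte constant (A6 59 59 A6 by default) followed by the plaintext length as a 32-bit big-endian integer, so for a 20-byte input the AIV is A6 59 59 A6 00 00 00 14 and the input is zero-padded to 24 bytes. A hedged sketch of a call (not part of this commit; the helper name is hypothetical):

    #include <openssl/aes.h>
    #include <openssl/modes.h>

    /* Wraps a 20-byte key: padded to 24 bytes, output is 24 + 8 = 32 bytes. */
    static size_t kwp_wrap20(const unsigned char kek[16],
                             const unsigned char pt[20],
                             unsigned char out[32])
    {
        AES_KEY ek;

        AES_set_encrypt_key(kek, 128, &ek);
        /* NULL icv selects default_aiv; returns 32 on success, 0 on error */
        return CRYPTO_128_wrap_pad(&ek, NULL, out, pt, 20,
                                   (block128_f)AES_encrypt);
    }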
+/** Unwrapping according to RFC 5649 section 4.2.
+ *
+ *  @param[in]  key    Key value.
+ *  @param[in]  icv    (Non-standard) IV, 4 bytes. NULL = use default_aiv.
+ *  @param[out] out    Plaintext. Minimal buffer length = (inlen - 8) bytes.
+ *                     Input and output buffers can overlap if block function
+ *                     supports that.
+ *  @param[in]  in     Ciphertext as n 64-bit blocks.
+ *  @param[in]  inlen  Length of in.
+ *  @param[in]  block  Block processing function.
+ *  @return            0 if inlen is out of range [16, CRYPTO128_WRAP_MAX],
+ *                     or if inlen is not a multiple of 8
+ *                     or if IV and message length indicator doesn't match.
+ *                     Output length if unwrapping succeeded and IV matches.
+ */
+size_t CRYPTO_128_unwrap_pad(void *key, const unsigned char *icv,
+                             unsigned char *out,
+                             const unsigned char *in, size_t inlen,
+                             block128_f block)
+{
+    /* n: number of 64-bit blocks in the padded key data */
+    size_t n = inlen / 8 - 1;
+    size_t padded_len;
+    size_t padding_len;
+    size_t ptext_len;
+    /* RFC 5649 section 3: Alternative Initial Value */
+    unsigned char aiv[8];
+    static unsigned char zeros[8] = { 0x0 };
+    size_t ret;
+
+    /* Section 4.2: Ciphertext length has to be (n+1) 64-bit blocks. */
+    if ((inlen & 0x7) != 0 || inlen < 16 || inlen >= CRYPTO128_WRAP_MAX)
+        return 0;
+
+    if (inlen == 16) {
+        /*
+         * Section 4.2 - special case in step 1: When n=1, the ciphertext
+         * contains exactly two 64-bit blocks and they are decrypted as a
+         * single AES block using AES in ECB mode: AIV | P[1] = DEC(K, C[0] |
+         * C[1])
+         */
+        unsigned char buff[16];
+
+        block(in, buff, key);
+        memcpy(aiv, buff, 8);
+        /* Remove AIV */
+        memcpy(out, buff + 8, 8);
+        padded_len = 8;
+        OPENSSL_cleanse(buff, inlen);
+    } else {
+        padded_len = inlen - 8;
+        ret = crypto_128_unwrap_raw(key, aiv, out, in, inlen, block);
+        if (padded_len != ret) {
+            OPENSSL_cleanse(out, inlen);
+            return 0;
+        }
+    }
+
+    /*
+     * Section 3: AIV checks: Check that MSB(32,A) = A65959A6. Optionally a
+     * user-supplied value can be used (even if standard doesn't mention
+     * this).
+     */
+    if ((!icv && CRYPTO_memcmp(aiv, default_aiv, 4))
+        || (icv && CRYPTO_memcmp(aiv, icv, 4))) {
         OPENSSL_cleanse(out, inlen);
         return 0;
     }
-    return inlen;
+
+    /*
+     * Check that 8*(n-1) < LSB(32,AIV) <= 8*n. If so, let ptext_len =
+     * LSB(32,AIV).
+     */
+
+    ptext_len = ((unsigned int)aiv[4] << 24)
+        | ((unsigned int)aiv[5] << 16)
+        | ((unsigned int)aiv[6] << 8)
+        | (unsigned int)aiv[7];
+    if (8 * (n - 1) >= ptext_len || ptext_len > 8 * n) {
+        OPENSSL_cleanse(out, inlen);
+        return 0;
+    }
+
+    /*
+     * Check that the rightmost padding_len octets of the output data are
+     * zero.
+     */
+    padding_len = padded_len - ptext_len;
+    if (CRYPTO_memcmp(out + ptext_len, zeros, padding_len) != 0) {
+        OPENSSL_cleanse(out, inlen);
+        return 0;
+    }
+
+    /* Section 4.2 step 3: Remove padding */
+    return ptext_len;
 }
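To make the length bookkeeping above concrete: unwrapping a 32-byte ciphertext gives n = 32/8 - 1 = 3, so the AIV's length field must satisfy 16 < ptext_len <= 24, and for ptext_len = 20 the final 4 padding octets are verified to be zero with CRYPTO_memcmp. A hedged companion sketch to the wrap example above (helper name hypothetical):

    #include <openssl/aes.h>
    #include <openssl/modes.h>

    /* Recovers the 20-byte key wrapped by kwp_wrap20(). The out buffer is
     * sized at inlen (32) rather than the documented minimum of inlen - 8,
     * because the failure paths above cleanse inlen bytes of out. */
    static size_t kwp_unwrap20(const unsigned char kek[16],
                               const unsigned char wrapped[32],
                               unsigned char out[32])
    {
        AES_KEY dk;

        AES_set_decrypt_key(kek, 128, &dk);
        /* Returns 20 on success; 0 on any AIV, length, or padding failure. */
        return CRYPTO_128_unwrap_pad(&dk, NULL, out, wrapped, 32,
                                     (block128_f)AES_decrypt);
    }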
diff --git a/crypto/modes/xts128.c b/crypto/modes/xts128.c
index 8f2af588b26d..81b1eacd5920 100644
--- a/crypto/modes/xts128.c
+++ b/crypto/modes/xts128.c
@@ -1,63 +1,16 @@
-/* ====================================================================
- * Copyright (c) 2011 The OpenSSL Project.  All rights reserved.
+/*
+ * Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
  *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * 3. All advertising materials mentioning features or use of this
- *    software must display the following acknowledgment:
- *    "This product includes software developed by the OpenSSL Project
- *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
- *
- * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
- *    endorse or promote products derived from this software without
- *    prior written permission. For written permission, please contact
- *    openssl-core@openssl.org.
- *
- * 5. Products derived from this software may not be called "OpenSSL"
- *    nor may "OpenSSL" appear in their names without prior written
- *    permission of the OpenSSL Project.
- *
- * 6. Redistributions of any form whatsoever must retain the following
- *    acknowledgment:
- *    "This product includes software developed by the OpenSSL Project
- *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
- *
- * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
- * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * ====================================================================
+ * Licensed under the OpenSSL license (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
  */
 
 #include <openssl/crypto.h>
 #include "modes_lcl.h"
 #include <string.h>
 
-#ifndef MODES_DEBUG
-# ifndef NDEBUG
-#  define NDEBUG
-# endif
-#endif
-#include <assert.h>
-
 int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx,
                           const unsigned char iv[16],
                           const unsigned char *inp, unsigned char *out,