diff options
Diffstat (limited to 'crypto')
1373 files changed, 165215 insertions, 174586 deletions
diff --git a/crypto/LPdir_unix.c b/crypto/LPdir_unix.c index c97e260492b9..356089d7fd34 100644 --- a/crypto/LPdir_unix.c +++ b/crypto/LPdir_unix.c @@ -1,5 +1,17 @@ /* - * Copyright (c) 2004, Richard Levitte <richard@levitte.org> + * Copyright 2004-2018 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +/* + * This file is dual-licensed and is also available under the following + * terms: + * + * Copyright (c) 2004, 2018, Richard Levitte <richard@levitte.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,6 +46,9 @@ #ifndef LPDIR_H # include "LPdir.h" #endif +#ifdef __VMS +# include <ctype.h> +#endif /* * The POSIXly macro for the maximum number of characters in a file path is @@ -61,6 +76,10 @@ struct LP_dir_context_st { DIR *dir; char entry_name[LP_ENTRY_SIZE + 1]; +#ifdef __VMS + int expect_file_generations; + char previous_entry_name[LP_ENTRY_SIZE + 1]; +#endif }; const char *LP_find_file(LP_DIR_CTX **ctx, const char *directory) @@ -74,12 +93,21 @@ const char *LP_find_file(LP_DIR_CTX **ctx, const char *directory) errno = 0; if (*ctx == NULL) { - *ctx = (LP_DIR_CTX *)malloc(sizeof(LP_DIR_CTX)); + *ctx = malloc(sizeof(**ctx)); if (*ctx == NULL) { errno = ENOMEM; return 0; } - memset(*ctx, '\0', sizeof(LP_DIR_CTX)); + memset(*ctx, 0, sizeof(**ctx)); + +#ifdef __VMS + { + char c = directory[strlen(directory) - 1]; + + if (c == ']' || c == '>' || c == ':') + (*ctx)->expect_file_generations = 1; + } +#endif (*ctx)->dir = opendir(directory); if ((*ctx)->dir == NULL) { @@ -91,6 +119,13 @@ const char *LP_find_file(LP_DIR_CTX **ctx, const char *directory) } } +#ifdef __VMS + strncpy((*ctx)->previous_entry_name, (*ctx)->entry_name, + 
sizeof((*ctx)->previous_entry_name)); + + again: +#endif + direntry = readdir((*ctx)->dir); if (direntry == NULL) { return 0; @@ -99,6 +134,18 @@ const char *LP_find_file(LP_DIR_CTX **ctx, const char *directory) strncpy((*ctx)->entry_name, direntry->d_name, sizeof((*ctx)->entry_name) - 1); (*ctx)->entry_name[sizeof((*ctx)->entry_name) - 1] = '\0'; +#ifdef __VMS + if ((*ctx)->expect_file_generations) { + char *p = (*ctx)->entry_name + strlen((*ctx)->entry_name); + + while(p > (*ctx)->entry_name && isdigit(p[-1])) + p--; + if (p > (*ctx)->entry_name && p[-1] == ';') + p[-1] = '\0'; + if (strcasecmp((*ctx)->entry_name, (*ctx)->previous_entry_name) == 0) + goto again; + } +#endif return (*ctx)->entry_name; } diff --git a/crypto/Makefile b/crypto/Makefile deleted file mode 100644 index 7869996a9c07..000000000000 --- a/crypto/Makefile +++ /dev/null @@ -1,221 +0,0 @@ -# -# OpenSSL/crypto/Makefile -# - -DIR= crypto -TOP= .. -CC= cc -INCLUDE= -I. -I$(TOP) -I../include $(ZLIB_INCLUDE) -# INCLUDES targets sudbirs! -INCLUDES= -I.. -I../.. -I../modes -I../asn1 -I../evp -I../../include $(ZLIB_INCLUDE) -CFLAG= -g -MAKEDEPPROG= makedepend -MAKEDEPEND= $(TOP)/util/domd $(TOP) -MD $(MAKEDEPPROG) -MAKEFILE= Makefile -RM= rm -f -AR= ar r - -RECURSIVE_MAKE= [ -n "$(SDIRS)" ] && for i in $(SDIRS) ; do \ - (cd $$i && echo "making $$target in $(DIR)/$$i..." && \ - $(MAKE) -e TOP=../.. 
DIR=$$i INCLUDES='$(INCLUDES)' $$target ) || exit 1; \ - done; - -PEX_LIBS= -EX_LIBS= - -CFLAGS= $(INCLUDE) $(CFLAG) -ASFLAGS= $(INCLUDE) $(ASFLAG) -AFLAGS=$(ASFLAGS) -CPUID_OBJ=mem_clr.o - -LIBS= - -GENERAL=Makefile README crypto-lib.com install.com -TEST=constant_time_test.c - -LIB= $(TOP)/libcrypto.a -SHARED_LIB= libcrypto$(SHLIB_EXT) -LIBSRC= cryptlib.c mem.c mem_clr.c mem_dbg.c cversion.c ex_data.c cpt_err.c \ - ebcdic.c uid.c o_time.c o_str.c o_dir.c o_fips.c o_init.c fips_ers.c -LIBOBJ= cryptlib.o mem.o mem_dbg.o cversion.o ex_data.o cpt_err.o ebcdic.o \ - uid.o o_time.o o_str.o o_dir.o o_fips.o o_init.o fips_ers.o $(CPUID_OBJ) - -SRC= $(LIBSRC) - -EXHEADER= crypto.h opensslv.h opensslconf.h ebcdic.h symhacks.h \ - ossl_typ.h -HEADER= cryptlib.h buildinf.h md32_common.h o_time.h o_str.h o_dir.h \ - constant_time_locl.h $(EXHEADER) - -ALL= $(GENERAL) $(SRC) $(HEADER) - -top: - @(cd ..; $(MAKE) DIRS=$(DIR) all) - -all: shared - -buildinf.h: ../Makefile - $(PERL) $(TOP)/util/mkbuildinf.pl "$(CC) $(CFLAGS)" "$(PLATFORM)" >buildinf.h - -x86cpuid.s: x86cpuid.pl perlasm/x86asm.pl - $(PERL) x86cpuid.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ - -applink.o: $(TOP)/ms/applink.c - $(CC) $(CFLAGS) -c -o $@ $(TOP)/ms/applink.c - -uplink.o: $(TOP)/ms/uplink.c applink.o - $(CC) $(CFLAGS) -c -o $@ $(TOP)/ms/uplink.c - -uplink-x86.s: $(TOP)/ms/uplink-x86.pl - $(PERL) $(TOP)/ms/uplink-x86.pl $(PERLASM_SCHEME) > $@ - -x86_64cpuid.s: x86_64cpuid.pl; $(PERL) x86_64cpuid.pl $(PERLASM_SCHEME) > $@ -ia64cpuid.s: ia64cpuid.S; $(CC) $(CFLAGS) -E ia64cpuid.S > $@ -ppccpuid.s: ppccpuid.pl; $(PERL) ppccpuid.pl $(PERLASM_SCHEME) $@ -pariscid.s: pariscid.pl; $(PERL) pariscid.pl $(PERLASM_SCHEME) $@ -alphacpuid.s: alphacpuid.pl - (preproc=$$$$.$@.S; trap "rm $$preproc" INT; \ - $(PERL) alphacpuid.pl > $$preproc && \ - $(CC) -E -P $$preproc > $@ && rm $$preproc) - -testapps: - [ -z "$(THIS)" ] || ( if echo $(SDIRS) | fgrep ' des '; \ - then cd des && $(MAKE) -e des; fi ) - [ -z 
"$(THIS)" ] || ( cd pkcs7 && $(MAKE) -e testapps ); - @if [ -z "$(THIS)" ]; then $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; fi - -subdirs: - @target=all; $(RECURSIVE_MAKE) - -files: - $(PERL) $(TOP)/util/files.pl "CPUID_OBJ=$(CPUID_OBJ)" Makefile >> $(TOP)/MINFO - @target=files; $(RECURSIVE_MAKE) - -links: - @$(PERL) $(TOP)/util/mklink.pl ../include/openssl $(EXHEADER) - @$(PERL) $(TOP)/util/mklink.pl ../test $(TEST) - @$(PERL) $(TOP)/util/mklink.pl ../apps $(APPS) - @target=links; $(RECURSIVE_MAKE) - -# lib: $(LIB): are splitted to avoid end-less loop -lib: $(LIB) - @touch lib -$(LIB): $(LIBOBJ) - $(AR) $(LIB) $(LIBOBJ) - test -z "$(FIPSLIBDIR)" || $(AR) $(LIB) $(FIPSLIBDIR)fipscanister.o - $(RANLIB) $(LIB) || echo Never mind. - -shared: buildinf.h lib subdirs - if [ -n "$(SHARED_LIBS)" ]; then \ - (cd ..; $(MAKE) $(SHARED_LIB)); \ - fi - -libs: - @target=lib; $(RECURSIVE_MAKE) - -install: - @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile... - @headerlist="$(EXHEADER)"; for i in $$headerlist ;\ - do \ - (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \ - chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ - done; - @target=install; $(RECURSIVE_MAKE) - -lint: - @target=lint; $(RECURSIVE_MAKE) - -update: local_depend - @[ -z "$(THIS)" ] || (set -e; target=update; $(RECURSIVE_MAKE) ) - @if [ -z "$(THIS)" ]; then $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; fi - -depend: local_depend - @[ -z "$(THIS)" ] || (set -e; target=depend; $(RECURSIVE_MAKE) ) - @if [ -z "$(THIS)" ]; then $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; fi -local_depend: - @[ -z "$(THIS)" -o -f buildinf.h ] || touch buildinf.h # fake buildinf.h if it does not exist - @[ -z "$(THIS)" ] || $(MAKEDEPEND) -- $(CFLAG) $(INCLUDE) $(DEPFLAG) -- $(PROGS) $(LIBSRC) - @[ -z "$(THIS)" -o -s buildinf.h ] || rm buildinf.h - -clean: - rm -f buildinf.h *.s *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff - @target=clean; $(RECURSIVE_MAKE) - -dclean: - $(PERL) -pe 'if 
(/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new - mv -f Makefile.new $(MAKEFILE) - rm -f opensslconf.h - @target=dclean; $(RECURSIVE_MAKE) - -# DO NOT DELETE THIS LINE -- make depend depends on it. - -cpt_err.o: ../include/openssl/bio.h ../include/openssl/crypto.h -cpt_err.o: ../include/openssl/e_os2.h ../include/openssl/err.h -cpt_err.o: ../include/openssl/lhash.h ../include/openssl/opensslconf.h -cpt_err.o: ../include/openssl/opensslv.h ../include/openssl/ossl_typ.h -cpt_err.o: ../include/openssl/safestack.h ../include/openssl/stack.h -cpt_err.o: ../include/openssl/symhacks.h cpt_err.c -cryptlib.o: ../e_os.h ../include/openssl/bio.h ../include/openssl/buffer.h -cryptlib.o: ../include/openssl/crypto.h ../include/openssl/e_os2.h -cryptlib.o: ../include/openssl/err.h ../include/openssl/lhash.h -cryptlib.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h -cryptlib.o: ../include/openssl/ossl_typ.h ../include/openssl/safestack.h -cryptlib.o: ../include/openssl/stack.h ../include/openssl/symhacks.h cryptlib.c -cryptlib.o: cryptlib.h -cversion.o: ../e_os.h ../include/openssl/bio.h ../include/openssl/buffer.h -cversion.o: ../include/openssl/crypto.h ../include/openssl/e_os2.h -cversion.o: ../include/openssl/err.h ../include/openssl/lhash.h -cversion.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h -cversion.o: ../include/openssl/ossl_typ.h ../include/openssl/safestack.h -cversion.o: ../include/openssl/stack.h ../include/openssl/symhacks.h buildinf.h -cversion.o: cryptlib.h cversion.c -ebcdic.o: ../include/openssl/e_os2.h ../include/openssl/opensslconf.h ebcdic.c -ex_data.o: ../e_os.h ../include/openssl/bio.h ../include/openssl/buffer.h -ex_data.o: ../include/openssl/crypto.h ../include/openssl/e_os2.h -ex_data.o: ../include/openssl/err.h ../include/openssl/lhash.h -ex_data.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h -ex_data.o: ../include/openssl/ossl_typ.h ../include/openssl/safestack.h 
-ex_data.o: ../include/openssl/stack.h ../include/openssl/symhacks.h cryptlib.h -ex_data.o: ex_data.c -fips_ers.o: ../include/openssl/opensslconf.h fips_ers.c -mem.o: ../e_os.h ../include/openssl/bio.h ../include/openssl/buffer.h -mem.o: ../include/openssl/crypto.h ../include/openssl/e_os2.h -mem.o: ../include/openssl/err.h ../include/openssl/lhash.h -mem.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h -mem.o: ../include/openssl/ossl_typ.h ../include/openssl/safestack.h -mem.o: ../include/openssl/stack.h ../include/openssl/symhacks.h cryptlib.h -mem.o: mem.c -mem_clr.o: ../include/openssl/crypto.h ../include/openssl/e_os2.h -mem_clr.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h -mem_clr.o: ../include/openssl/ossl_typ.h ../include/openssl/safestack.h -mem_clr.o: ../include/openssl/stack.h ../include/openssl/symhacks.h mem_clr.c -mem_dbg.o: ../e_os.h ../include/openssl/bio.h ../include/openssl/buffer.h -mem_dbg.o: ../include/openssl/crypto.h ../include/openssl/e_os2.h -mem_dbg.o: ../include/openssl/err.h ../include/openssl/lhash.h -mem_dbg.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h -mem_dbg.o: ../include/openssl/ossl_typ.h ../include/openssl/safestack.h -mem_dbg.o: ../include/openssl/stack.h ../include/openssl/symhacks.h cryptlib.h -mem_dbg.o: mem_dbg.c -o_dir.o: ../e_os.h ../include/openssl/e_os2.h ../include/openssl/opensslconf.h -o_dir.o: LPdir_unix.c o_dir.c o_dir.h -o_fips.o: ../e_os.h ../include/openssl/bio.h ../include/openssl/buffer.h -o_fips.o: ../include/openssl/crypto.h ../include/openssl/e_os2.h -o_fips.o: ../include/openssl/err.h ../include/openssl/lhash.h -o_fips.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h -o_fips.o: ../include/openssl/ossl_typ.h ../include/openssl/safestack.h -o_fips.o: ../include/openssl/stack.h ../include/openssl/symhacks.h cryptlib.h -o_fips.o: o_fips.c -o_init.o: ../e_os.h ../include/openssl/bio.h ../include/openssl/crypto.h -o_init.o: 
../include/openssl/e_os2.h ../include/openssl/err.h -o_init.o: ../include/openssl/lhash.h ../include/openssl/opensslconf.h -o_init.o: ../include/openssl/opensslv.h ../include/openssl/ossl_typ.h -o_init.o: ../include/openssl/safestack.h ../include/openssl/stack.h -o_init.o: ../include/openssl/symhacks.h o_init.c -o_str.o: ../e_os.h ../include/openssl/e_os2.h ../include/openssl/opensslconf.h -o_str.o: o_str.c o_str.h -o_time.o: ../include/openssl/e_os2.h ../include/openssl/opensslconf.h o_time.c -o_time.o: o_time.h -uid.o: ../include/openssl/crypto.h ../include/openssl/e_os2.h -uid.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h -uid.o: ../include/openssl/ossl_typ.h ../include/openssl/safestack.h -uid.o: ../include/openssl/stack.h ../include/openssl/symhacks.h uid.c diff --git a/crypto/aes/Makefile b/crypto/aes/Makefile deleted file mode 100644 index 05e4a0149ed8..000000000000 --- a/crypto/aes/Makefile +++ /dev/null @@ -1,171 +0,0 @@ -# -# crypto/aes/Makefile -# - -DIR= aes -TOP= ../.. -CC= cc -CPP= $(CC) -E -INCLUDES= -CFLAG=-g -MAKEFILE= Makefile -AR= ar r - -AES_ENC=aes_core.o aes_cbc.o - -CFLAGS= $(INCLUDES) $(CFLAG) -ASFLAGS= $(INCLUDES) $(ASFLAG) -AFLAGS= $(ASFLAGS) - -GENERAL=Makefile -#TEST=aestest.c -TEST= -APPS= - -LIB=$(TOP)/libcrypto.a -LIBSRC=aes_core.c aes_misc.c aes_ecb.c aes_cbc.c aes_cfb.c aes_ofb.c \ - aes_ctr.c aes_ige.c aes_wrap.c -LIBOBJ=aes_misc.o aes_ecb.o aes_cfb.o aes_ofb.o aes_ctr.o aes_ige.o aes_wrap.o \ - $(AES_ENC) - -SRC= $(LIBSRC) - -EXHEADER= aes.h -HEADER= aes_locl.h $(EXHEADER) - -ALL= $(GENERAL) $(SRC) $(HEADER) - -top: - (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all) - -all: lib - -lib: $(LIBOBJ) - $(AR) $(LIB) $(LIBOBJ) - $(RANLIB) $(LIB) || echo Never mind. 
- @touch lib - -aes-ia64.s: asm/aes-ia64.S - $(CC) $(CFLAGS) -E asm/aes-ia64.S > $@ - -aes-586.s: asm/aes-586.pl ../perlasm/x86asm.pl - $(PERL) asm/aes-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ -vpaes-x86.s: asm/vpaes-x86.pl ../perlasm/x86asm.pl - $(PERL) asm/vpaes-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ -aesni-x86.s: asm/aesni-x86.pl ../perlasm/x86asm.pl - $(PERL) asm/aesni-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ - -aes-x86_64.s: asm/aes-x86_64.pl - $(PERL) asm/aes-x86_64.pl $(PERLASM_SCHEME) > $@ -vpaes-x86_64.s: asm/vpaes-x86_64.pl - $(PERL) asm/vpaes-x86_64.pl $(PERLASM_SCHEME) > $@ -bsaes-x86_64.s: asm/bsaes-x86_64.pl - $(PERL) asm/bsaes-x86_64.pl $(PERLASM_SCHEME) > $@ -aesni-x86_64.s: asm/aesni-x86_64.pl - $(PERL) asm/aesni-x86_64.pl $(PERLASM_SCHEME) > $@ -aesni-sha1-x86_64.s: asm/aesni-sha1-x86_64.pl - $(PERL) asm/aesni-sha1-x86_64.pl $(PERLASM_SCHEME) > $@ -aesni-sha256-x86_64.s: asm/aesni-sha256-x86_64.pl - $(PERL) asm/aesni-sha256-x86_64.pl $(PERLASM_SCHEME) > $@ -aesni-mb-x86_64.s: asm/aesni-mb-x86_64.pl - $(PERL) asm/aesni-mb-x86_64.pl $(PERLASM_SCHEME) > $@ - -aes-sparcv9.s: asm/aes-sparcv9.pl - $(PERL) asm/aes-sparcv9.pl $(CFLAGS) > $@ -aest4-sparcv9.s: asm/aest4-sparcv9.pl ../perlasm/sparcv9_modes.pl - $(PERL) asm/aest4-sparcv9.pl $(CFLAGS) > $@ - -aes-ppc.s: asm/aes-ppc.pl - $(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@ -vpaes-ppc.s: asm/vpaes-ppc.pl - $(PERL) asm/vpaes-ppc.pl $(PERLASM_SCHEME) $@ -aesp8-ppc.s: asm/aesp8-ppc.pl - $(PERL) asm/aesp8-ppc.pl $(PERLASM_SCHEME) $@ - -aes-parisc.s: asm/aes-parisc.pl - $(PERL) asm/aes-parisc.pl $(PERLASM_SCHEME) $@ - -aes-mips.S: asm/aes-mips.pl - $(PERL) asm/aes-mips.pl $(PERLASM_SCHEME) $@ - -aesv8-armx.S: asm/aesv8-armx.pl - $(PERL) asm/aesv8-armx.pl $(PERLASM_SCHEME) $@ -aesv8-armx.o: aesv8-armx.S - -# GNU make "catch all" -aes-%.S: asm/aes-%.pl; $(PERL) $< $(PERLASM_SCHEME) > $@ -aes-armv4.o: aes-armv4.S -bsaes-%.S: asm/bsaes-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@ 
-bsaes-armv7.o: bsaes-armv7.S - -files: - $(PERL) $(TOP)/util/files.pl "AES_ENC=$(AES_ENC)" Makefile >> $(TOP)/MINFO - -links: - @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) - @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST) - @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS) - -install: - @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile... - @headerlist="$(EXHEADER)"; for i in $$headerlist ; \ - do \ - (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \ - chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ - done; - -tags: - ctags $(SRC) - -tests: - -lint: - lint -DLINT $(INCLUDES) $(SRC)>fluff - -update: depend - -depend: - @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... - $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) - -dclean: - $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new - mv -f Makefile.new $(MAKEFILE) - -clean: - rm -f *.s *.S *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff - -# DO NOT DELETE THIS LINE -- make depend depends on it. 
- -aes_cbc.o: ../../include/openssl/aes.h ../../include/openssl/modes.h -aes_cbc.o: ../../include/openssl/opensslconf.h aes_cbc.c -aes_cfb.o: ../../include/openssl/aes.h ../../include/openssl/modes.h -aes_cfb.o: ../../include/openssl/opensslconf.h aes_cfb.c -aes_core.o: ../../include/openssl/aes.h ../../include/openssl/e_os2.h -aes_core.o: ../../include/openssl/opensslconf.h aes_core.c aes_locl.h -aes_ctr.o: ../../include/openssl/aes.h ../../include/openssl/modes.h -aes_ctr.o: ../../include/openssl/opensslconf.h aes_ctr.c -aes_ecb.o: ../../include/openssl/aes.h ../../include/openssl/e_os2.h -aes_ecb.o: ../../include/openssl/opensslconf.h aes_ecb.c aes_locl.h -aes_ige.o: ../../e_os.h ../../include/openssl/aes.h ../../include/openssl/bio.h -aes_ige.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -aes_ige.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -aes_ige.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -aes_ige.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -aes_ige.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -aes_ige.o: ../../include/openssl/symhacks.h ../cryptlib.h aes_ige.c aes_locl.h -aes_misc.o: ../../include/openssl/aes.h ../../include/openssl/crypto.h -aes_misc.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h -aes_misc.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -aes_misc.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -aes_misc.o: ../../include/openssl/symhacks.h aes_locl.h aes_misc.c -aes_ofb.o: ../../include/openssl/aes.h ../../include/openssl/modes.h -aes_ofb.o: ../../include/openssl/opensslconf.h aes_ofb.c -aes_wrap.o: ../../e_os.h ../../include/openssl/aes.h -aes_wrap.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h -aes_wrap.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h -aes_wrap.o: ../../include/openssl/err.h ../../include/openssl/lhash.h -aes_wrap.o: 
../../include/openssl/modes.h ../../include/openssl/opensslconf.h -aes_wrap.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -aes_wrap.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -aes_wrap.o: ../../include/openssl/symhacks.h ../cryptlib.h aes_wrap.c diff --git a/crypto/aes/README b/crypto/aes/README deleted file mode 100644 index 0f9620a80eca..000000000000 --- a/crypto/aes/README +++ /dev/null @@ -1,3 +0,0 @@ -This is an OpenSSL-compatible version of AES (also called Rijndael). -aes_core.c is basically the same as rijndael-alg-fst.c but with an -API that looks like the rest of the OpenSSL symmetric cipher suite. diff --git a/crypto/aes/aes.h b/crypto/aes/aes.h deleted file mode 100644 index faa66c49148f..000000000000 --- a/crypto/aes/aes.h +++ /dev/null @@ -1,149 +0,0 @@ -/* crypto/aes/aes.h */ -/* ==================================================================== - * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. 
For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - */ - -#ifndef HEADER_AES_H -# define HEADER_AES_H - -# include <openssl/opensslconf.h> - -# ifdef OPENSSL_NO_AES -# error AES is disabled. -# endif - -# include <stddef.h> - -# define AES_ENCRYPT 1 -# define AES_DECRYPT 0 - -/* - * Because array size can't be a const in C, the following two are macros. - * Both sizes are in bytes. 
- */ -# define AES_MAXNR 14 -# define AES_BLOCK_SIZE 16 - -#ifdef __cplusplus -extern "C" { -#endif - -/* This should be a hidden type, but EVP requires that the size be known */ -struct aes_key_st { -# ifdef AES_LONG - unsigned long rd_key[4 * (AES_MAXNR + 1)]; -# else - unsigned int rd_key[4 * (AES_MAXNR + 1)]; -# endif - int rounds; -}; -typedef struct aes_key_st AES_KEY; - -const char *AES_options(void); - -int AES_set_encrypt_key(const unsigned char *userKey, const int bits, - AES_KEY *key); -int AES_set_decrypt_key(const unsigned char *userKey, const int bits, - AES_KEY *key); - -int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, - AES_KEY *key); -int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, - AES_KEY *key); - -void AES_encrypt(const unsigned char *in, unsigned char *out, - const AES_KEY *key); -void AES_decrypt(const unsigned char *in, unsigned char *out, - const AES_KEY *key); - -void AES_ecb_encrypt(const unsigned char *in, unsigned char *out, - const AES_KEY *key, const int enc); -void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, - size_t length, const AES_KEY *key, - unsigned char *ivec, const int enc); -void AES_cfb128_encrypt(const unsigned char *in, unsigned char *out, - size_t length, const AES_KEY *key, - unsigned char *ivec, int *num, const int enc); -void AES_cfb1_encrypt(const unsigned char *in, unsigned char *out, - size_t length, const AES_KEY *key, - unsigned char *ivec, int *num, const int enc); -void AES_cfb8_encrypt(const unsigned char *in, unsigned char *out, - size_t length, const AES_KEY *key, - unsigned char *ivec, int *num, const int enc); -void AES_ofb128_encrypt(const unsigned char *in, unsigned char *out, - size_t length, const AES_KEY *key, - unsigned char *ivec, int *num); -void AES_ctr128_encrypt(const unsigned char *in, unsigned char *out, - size_t length, const AES_KEY *key, - unsigned char ivec[AES_BLOCK_SIZE], - unsigned char 
ecount_buf[AES_BLOCK_SIZE], - unsigned int *num); -/* NB: the IV is _two_ blocks long */ -void AES_ige_encrypt(const unsigned char *in, unsigned char *out, - size_t length, const AES_KEY *key, - unsigned char *ivec, const int enc); -/* NB: the IV is _four_ blocks long */ -void AES_bi_ige_encrypt(const unsigned char *in, unsigned char *out, - size_t length, const AES_KEY *key, - const AES_KEY *key2, const unsigned char *ivec, - const int enc); - -int AES_wrap_key(AES_KEY *key, const unsigned char *iv, - unsigned char *out, - const unsigned char *in, unsigned int inlen); -int AES_unwrap_key(AES_KEY *key, const unsigned char *iv, - unsigned char *out, - const unsigned char *in, unsigned int inlen); - - -#ifdef __cplusplus -} -#endif - -#endif /* !HEADER_AES_H */ diff --git a/crypto/aes/aes_cbc.c b/crypto/aes/aes_cbc.c index 805d0e260a6f..342841fc4ff7 100644 --- a/crypto/aes/aes_cbc.c +++ b/crypto/aes/aes_cbc.c @@ -1,52 +1,10 @@ -/* crypto/aes/aes_cbc.c */ -/* ==================================================================== - * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. 
The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== +/* + * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #include <openssl/aes.h> diff --git a/crypto/aes/aes_cfb.c b/crypto/aes/aes_cfb.c index 1225000963ea..f010e3c4ea9e 100644 --- a/crypto/aes/aes_cfb.c +++ b/crypto/aes/aes_cfb.c @@ -1,52 +1,10 @@ -/* crypto/aes/aes_cfb.c */ -/* ==================================================================== - * Copyright (c) 2002-2006 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. 
Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== +/* + * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #include <openssl/aes.h> diff --git a/crypto/aes/aes_core.c b/crypto/aes/aes_core.c index 7019b5d7aa3a..f1f11fd8de7b 100644 --- a/crypto/aes/aes_core.c +++ b/crypto/aes/aes_core.c @@ -1,4 +1,12 @@ -/* crypto/aes/aes_core.c */ +/* + * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + /** * rijndael-alg-fst.c * @@ -6,9 +14,9 @@ * * Optimised ANSI C code for the Rijndael cipher (now AES) * - * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be> - * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be> - * @author Paulo Barreto <paulo.barreto@terra.com.br> + * @author Vincent Rijmen + * @author Antoon Bosselaers + * @author Paulo Barreto * * This code is hereby placed in the public domain. * @@ -28,14 +36,10 @@ /* Note: rewritten a little bit to provide error control and an OpenSSL- compatible API */ -#ifndef AES_DEBUG -# ifndef NDEBUG -# define NDEBUG -# endif -#endif #include <assert.h> #include <stdlib.h> +#include <openssl/crypto.h> #include <openssl/aes.h> #include "aes_locl.h" @@ -625,8 +629,8 @@ static const u32 rcon[] = { /** * Expand the cipher key into the encryption key schedule. */ -int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, - AES_KEY *key) +int AES_set_encrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key) { u32 *rk; @@ -640,9 +644,9 @@ int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, rk = key->rd_key; - if (bits==128) + if (bits == 128) key->rounds = 10; - else if (bits==192) + else if (bits == 192) key->rounds = 12; else key->rounds = 14; @@ -727,8 +731,8 @@ int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, /** * Expand the cipher key into the decryption key schedule. 
*/ -int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, - AES_KEY *key) +int AES_set_decrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key) { u32 *rk; @@ -736,7 +740,7 @@ int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, u32 temp; /* first, start with an encryption schedule */ - status = private_AES_set_encrypt_key(userKey, bits, key); + status = AES_set_encrypt_key(userKey, bits, key); if (status < 0) return status; @@ -1204,11 +1208,11 @@ static const u32 rcon[] = { /** * Expand the cipher key into the encryption key schedule. */ -int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, - AES_KEY *key) +int AES_set_encrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key) { u32 *rk; - int i = 0; + int i = 0; u32 temp; if (!userKey || !key) @@ -1218,9 +1222,9 @@ int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, rk = key->rd_key; - if (bits==128) + if (bits == 128) key->rounds = 10; - else if (bits==192) + else if (bits == 192) key->rounds = 12; else key->rounds = 14; @@ -1305,8 +1309,8 @@ int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, /** * Expand the cipher key into the decryption key schedule. 
*/ -int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, - AES_KEY *key) +int AES_set_decrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key) { u32 *rk; @@ -1314,7 +1318,7 @@ int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, u32 temp; /* first, start with an encryption schedule */ - status = private_AES_set_encrypt_key(userKey, bits, key); + status = AES_set_encrypt_key(userKey, bits, key); if (status < 0) return status; @@ -1351,7 +1355,7 @@ int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, rk[j] = tpe ^ ROTATE(tpd,16) ^ ROTATE(tp9,24) ^ ROTATE(tpb,8); #else - rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ + rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ (tp9 >> 8) ^ (tp9 << 24) ^ (tpb >> 24) ^ (tpb << 8); #endif diff --git a/crypto/aes/aes_ctr.c b/crypto/aes/aes_ctr.c deleted file mode 100644 index 9e760c4b12ad..000000000000 --- a/crypto/aes/aes_ctr.c +++ /dev/null @@ -1,63 +0,0 @@ -/* crypto/aes/aes_ctr.c */ -/* ==================================================================== - * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. 
The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. 
- * ==================================================================== - * - */ - -#include <openssl/aes.h> -#include <openssl/modes.h> - -void AES_ctr128_encrypt(const unsigned char *in, unsigned char *out, - size_t length, const AES_KEY *key, - unsigned char ivec[AES_BLOCK_SIZE], - unsigned char ecount_buf[AES_BLOCK_SIZE], - unsigned int *num) -{ - CRYPTO_ctr128_encrypt(in, out, length, key, ivec, ecount_buf, num, - (block128_f) AES_encrypt); -} diff --git a/crypto/aes/aes_ecb.c b/crypto/aes/aes_ecb.c index 52151a5c70f4..29bfc1ad66f7 100644 --- a/crypto/aes/aes_ecb.c +++ b/crypto/aes/aes_ecb.c @@ -1,59 +1,12 @@ -/* crypto/aes/aes_ecb.c */ -/* ==================================================================== - * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. 
Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== +/* + * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#ifndef AES_DEBUG -# ifndef NDEBUG -# define NDEBUG -# endif -#endif #include <assert.h> #include <openssl/aes.h> diff --git a/crypto/aes/aes_ige.c b/crypto/aes/aes_ige.c index 8f2b7706472a..75f796cf3b8f 100644 --- a/crypto/aes/aes_ige.c +++ b/crypto/aes/aes_ige.c @@ -1,55 +1,13 @@ -/* crypto/aes/aes_ige.c */ -/* ==================================================================== - * Copyright (c) 2006 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. 
Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== +/* + * Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#include "cryptlib.h" +#include "internal/cryptlib.h" #include <openssl/aes.h> #include "aes_locl.h" @@ -83,6 +41,9 @@ void AES_ige_encrypt(const unsigned char *in, unsigned char *out, size_t n; size_t len = length; + if (length == 0) + return; + OPENSSL_assert(in && out && key && ivec); OPENSSL_assert((AES_ENCRYPT == enc) || (AES_DECRYPT == enc)); OPENSSL_assert((length % AES_BLOCK_SIZE) == 0); diff --git a/crypto/aes/aes_locl.h b/crypto/aes/aes_locl.h index 7acd74ec1603..adee29df8d91 100644 --- a/crypto/aes/aes_locl.h +++ b/crypto/aes/aes_locl.h @@ -1,63 +1,16 @@ -/* crypto/aes/aes.h */ -/* ==================================================================== - * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. 
Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== +/* + * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #ifndef HEADER_AES_LOCL_H # define HEADER_AES_LOCL_H # include <openssl/e_os2.h> - -# ifdef OPENSSL_NO_AES -# error AES is disabled. 
-# endif - # include <stdio.h> # include <stdlib.h> # include <string.h> diff --git a/crypto/aes/aes_misc.c b/crypto/aes/aes_misc.c index fafad4d6f57a..7403c84f82d2 100644 --- a/crypto/aes/aes_misc.c +++ b/crypto/aes/aes_misc.c @@ -1,61 +1,16 @@ -/* crypto/aes/aes_misc.c */ -/* ==================================================================== - * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. 
Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== +/* + * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #include <openssl/opensslv.h> -#include <openssl/crypto.h> #include <openssl/aes.h> #include "aes_locl.h" -const char AES_version[] = "AES" OPENSSL_VERSION_PTEXT; - const char *AES_options(void) { #ifdef FULL_UNROLL @@ -64,23 +19,3 @@ const char *AES_options(void) return "aes(partial)"; #endif } - -/* FIPS wrapper functions to block low level AES calls in FIPS mode */ - -int AES_set_encrypt_key(const unsigned char *userKey, const int bits, - AES_KEY *key) -{ -#ifdef OPENSSL_FIPS - fips_cipher_abort(AES); -#endif - return private_AES_set_encrypt_key(userKey, bits, key); -} - -int AES_set_decrypt_key(const unsigned char *userKey, const int bits, - AES_KEY *key) -{ -#ifdef OPENSSL_FIPS - fips_cipher_abort(AES); -#endif - return private_AES_set_decrypt_key(userKey, bits, key); -} diff --git a/crypto/aes/aes_ofb.c b/crypto/aes/aes_ofb.c index 64a08caaec6d..215b53858eb6 100644 --- a/crypto/aes/aes_ofb.c +++ b/crypto/aes/aes_ofb.c @@ -1,52 +1,10 @@ -/* crypto/aes/aes_ofb.c */ -/* ==================================================================== - * Copyright (c) 2002-2006 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. 
All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== +/* + * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved. * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ #include <openssl/aes.h> diff --git a/crypto/aes/aes_wrap.c b/crypto/aes/aes_wrap.c index b7b64d57a487..cae0b212297a 100644 --- a/crypto/aes/aes_wrap.c +++ b/crypto/aes/aes_wrap.c @@ -1,58 +1,13 @@ -/* crypto/aes/aes_wrap.c */ /* - * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL - * project. - */ -/* ==================================================================== - * Copyright (c) 2008 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * licensing@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. 
Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved. * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html */ -#include "cryptlib.h" +#include "internal/cryptlib.h" #include <openssl/aes.h> #include <openssl/modes.h> diff --git a/crypto/aes/aes_x86core.c b/crypto/aes/aes_x86core.c index b5dd6976772a..1b660d716d6c 100644 --- a/crypto/aes/aes_x86core.c +++ b/crypto/aes/aes_x86core.c @@ -1,4 +1,20 @@ -/* crypto/aes/aes_core.c */ +/* + * Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +/* + * This is experimental x86[_64] derivative. It assumes little-endian + * byte order and expects CPU to sustain unaligned memory references. + * It is used as playground for cache-time attack mitigations and + * serves as reference C implementation for x86[_64] as well as some + * other assembly modules. + */ + /** * rijndael-alg-fst.c * @@ -6,9 +22,9 @@ * * Optimised ANSI C code for the Rijndael cipher (now AES) * - * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be> - * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be> - * @author Paulo Barreto <paulo.barreto@terra.com.br> + * @author Vincent Rijmen + * @author Antoon Bosselaers + * @author Paulo Barreto * * This code is hereby placed in the public domain. * @@ -25,21 +41,7 @@ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * This is experimental x86[_64] derivative. It assumes little-endian - * byte order and expects CPU to sustain unaligned memory references. - * It is used as playground for cache-time attack mitigations and - * serves as reference C implementation for x86[_64] assembler. 
- * - * <appro@fy.chalmers.se> - */ - -#ifndef AES_DEBUG -# ifndef NDEBUG -# define NDEBUG -# endif -#endif #include <assert.h> #include <stdlib.h> @@ -618,7 +620,7 @@ int AES_set_decrypt_key(const unsigned char *userKey, const int bits, rk[j] = tpe ^ ROTATE(tpd,16) ^ ROTATE(tp9,8) ^ ROTATE(tpb,24); #else - rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ + rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ (tp9 >> 24) ^ (tp9 << 8) ^ (tpb >> 8) ^ (tpb << 24); #endif @@ -907,7 +909,7 @@ void AES_decrypt(const unsigned char *in, unsigned char *out, (u32)Td4[(s1 >> 16) & 0xff] << 16 ^ (u32)Td4[(s0 >> 24) ] << 24; - /* now do the linear transform using words */ + /* now do the linear transform using words */ { int i; u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m; @@ -931,7 +933,7 @@ void AES_decrypt(const unsigned char *in, unsigned char *out, t[i] = tpe ^ ROTATE(tpd,16) ^ ROTATE(tp9,8) ^ ROTATE(tpb,24); #else - t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ + t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ (tp9 >> 24) ^ (tp9 << 8) ^ (tpb >> 8) ^ (tpb << 24); #endif @@ -984,7 +986,7 @@ void AES_decrypt(const unsigned char *in, unsigned char *out, (u32)Td4[(s1 >> 16) & 0xff] << 16 ^ (u32)Td4[(s0 >> 24) ] << 24; - /* now do the linear transform using words */ + /* now do the linear transform using words */ { int i; u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m; @@ -1008,7 +1010,7 @@ void AES_decrypt(const unsigned char *in, unsigned char *out, t[i] = tpe ^ ROTATE(tpd,16) ^ ROTATE(tp9,8) ^ ROTATE(tpb,24); #else - t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ + t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ (tp9 >> 24) ^ (tp9 << 8) ^ (tpb >> 8) ^ (tpb << 24); #endif diff --git a/crypto/aes/asm/aes-586.pl b/crypto/aes/asm/aes-586.pl index 60286ecb9645..29059edf8b7a 100755 --- a/crypto/aes/asm/aes-586.pl +++ b/crypto/aes/asm/aes-586.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2004-2016 The OpenSSL Project Authors. All Rights Reserved. 
+# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -32,7 +39,7 @@ # for scaling too, I [try to] avoid the latter by favoring off-by-2 # shifts and masking the result with 0xFF<<2 instead of "boring" 0xFF. # -# As was shown by Dean Gaudet <dean@arctic.org>, the above note turned +# As was shown by Dean Gaudet, the above note turned out to be # void. Performance improvement with off-by-2 shifts was observed on # intermediate implementation, which was spilling yet another register # to stack... Final offset*4 code below runs just a tad faster on P4, @@ -48,8 +55,8 @@ # better performance on most recent µ-archs... # # Third version adds AES_cbc_encrypt implementation, which resulted in -# up to 40% performance imrovement of CBC benchmark results. 40% was -# observed on P4 core, where "overall" imrovement coefficient, i.e. if +# up to 40% performance improvement of CBC benchmark results. 40% was +# observed on P4 core, where "overall" improvement coefficient, i.e. if # compared to PIC generated by GCC and in CBC mode, was observed to be # as large as 4x:-) CBC performance is virtually identical to ECB now # and on some platforms even better, e.g. 17.6 "small" cycles/byte on @@ -116,7 +123,7 @@ # words every cache-line is *guaranteed* to be accessed within ~50 # cycles window. Why just SSE? Because it's needed on hyper-threading # CPU! 
Which is also why it's prefetched with 64 byte stride. Best -# part is that it has no negative effect on performance:-) +# part is that it has no negative effect on performance:-) # # Version 4.3 implements switch between compact and non-compact block # functions in AES_cbc_encrypt depending on how much data was asked @@ -152,7 +159,7 @@ # combinations then attack becomes infeasible. This is why revised # AES_cbc_encrypt "dares" to switch to larger S-box when larger chunk # of data is to be processed in one stroke. The current size limit of -# 512 bytes is chosen to provide same [diminishigly low] probability +# 512 bytes is chosen to provide same [diminishingly low] probability # for cache-line to remain untouched in large chunk operation with # large S-box as for single block operation with compact S-box and # surely needs more careful consideration... @@ -164,12 +171,12 @@ # yield execution to process performing AES just before timer fires # off the scheduler, immediately regain control of CPU and analyze the # cache state. For this attack to be efficient attacker would have to -# effectively slow down the operation by several *orders* of magnitute, +# effectively slow down the operation by several *orders* of magnitude, # by ratio of time slice to duration of handful of AES rounds, which # unlikely to remain unnoticed. Not to mention that this also means -# that he would spend correspondigly more time to collect enough +# that he would spend correspondingly more time to collect enough # statistical data to mount the attack. It's probably appropriate to -# say that if adeversary reckons that this attack is beneficial and +# say that if adversary reckons that this attack is beneficial and # risks to be noticed, you probably have larger problems having him # mere opportunity. In other words suggested code design expects you # to preclude/mitigate this attack by overall system security design. 
@@ -191,7 +198,11 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; -&asm_init($ARGV[0],"aes-586.pl",$x86only = $ARGV[$#ARGV] eq "386"); +$output = pop; +open OUT,">$output"; +*STDOUT=*OUT; + +&asm_init($ARGV[0],$x86only = $ARGV[$#ARGV] eq "386"); &static_label("AES_Te"); &static_label("AES_Td"); @@ -229,7 +240,7 @@ $small_footprint=1; # $small_footprint=1 code is ~5% slower [on # contention and in hope to "collect" 5% back # in real-life applications... -$vertical_spin=0; # shift "verticaly" defaults to 0, because of +$vertical_spin=0; # shift "vertically" defaults to 0, because of # its proof-of-concept status... # Note that there is no decvert(), as well as last encryption round is # performed with "horizontal" shifts. This is because this "vertical" @@ -574,7 +585,7 @@ sub enctransform() # +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ # | mm4 | mm0 | # +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ -# | s3 | s2 | s1 | s0 | +# | s3 | s2 | s1 | s0 | # +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ # |15|14|13|12|11|10| 9| 8| 7| 6| 5| 4| 3| 2| 1| 0| # +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ @@ -794,7 +805,7 @@ sub encstep() if ($i==3) { $tmp=$s[3]; &mov ($s[2],$__s1); }##%ecx elsif($i==2){ &movz ($tmp,&HB($s[3])); }#%ebx[2] - else { &mov ($tmp,$s[3]); + else { &mov ($tmp,$s[3]); &shr ($tmp,24) } &xor ($out,&DWP(1,$te,$tmp,8)); if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); } @@ -1547,7 +1558,7 @@ sub sse_deccompact() &pxor ("mm1","mm3"); &pxor ("mm5","mm7"); # tp4 &pshufw ("mm3","mm1",0xb1); &pshufw ("mm7","mm5",0xb1); &pxor ("mm0","mm1"); &pxor ("mm4","mm5"); # ^= tp4 - &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # ^= ROTATE(tp4,16) + &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # ^= ROTATE(tp4,16) &pxor ("mm3","mm3"); &pxor ("mm7","mm7"); &pcmpgtb("mm3","mm1"); &pcmpgtb("mm7","mm5"); @@ -1595,7 +1606,7 @@ sub decstep() # no instructions are reordered, as performance appears # optimal... 
or rather that all attempts to reorder didn't # result in better performance [which by the way is not a - # bit lower than ecryption]. + # bit lower than encryption]. if($i==3) { &mov ($key,$__key); } else { &mov ($out,$s[0]); } &and ($out,0xFF); @@ -2017,7 +2028,7 @@ sub declast() { # stack frame layout # -4(%esp) # return address 0(%esp) -# 0(%esp) # s0 backing store 4(%esp) +# 0(%esp) # s0 backing store 4(%esp) # 4(%esp) # s1 backing store 8(%esp) # 8(%esp) # s2 backing store 12(%esp) # 12(%esp) # s3 backing store 16(%esp) @@ -2727,7 +2738,7 @@ sub enckey() &mov (&DWP(80,"edi"),10); # setup number of rounds &xor ("eax","eax"); &jmp (&label("exit")); - + &set_label("12rounds"); &mov ("eax",&DWP(0,"esi")); # copy first 6 dwords &mov ("ebx",&DWP(4,"esi")); @@ -2861,12 +2872,12 @@ sub enckey() &set_label("exit"); &function_end("_x86_AES_set_encrypt_key"); -# int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, +# int AES_set_encrypt_key(const unsigned char *userKey, const int bits, # AES_KEY *key) -&function_begin_B("private_AES_set_encrypt_key"); +&function_begin_B("AES_set_encrypt_key"); &call ("_x86_AES_set_encrypt_key"); &ret (); -&function_end_B("private_AES_set_encrypt_key"); +&function_end_B("AES_set_encrypt_key"); sub deckey() { my ($i,$key,$tp1,$tp2,$tp4,$tp8) = @_; @@ -2923,9 +2934,9 @@ sub deckey() &mov (&DWP(4*$i,$key),$tp1); } -# int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, +# int AES_set_decrypt_key(const unsigned char *userKey, const int bits, # AES_KEY *key) -&function_begin_B("private_AES_set_decrypt_key"); +&function_begin_B("AES_set_decrypt_key"); &call ("_x86_AES_set_encrypt_key"); &cmp ("eax",0); &je (&label("proceed")); @@ -2981,7 +2992,9 @@ sub deckey() &jb (&label("permute")); &xor ("eax","eax"); # return success -&function_end("private_AES_set_decrypt_key"); +&function_end("AES_set_decrypt_key"); &asciz("AES for x86, CRYPTOGAMS by <appro\@openssl.org>"); &asm_finish(); + +close 
STDOUT; diff --git a/crypto/aes/asm/aes-armv4.pl b/crypto/aes/asm/aes-armv4.pl index c1b5e352d76f..998158998e90 100755 --- a/crypto/aes/asm/aes-armv4.pl +++ b/crypto/aes/asm/aes-armv4.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -32,8 +39,20 @@ # Profiler-assisted and platform-specific optimization resulted in 16% # improvement on Cortex A8 core and ~21.5 cycles per byte. -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; +$flavour = shift; +if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} $s0="r0"; $s1="r1"; @@ -58,15 +77,12 @@ $code=<<___; #endif .text -#if __ARM_ARCH__<7 -.code 32 -#else +#if defined(__thumb2__) && !defined(__APPLE__) .syntax unified -# ifdef __thumb2__ .thumb -# else +#else .code 32 -# endif +#undef __thumb2__ #endif .type AES_Te,%object @@ -181,15 +197,19 @@ AES_Te: .type AES_encrypt,%function .align 5 AES_encrypt: -#if __ARM_ARCH__<7 +#ifndef __thumb2__ sub r3,pc,#8 @ AES_encrypt #else adr r3,. 
#endif stmdb sp!,{r1,r4-r12,lr} +#if defined(__thumb2__) || defined(__APPLE__) + adr $tbl,AES_Te +#else + sub $tbl,r3,#AES_encrypt-AES_Te @ Te +#endif mov $rounds,r0 @ inp mov $key,r2 - sub $tbl,r3,#AES_encrypt-AES_Te @ Te #if __ARM_ARCH__<7 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral ldrb $t1,[$rounds,#2] @ manner... @@ -422,24 +442,24 @@ _armv4_AES_encrypt: ldr pc,[sp],#4 @ pop and return .size _armv4_AES_encrypt,.-_armv4_AES_encrypt -.global private_AES_set_encrypt_key -.type private_AES_set_encrypt_key,%function +.global AES_set_encrypt_key +.type AES_set_encrypt_key,%function .align 5 -private_AES_set_encrypt_key: +AES_set_encrypt_key: _armv4_AES_set_encrypt_key: -#if __ARM_ARCH__<7 +#ifndef __thumb2__ sub r3,pc,#8 @ AES_set_encrypt_key #else adr r3,. #endif teq r0,#0 -#if __ARM_ARCH__>=7 +#ifdef __thumb2__ itt eq @ Thumb2 thing, sanity check in ARM #endif moveq r0,#-1 beq .Labrt teq r2,#0 -#if __ARM_ARCH__>=7 +#ifdef __thumb2__ itt eq @ Thumb2 thing, sanity check in ARM #endif moveq r0,#-1 @@ -450,19 +470,23 @@ _armv4_AES_set_encrypt_key: teq r1,#192 beq .Lok teq r1,#256 -#if __ARM_ARCH__>=7 +#ifdef __thumb2__ itt ne @ Thumb2 thing, sanity check in ARM #endif movne r0,#-1 bne .Labrt .Lok: stmdb sp!,{r4-r12,lr} - sub $tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4 - mov $rounds,r0 @ inp mov lr,r1 @ bits mov $key,r2 @ key +#if defined(__thumb2__) || defined(__APPLE__) + adr $tbl,AES_Te+1024 @ Te4 +#else + sub $tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4 +#endif + #if __ARM_ARCH__<7 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral ldrb $t1,[$rounds,#2] @ manner... 
@@ -607,7 +631,7 @@ _armv4_AES_set_encrypt_key: str $s2,[$key,#-16] subs $rounds,$rounds,#1 str $s3,[$key,#-12] -#if __ARM_ARCH__>=7 +#ifdef __thumb2__ itt eq @ Thumb2 thing, sanity check in ARM #endif subeq r2,$key,#216 @@ -679,7 +703,7 @@ _armv4_AES_set_encrypt_key: str $s2,[$key,#-24] subs $rounds,$rounds,#1 str $s3,[$key,#-20] -#if __ARM_ARCH__>=7 +#ifdef __thumb2__ itt eq @ Thumb2 thing, sanity check in ARM #endif subeq r2,$key,#256 @@ -722,12 +746,12 @@ _armv4_AES_set_encrypt_key: moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) #endif -.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key +.size AES_set_encrypt_key,.-AES_set_encrypt_key -.global private_AES_set_decrypt_key -.type private_AES_set_decrypt_key,%function +.global AES_set_decrypt_key +.type AES_set_decrypt_key,%function .align 5 -private_AES_set_decrypt_key: +AES_set_decrypt_key: str lr,[sp,#-4]! @ push lr bl _armv4_AES_set_encrypt_key teq r0,#0 @@ -737,7 +761,7 @@ private_AES_set_decrypt_key: mov r0,r2 @ AES_set_encrypt_key preserves r2, mov r1,r2 @ which is AES_KEY *key b _armv4_AES_set_enc2dec_key -.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key +.size AES_set_decrypt_key,.-AES_set_decrypt_key @ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out) .global AES_set_enc2dec_key @@ -750,7 +774,7 @@ _armv4_AES_set_enc2dec_key: ldr $rounds,[r0,#240] mov $i1,r0 @ input add $i2,r0,$rounds,lsl#4 - mov $key,r1 @ ouput + mov $key,r1 @ output add $tbl,r1,$rounds,lsl#4 str $rounds,[r1,#240] @@ -949,15 +973,19 @@ AES_Td: .type AES_decrypt,%function .align 5 AES_decrypt: -#if __ARM_ARCH__<7 +#ifndef __thumb2__ sub r3,pc,#8 @ AES_decrypt #else adr r3,. 
#endif stmdb sp!,{r1,r4-r12,lr} +#if defined(__thumb2__) || defined(__APPLE__) + adr $tbl,AES_Td +#else + sub $tbl,r3,#AES_decrypt-AES_Td @ Td +#endif mov $rounds,r0 @ inp mov $key,r2 - sub $tbl,r3,#AES_decrypt-AES_Td @ Td #if __ARM_ARCH__<7 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral ldrb $t1,[$rounds,#2] @ manner... diff --git a/crypto/aes/asm/aes-c64xplus.pl b/crypto/aes/asm/aes-c64xplus.pl new file mode 100755 index 000000000000..19d2cc176fb2 --- /dev/null +++ b/crypto/aes/asm/aes-c64xplus.pl @@ -0,0 +1,1382 @@ +#! /usr/bin/env perl +# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + +# +# ==================================================================== +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# [Endian-neutral] AES for C64x+. +# +# Even though SPLOOPs are scheduled for 13 cycles, and thus expected +# performance is ~8.5 cycles per byte processed with 128-bit key, +# measured performance turned to be ~10 cycles per byte. Discrepancy +# must be caused by limitations of L1D memory banking(*), see SPRU871 +# TI publication for further details. If any consolation it's still +# ~20% faster than TI's linear assembly module anyway... Compared to +# aes_core.c compiled with cl6x 6.0 with -mv6400+ -o2 options this +# code is 3.75x faster and almost 3x smaller (tables included). 
+# +# (*) This means that there might be subtle correlation between data +# and timing and one can wonder if it can be ... attacked:-( +# On the other hand this also means that *if* one chooses to +# implement *4* T-tables variant [instead of 1 T-table as in +# this implementation, or in addition to], then one ought to +# *interleave* them. Even though it complicates addressing, +# references to interleaved tables would be guaranteed not to +# clash. I reckon that it should be possible to break 8 cycles +# per byte "barrier," i.e. improve by ~20%, naturally at the +# cost of 8x increased pressure on L1D. 8x because you'd have +# to interleave both Te and Td tables... + +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +($TEA,$TEB)=("A5","B5"); +($KPA,$KPB)=("A3","B1"); +@K=("A6","B6","A7","B7"); +@s=("A8","B8","A9","B9"); +@Te0=@Td0=("A16","B16","A17","B17"); +@Te1=@Td1=("A18","B18","A19","B19"); +@Te2=@Td2=("A20","B20","A21","B21"); +@Te3=@Td3=("A22","B22","A23","B23"); + +$code=<<___; + .text + + .if .ASSEMBLER_VERSION<7000000 + .asg 0,__TI_EABI__ + .endif + .if __TI_EABI__ + .nocmp + .asg AES_encrypt,_AES_encrypt + .asg AES_decrypt,_AES_decrypt + .asg AES_set_encrypt_key,_AES_set_encrypt_key + .asg AES_set_decrypt_key,_AES_set_decrypt_key + .asg AES_ctr32_encrypt,_AES_ctr32_encrypt + .endif + + .asg B3,RA + .asg A4,INP + .asg B4,OUT + .asg A6,KEY + .asg A4,RET + .asg B15,SP + + .eval 24,EXT0 + .eval 16,EXT1 + .eval 8,EXT2 + .eval 0,EXT3 + .eval 8,TBL1 + .eval 16,TBL2 + .eval 24,TBL3 + + .if .BIG_ENDIAN + .eval 24-EXT0,EXT0 + .eval 24-EXT1,EXT1 + .eval 24-EXT2,EXT2 + .eval 24-EXT3,EXT3 + .eval 32-TBL1,TBL1 + .eval 32-TBL2,TBL2 + .eval 32-TBL3,TBL3 + .endif + + .global _AES_encrypt +_AES_encrypt: + .asmfunc + MVK 1,B2 +__encrypt: + .if __TI_EABI__ + [B2] LDNDW *INP++,A9:A8 ; load input +|| MVKL \$PCR_OFFSET(AES_Te,__encrypt),$TEA +|| ADDKPC __encrypt,B0 + [B2] LDNDW *INP++,B9:B8 +|| MVKH \$PCR_OFFSET(AES_Te,__encrypt),$TEA +|| 
ADD 0,KEY,$KPA +|| ADD 4,KEY,$KPB + .else + [B2] LDNDW *INP++,A9:A8 ; load input +|| MVKL (AES_Te-__encrypt),$TEA +|| ADDKPC __encrypt,B0 + [B2] LDNDW *INP++,B9:B8 +|| MVKH (AES_Te-__encrypt),$TEA +|| ADD 0,KEY,$KPA +|| ADD 4,KEY,$KPB + .endif + LDW *$KPA++[2],$Te0[0] ; zero round key +|| LDW *$KPB++[2],$Te0[1] +|| MVK 60,A0 +|| ADD B0,$TEA,$TEA ; AES_Te + LDW *KEY[A0],B0 ; rounds +|| MVK 1024,A0 ; sizeof(AES_Te) + LDW *$KPA++[2],$Te0[2] +|| LDW *$KPB++[2],$Te0[3] +|| MV $TEA,$TEB + NOP + .if .BIG_ENDIAN + MV A9,$s[0] +|| MV A8,$s[1] +|| MV B9,$s[2] +|| MV B8,$s[3] + .else + MV A8,$s[0] +|| MV A9,$s[1] +|| MV B8,$s[2] +|| MV B9,$s[3] + .endif + XOR $Te0[0],$s[0],$s[0] +|| XOR $Te0[1],$s[1],$s[1] +|| LDW *$KPA++[2],$K[0] ; 1st round key +|| LDW *$KPB++[2],$K[1] + SUB B0,2,B0 + + SPLOOPD 13 +|| MVC B0,ILC +|| LDW *$KPA++[2],$K[2] +|| LDW *$KPB++[2],$K[3] +;;==================================================================== + EXTU $s[1],EXT1,24,$Te1[1] +|| EXTU $s[0],EXT3,24,$Te3[0] + LDW *${TEB}[$Te1[1]],$Te1[1] ; Te1[s1>>8], t0 +|| LDW *${TEA}[$Te3[0]],$Te3[0] ; Te3[s0>>24], t1 +|| XOR $s[2],$Te0[2],$s[2] ; modulo-scheduled +|| XOR $s[3],$Te0[3],$s[3] ; modulo-scheduled +|| EXTU $s[1],EXT3,24,$Te3[1] +|| EXTU $s[0],EXT1,24,$Te1[0] + LDW *${TEB}[$Te3[1]],$Te3[1] ; Te3[s1>>24], t2 +|| LDW *${TEA}[$Te1[0]],$Te1[0] ; Te1[s0>>8], t3 +|| EXTU $s[2],EXT2,24,$Te2[2] +|| EXTU $s[3],EXT2,24,$Te2[3] + LDW *${TEA}[$Te2[2]],$Te2[2] ; Te2[s2>>16], t0 +|| LDW *${TEB}[$Te2[3]],$Te2[3] ; Te2[s3>>16], t1 +|| EXTU $s[3],EXT3,24,$Te3[3] +|| EXTU $s[2],EXT1,24,$Te1[2] + LDW *${TEB}[$Te3[3]],$Te3[3] ; Te3[s3>>24], t0 +|| LDW *${TEA}[$Te1[2]],$Te1[2] ; Te1[s2>>8], t1 +|| EXTU $s[0],EXT2,24,$Te2[0] +|| EXTU $s[1],EXT2,24,$Te2[1] + LDW *${TEA}[$Te2[0]],$Te2[0] ; Te2[s0>>16], t2 +|| LDW *${TEB}[$Te2[1]],$Te2[1] ; Te2[s1>>16], t3 +|| EXTU $s[3],EXT1,24,$Te1[3] +|| EXTU $s[2],EXT3,24,$Te3[2] + LDW *${TEB}[$Te1[3]],$Te1[3] ; Te1[s3>>8], t2 +|| LDW *${TEA}[$Te3[2]],$Te3[2] ; Te3[s2>>24], t3 
+|| ROTL $Te1[1],TBL1,$Te3[0] ; t0 +|| ROTL $Te3[0],TBL3,$Te1[1] ; t1 +|| EXTU $s[0],EXT0,24,$Te0[0] +|| EXTU $s[1],EXT0,24,$Te0[1] + LDW *${TEA}[$Te0[0]],$Te0[0] ; Te0[s0], t0 +|| LDW *${TEB}[$Te0[1]],$Te0[1] ; Te0[s1], t1 +|| ROTL $Te3[1],TBL3,$Te1[0] ; t2 +|| ROTL $Te1[0],TBL1,$Te3[1] ; t3 +|| EXTU $s[2],EXT0,24,$Te0[2] +|| EXTU $s[3],EXT0,24,$Te0[3] + LDW *${TEA}[$Te0[2]],$Te0[2] ; Te0[s2], t2 +|| LDW *${TEB}[$Te0[3]],$Te0[3] ; Te0[s3], t3 +|| ROTL $Te2[2],TBL2,$Te2[2] ; t0 +|| ROTL $Te2[3],TBL2,$Te2[3] ; t1 +|| XOR $K[0],$Te3[0],$s[0] +|| XOR $K[1],$Te1[1],$s[1] + ROTL $Te3[3],TBL3,$Te1[2] ; t0 +|| ROTL $Te1[2],TBL1,$Te3[3] ; t1 +|| XOR $K[2],$Te1[0],$s[2] +|| XOR $K[3],$Te3[1],$s[3] +|| LDW *$KPA++[2],$K[0] ; next round key +|| LDW *$KPB++[2],$K[1] + ROTL $Te2[0],TBL2,$Te2[0] ; t2 +|| ROTL $Te2[1],TBL2,$Te2[1] ; t3 +|| XOR $s[0],$Te2[2],$s[0] +|| XOR $s[1],$Te2[3],$s[1] +|| LDW *$KPA++[2],$K[2] +|| LDW *$KPB++[2],$K[3] + ROTL $Te1[3],TBL1,$Te3[2] ; t2 +|| ROTL $Te3[2],TBL3,$Te1[3] ; t3 +|| XOR $s[0],$Te1[2],$s[0] +|| XOR $s[1],$Te3[3],$s[1] + XOR $s[2],$Te2[0],$s[2] +|| XOR $s[3],$Te2[1],$s[3] +|| XOR $s[0],$Te0[0],$s[0] +|| XOR $s[1],$Te0[1],$s[1] + SPKERNEL +|| XOR.L $s[2],$Te3[2],$s[2] +|| XOR.L $s[3],$Te1[3],$s[3] +;;==================================================================== + ADD.D ${TEA},A0,${TEA} ; point to Te4 +|| ADD.D ${TEB},A0,${TEB} +|| EXTU $s[1],EXT1,24,$Te1[1] +|| EXTU $s[0],EXT3,24,$Te3[0] + LDBU *${TEB}[$Te1[1]],$Te1[1] ; Te1[s1>>8], t0 +|| LDBU *${TEA}[$Te3[0]],$Te3[0] ; Te3[s0>>24], t1 +|| XOR $s[2],$Te0[2],$s[2] ; modulo-scheduled +|| XOR $s[3],$Te0[3],$s[3] ; modulo-scheduled +|| EXTU $s[0],EXT0,24,$Te0[0] +|| EXTU $s[1],EXT0,24,$Te0[1] + LDBU *${TEA}[$Te0[0]],$Te0[0] ; Te0[s0], t0 +|| LDBU *${TEB}[$Te0[1]],$Te0[1] ; Te0[s1], t1 +|| EXTU $s[3],EXT3,24,$Te3[3] +|| EXTU $s[2],EXT1,24,$Te1[2] + LDBU *${TEB}[$Te3[3]],$Te3[3] ; Te3[s3>>24], t0 +|| LDBU *${TEA}[$Te1[2]],$Te1[2] ; Te1[s2>>8], t1 +|| EXTU $s[2],EXT2,24,$Te2[2] +|| EXTU 
$s[3],EXT2,24,$Te2[3] + LDBU *${TEA}[$Te2[2]],$Te2[2] ; Te2[s2>>16], t0 +|| LDBU *${TEB}[$Te2[3]],$Te2[3] ; Te2[s3>>16], t1 +|| EXTU $s[1],EXT3,24,$Te3[1] +|| EXTU $s[0],EXT1,24,$Te1[0] + LDBU *${TEB}[$Te3[1]],$Te3[1] ; Te3[s1>>24], t2 +|| LDBU *${TEA}[$Te1[0]],$Te1[0] ; Te1[s0>>8], t3 +|| EXTU $s[3],EXT1,24,$Te1[3] +|| EXTU $s[2],EXT3,24,$Te3[2] + LDBU *${TEB}[$Te1[3]],$Te1[3] ; Te1[s3>>8], t2 +|| LDBU *${TEA}[$Te3[2]],$Te3[2] ; Te3[s2>>24], t3 +|| EXTU $s[2],EXT0,24,$Te0[2] +|| EXTU $s[3],EXT0,24,$Te0[3] + LDBU *${TEA}[$Te0[2]],$Te0[2] ; Te0[s2], t2 +|| LDBU *${TEB}[$Te0[3]],$Te0[3] ; Te0[s3], t3 +|| EXTU $s[0],EXT2,24,$Te2[0] +|| EXTU $s[1],EXT2,24,$Te2[1] + LDBU *${TEA}[$Te2[0]],$Te2[0] ; Te2[s0>>16], t2 +|| LDBU *${TEB}[$Te2[1]],$Te2[1] ; Te2[s1>>16], t3 + + .if .BIG_ENDIAN + PACK2 $Te0[0],$Te1[1],$Te0[0] +|| PACK2 $Te0[1],$Te1[2],$Te0[1] + PACK2 $Te2[2],$Te3[3],$Te2[2] +|| PACK2 $Te2[3],$Te3[0],$Te2[3] + PACKL4 $Te0[0],$Te2[2],$Te0[0] +|| PACKL4 $Te0[1],$Te2[3],$Te0[1] + XOR $K[0],$Te0[0],$Te0[0] ; s[0] +|| XOR $K[1],$Te0[1],$Te0[1] ; s[1] + + PACK2 $Te0[2],$Te1[3],$Te0[2] +|| PACK2 $Te0[3],$Te1[0],$Te0[3] + PACK2 $Te2[0],$Te3[1],$Te2[0] +|| PACK2 $Te2[1],$Te3[2],$Te2[1] +|| BNOP RA + PACKL4 $Te0[2],$Te2[0],$Te0[2] +|| PACKL4 $Te0[3],$Te2[1],$Te0[3] + XOR $K[2],$Te0[2],$Te0[2] ; s[2] +|| XOR $K[3],$Te0[3],$Te0[3] ; s[3] + + MV $Te0[0],A9 +|| MV $Te0[1],A8 + MV $Te0[2],B9 +|| MV $Te0[3],B8 +|| [B2] STNDW A9:A8,*OUT++ + [B2] STNDW B9:B8,*OUT++ + .else + PACK2 $Te1[1],$Te0[0],$Te1[1] +|| PACK2 $Te1[2],$Te0[1],$Te1[2] + PACK2 $Te3[3],$Te2[2],$Te3[3] +|| PACK2 $Te3[0],$Te2[3],$Te3[0] + PACKL4 $Te3[3],$Te1[1],$Te1[1] +|| PACKL4 $Te3[0],$Te1[2],$Te1[2] + XOR $K[0],$Te1[1],$Te1[1] ; s[0] +|| XOR $K[1],$Te1[2],$Te1[2] ; s[1] + + PACK2 $Te1[3],$Te0[2],$Te1[3] +|| PACK2 $Te1[0],$Te0[3],$Te1[0] + PACK2 $Te3[1],$Te2[0],$Te3[1] +|| PACK2 $Te3[2],$Te2[1],$Te3[2] +|| BNOP RA + PACKL4 $Te3[1],$Te1[3],$Te1[3] +|| PACKL4 $Te3[2],$Te1[0],$Te1[0] + XOR $K[2],$Te1[3],$Te1[3] ; 
s[2] +|| XOR $K[3],$Te1[0],$Te1[0] ; s[3] + + MV $Te1[1],A8 +|| MV $Te1[2],A9 + MV $Te1[3],B8 +|| MV $Te1[0],B9 +|| [B2] STNDW A9:A8,*OUT++ + [B2] STNDW B9:B8,*OUT++ + .endif + .endasmfunc + + .global _AES_decrypt +_AES_decrypt: + .asmfunc + MVK 1,B2 +__decrypt: + .if __TI_EABI__ + [B2] LDNDW *INP++,A9:A8 ; load input +|| MVKL \$PCR_OFFSET(AES_Td,__decrypt),$TEA +|| ADDKPC __decrypt,B0 + [B2] LDNDW *INP++,B9:B8 +|| MVKH \$PCR_OFFSET(AES_Td,__decrypt),$TEA +|| ADD 0,KEY,$KPA +|| ADD 4,KEY,$KPB + .else + [B2] LDNDW *INP++,A9:A8 ; load input +|| MVKL (AES_Td-__decrypt),$TEA +|| ADDKPC __decrypt,B0 + [B2] LDNDW *INP++,B9:B8 +|| MVKH (AES_Td-__decrypt),$TEA +|| ADD 0,KEY,$KPA +|| ADD 4,KEY,$KPB + .endif + LDW *$KPA++[2],$Td0[0] ; zero round key +|| LDW *$KPB++[2],$Td0[1] +|| MVK 60,A0 +|| ADD B0,$TEA,$TEA ; AES_Td + LDW *KEY[A0],B0 ; rounds +|| MVK 1024,A0 ; sizeof(AES_Td) + LDW *$KPA++[2],$Td0[2] +|| LDW *$KPB++[2],$Td0[3] +|| MV $TEA,$TEB + NOP + .if .BIG_ENDIAN + MV A9,$s[0] +|| MV A8,$s[1] +|| MV B9,$s[2] +|| MV B8,$s[3] + .else + MV A8,$s[0] +|| MV A9,$s[1] +|| MV B8,$s[2] +|| MV B9,$s[3] + .endif + XOR $Td0[0],$s[0],$s[0] +|| XOR $Td0[1],$s[1],$s[1] +|| LDW *$KPA++[2],$K[0] ; 1st round key +|| LDW *$KPB++[2],$K[1] + SUB B0,2,B0 + + SPLOOPD 13 +|| MVC B0,ILC +|| LDW *$KPA++[2],$K[2] +|| LDW *$KPB++[2],$K[3] +;;==================================================================== + EXTU $s[1],EXT3,24,$Td3[1] +|| EXTU $s[0],EXT1,24,$Td1[0] + LDW *${TEB}[$Td3[1]],$Td3[1] ; Td3[s1>>24], t0 +|| LDW *${TEA}[$Td1[0]],$Td1[0] ; Td1[s0>>8], t1 +|| XOR $s[2],$Td0[2],$s[2] ; modulo-scheduled +|| XOR $s[3],$Td0[3],$s[3] ; modulo-scheduled +|| EXTU $s[1],EXT1,24,$Td1[1] +|| EXTU $s[0],EXT3,24,$Td3[0] + LDW *${TEB}[$Td1[1]],$Td1[1] ; Td1[s1>>8], t2 +|| LDW *${TEA}[$Td3[0]],$Td3[0] ; Td3[s0>>24], t3 +|| EXTU $s[2],EXT2,24,$Td2[2] +|| EXTU $s[3],EXT2,24,$Td2[3] + LDW *${TEA}[$Td2[2]],$Td2[2] ; Td2[s2>>16], t0 +|| LDW *${TEB}[$Td2[3]],$Td2[3] ; Td2[s3>>16], t1 +|| EXTU 
$s[3],EXT1,24,$Td1[3] +|| EXTU $s[2],EXT3,24,$Td3[2] + LDW *${TEB}[$Td1[3]],$Td1[3] ; Td1[s3>>8], t0 +|| LDW *${TEA}[$Td3[2]],$Td3[2] ; Td3[s2>>24], t1 +|| EXTU $s[0],EXT2,24,$Td2[0] +|| EXTU $s[1],EXT2,24,$Td2[1] + LDW *${TEA}[$Td2[0]],$Td2[0] ; Td2[s0>>16], t2 +|| LDW *${TEB}[$Td2[1]],$Td2[1] ; Td2[s1>>16], t3 +|| EXTU $s[3],EXT3,24,$Td3[3] +|| EXTU $s[2],EXT1,24,$Td1[2] + LDW *${TEB}[$Td3[3]],$Td3[3] ; Td3[s3>>24], t2 +|| LDW *${TEA}[$Td1[2]],$Td1[2] ; Td1[s2>>8], t3 +|| ROTL $Td3[1],TBL3,$Td1[0] ; t0 +|| ROTL $Td1[0],TBL1,$Td3[1] ; t1 +|| EXTU $s[0],EXT0,24,$Td0[0] +|| EXTU $s[1],EXT0,24,$Td0[1] + LDW *${TEA}[$Td0[0]],$Td0[0] ; Td0[s0], t0 +|| LDW *${TEB}[$Td0[1]],$Td0[1] ; Td0[s1], t1 +|| ROTL $Td1[1],TBL1,$Td3[0] ; t2 +|| ROTL $Td3[0],TBL3,$Td1[1] ; t3 +|| EXTU $s[2],EXT0,24,$Td0[2] +|| EXTU $s[3],EXT0,24,$Td0[3] + LDW *${TEA}[$Td0[2]],$Td0[2] ; Td0[s2], t2 +|| LDW *${TEB}[$Td0[3]],$Td0[3] ; Td0[s3], t3 +|| ROTL $Td2[2],TBL2,$Td2[2] ; t0 +|| ROTL $Td2[3],TBL2,$Td2[3] ; t1 +|| XOR $K[0],$Td1[0],$s[0] +|| XOR $K[1],$Td3[1],$s[1] + ROTL $Td1[3],TBL1,$Td3[2] ; t0 +|| ROTL $Td3[2],TBL3,$Td1[3] ; t1 +|| XOR $K[2],$Td3[0],$s[2] +|| XOR $K[3],$Td1[1],$s[3] +|| LDW *$KPA++[2],$K[0] ; next round key +|| LDW *$KPB++[2],$K[1] + ROTL $Td2[0],TBL2,$Td2[0] ; t2 +|| ROTL $Td2[1],TBL2,$Td2[1] ; t3 +|| XOR $s[0],$Td2[2],$s[0] +|| XOR $s[1],$Td2[3],$s[1] +|| LDW *$KPA++[2],$K[2] +|| LDW *$KPB++[2],$K[3] + ROTL $Td3[3],TBL3,$Td1[2] ; t2 +|| ROTL $Td1[2],TBL1,$Td3[3] ; t3 +|| XOR $s[0],$Td3[2],$s[0] +|| XOR $s[1],$Td1[3],$s[1] + XOR $s[2],$Td2[0],$s[2] +|| XOR $s[3],$Td2[1],$s[3] +|| XOR $s[0],$Td0[0],$s[0] +|| XOR $s[1],$Td0[1],$s[1] + SPKERNEL +|| XOR.L $s[2],$Td1[2],$s[2] +|| XOR.L $s[3],$Td3[3],$s[3] +;;==================================================================== + ADD.D ${TEA},A0,${TEA} ; point to Td4 +|| ADD.D ${TEB},A0,${TEB} +|| EXTU $s[1],EXT3,24,$Td3[1] +|| EXTU $s[0],EXT1,24,$Td1[0] + LDBU *${TEB}[$Td3[1]],$Td3[1] ; Td3[s1>>24], t0 +|| LDBU 
*${TEA}[$Td1[0]],$Td1[0] ; Td1[s0>>8], t1 +|| XOR $s[2],$Td0[2],$s[2] ; modulo-scheduled +|| XOR $s[3],$Td0[3],$s[3] ; modulo-scheduled +|| EXTU $s[0],EXT0,24,$Td0[0] +|| EXTU $s[1],EXT0,24,$Td0[1] + LDBU *${TEA}[$Td0[0]],$Td0[0] ; Td0[s0], t0 +|| LDBU *${TEB}[$Td0[1]],$Td0[1] ; Td0[s1], t1 +|| EXTU $s[2],EXT2,24,$Td2[2] +|| EXTU $s[3],EXT2,24,$Td2[3] + LDBU *${TEA}[$Td2[2]],$Td2[2] ; Td2[s2>>16], t0 +|| LDBU *${TEB}[$Td2[3]],$Td2[3] ; Td2[s3>>16], t1 +|| EXTU $s[3],EXT1,24,$Td1[3] +|| EXTU $s[2],EXT3,24,$Td3[2] + LDBU *${TEB}[$Td1[3]],$Td1[3] ; Td1[s3>>8], t0 +|| LDBU *${TEA}[$Td3[2]],$Td3[2] ; Td3[s2>>24], t1 +|| EXTU $s[1],EXT1,24,$Td1[1] +|| EXTU $s[0],EXT3,24,$Td3[0] + LDBU *${TEB}[$Td1[1]],$Td1[1] ; Td1[s1>>8], t2 +|| LDBU *${TEA}[$Td3[0]],$Td3[0] ; Td3[s0>>24], t3 +|| EXTU $s[0],EXT2,24,$Td2[0] +|| EXTU $s[1],EXT2,24,$Td2[1] + LDBU *${TEA}[$Td2[0]],$Td2[0] ; Td2[s0>>16], t2 +|| LDBU *${TEB}[$Td2[1]],$Td2[1] ; Td2[s1>>16], t3 +|| EXTU $s[3],EXT3,24,$Td3[3] +|| EXTU $s[2],EXT1,24,$Td1[2] + LDBU *${TEB}[$Td3[3]],$Td3[3] ; Td3[s3>>24], t2 +|| LDBU *${TEA}[$Td1[2]],$Td1[2] ; Td1[s2>>8], t3 +|| EXTU $s[2],EXT0,24,$Td0[2] +|| EXTU $s[3],EXT0,24,$Td0[3] + LDBU *${TEA}[$Td0[2]],$Td0[2] ; Td0[s2], t2 +|| LDBU *${TEB}[$Td0[3]],$Td0[3] ; Td0[s3], t3 + + .if .BIG_ENDIAN + PACK2 $Td0[0],$Td1[3],$Td0[0] +|| PACK2 $Td0[1],$Td1[0],$Td0[1] + PACK2 $Td2[2],$Td3[1],$Td2[2] +|| PACK2 $Td2[3],$Td3[2],$Td2[3] + PACKL4 $Td0[0],$Td2[2],$Td0[0] +|| PACKL4 $Td0[1],$Td2[3],$Td0[1] + XOR $K[0],$Td0[0],$Td0[0] ; s[0] +|| XOR $K[1],$Td0[1],$Td0[1] ; s[1] + + PACK2 $Td0[2],$Td1[1],$Td0[2] +|| PACK2 $Td0[3],$Td1[2],$Td0[3] + PACK2 $Td2[0],$Td3[3],$Td2[0] +|| PACK2 $Td2[1],$Td3[0],$Td2[1] +|| BNOP RA + PACKL4 $Td0[2],$Td2[0],$Td0[2] +|| PACKL4 $Td0[3],$Td2[1],$Td0[3] + XOR $K[2],$Td0[2],$Td0[2] ; s[2] +|| XOR $K[3],$Td0[3],$Td0[3] ; s[3] + + MV $Td0[0],A9 +|| MV $Td0[1],A8 + MV $Td0[2],B9 +|| MV $Td0[3],B8 +|| [B2] STNDW A9:A8,*OUT++ + [B2] STNDW B9:B8,*OUT++ + .else + PACK2 
$Td1[3],$Td0[0],$Td1[3] +|| PACK2 $Td1[0],$Td0[1],$Td1[0] + PACK2 $Td3[1],$Td2[2],$Td3[1] +|| PACK2 $Td3[2],$Td2[3],$Td3[2] + PACKL4 $Td3[1],$Td1[3],$Td1[3] +|| PACKL4 $Td3[2],$Td1[0],$Td1[0] + XOR $K[0],$Td1[3],$Td1[3] ; s[0] +|| XOR $K[1],$Td1[0],$Td1[0] ; s[1] + + PACK2 $Td1[1],$Td0[2],$Td1[1] +|| PACK2 $Td1[2],$Td0[3],$Td1[2] + PACK2 $Td3[3],$Td2[0],$Td3[3] +|| PACK2 $Td3[0],$Td2[1],$Td3[0] +|| BNOP RA + PACKL4 $Td3[3],$Td1[1],$Td1[1] +|| PACKL4 $Td3[0],$Td1[2],$Td1[2] + XOR $K[2],$Td1[1],$Td1[1] ; s[2] +|| XOR $K[3],$Td1[2],$Td1[2] ; s[3] + + MV $Td1[3],A8 +|| MV $Td1[0],A9 + MV $Td1[1],B8 +|| MV $Td1[2],B9 +|| [B2] STNDW A9:A8,*OUT++ + [B2] STNDW B9:B8,*OUT++ + .endif + .endasmfunc +___ +{ +my @K=(@K,@s); # extended key +my @Te4=map("B$_",(16..19)); + +my @Kx9=@Te0; # used in AES_set_decrypt_key +my @KxB=@Te1; +my @KxD=@Te2; +my @KxE=@Te3; + +$code.=<<___; + .asg OUT,BITS + + .global _AES_set_encrypt_key +_AES_set_encrypt_key: +__set_encrypt_key: + .asmfunc + MV INP,A0 +|| SHRU BITS,5,BITS ; 128-192-256 -> 4-6-8 +|| MV KEY,A1 + [!A0] B RA +||[!A0] MVK -1,RET +||[!A0] MVK 1,A1 ; only one B RA + [!A1] B RA +||[!A1] MVK -1,RET +||[!A1] MVK 0,A0 +|| MVK 0,B0 +|| MVK 0,A1 + [A0] LDNDW *INP++,A9:A8 +|| [A0] CMPEQ 4,BITS,B0 +|| [A0] CMPLT 3,BITS,A1 + [B0] B key128? +|| [A1] LDNDW *INP++,B9:B8 +|| [A0] CMPEQ 6,BITS,B0 +|| [A0] CMPLT 5,BITS,A1 + [B0] B key192? +|| [A1] LDNDW *INP++,B17:B16 +|| [A0] CMPEQ 8,BITS,B0 +|| [A0] CMPLT 7,BITS,A1 + [B0] B key256? 
+|| [A1] LDNDW *INP++,B19:B18 + + .if __TI_EABI__ + [A0] ADD 0,KEY,$KPA +|| [A0] ADD 4,KEY,$KPB +|| [A0] MVKL \$PCR_OFFSET(AES_Te4,__set_encrypt_key),$TEA +|| [A0] ADDKPC __set_encrypt_key,B6 + [A0] MVKH \$PCR_OFFSET(AES_Te4,__set_encrypt_key),$TEA + [A0] ADD B6,$TEA,$TEA ; AES_Te4 + .else + [A0] ADD 0,KEY,$KPA +|| [A0] ADD 4,KEY,$KPB +|| [A0] MVKL (AES_Te4-__set_encrypt_key),$TEA +|| [A0] ADDKPC __set_encrypt_key,B6 + [A0] MVKH (AES_Te4-__set_encrypt_key),$TEA + [A0] ADD B6,$TEA,$TEA ; AES_Te4 + .endif + NOP + NOP + + BNOP RA,5 +|| MVK -2,RET ; unknown bit length +|| MVK 0,B0 ; redundant +;;==================================================================== +;;==================================================================== +key128?: + .if .BIG_ENDIAN + MV A9,$K[0] +|| MV A8,$K[1] +|| MV B9,$Te4[2] +|| MV B8,$K[3] + .else + MV A8,$K[0] +|| MV A9,$K[1] +|| MV B8,$Te4[2] +|| MV B9,$K[3] + .endif + + MVK 256,A0 +|| MVK 9,B0 + + SPLOOPD 14 +|| MVC B0,ILC +|| MV $TEA,$TEB +|| ADD $TEA,A0,A30 ; rcon +;;==================================================================== + LDW *A30++[1],A31 ; rcon[i] +|| MV $Te4[2],$K[2] +|| EXTU $K[3],EXT1,24,$Te4[0] + LDBU *${TEB}[$Te4[0]],$Te4[0] +|| MV $K[3],A0 +|| EXTU $K[3],EXT2,24,$Te4[1] + LDBU *${TEB}[$Te4[1]],$Te4[1] +|| EXTU A0,EXT3,24,A0 +|| EXTU $K[3],EXT0,24,$Te4[3] + .if .BIG_ENDIAN + LDBU *${TEA}[A0],$Te4[3] +|| LDBU *${TEB}[$Te4[3]],A0 + .else + LDBU *${TEA}[A0],A0 +|| LDBU *${TEB}[$Te4[3]],$Te4[3] + .endif + + STW $K[0],*$KPA++[2] +|| STW $K[1],*$KPB++[2] + STW $K[2],*$KPA++[2] +|| STW $K[3],*$KPB++[2] + + XOR A31,$K[0],$K[0] ; ^=rcon[i] + .if .BIG_ENDIAN + PACK2 $Te4[0],$Te4[1],$Te4[1] + PACK2 $Te4[3],A0,$Te4[3] + PACKL4 $Te4[1],$Te4[3],$Te4[3] + .else + PACK2 $Te4[1],$Te4[0],$Te4[1] + PACK2 $Te4[3],A0,$Te4[3] + PACKL4 $Te4[3],$Te4[1],$Te4[3] + .endif + XOR $Te4[3],$K[0],$Te4[0] ; K[0] + XOR $Te4[0],$K[1],$K[1] ; K[1] + MV $Te4[0],$K[0] +|| XOR $K[1],$K[2],$Te4[2] ; K[2] + XOR $Te4[2],$K[3],$K[3] ; K[3] + SPKERNEL 
+;;==================================================================== + BNOP RA + MV $Te4[2],$K[2] +|| STW $K[0],*$KPA++[2] +|| STW $K[1],*$KPB++[2] + STW $K[2],*$KPA++[2] +|| STW $K[3],*$KPB++[2] + MVK 10,B0 ; rounds + STW B0,*++${KPB}[15] + MVK 0,RET +;;==================================================================== +;;==================================================================== +key192?: + .if .BIG_ENDIAN + MV A9,$K[0] +|| MV A8,$K[1] +|| MV B9,$K[2] +|| MV B8,$K[3] + MV B17,$Te4[2] +|| MV B16,$K[5] + .else + MV A8,$K[0] +|| MV A9,$K[1] +|| MV B8,$K[2] +|| MV B9,$K[3] + MV B16,$Te4[2] +|| MV B17,$K[5] + .endif + + MVK 256,A0 +|| MVK 6,B0 + MV $TEA,$TEB +|| ADD $TEA,A0,A30 ; rcon +;;==================================================================== +loop192?: + LDW *A30++[1],A31 ; rcon[i] +|| MV $Te4[2],$K[4] +|| EXTU $K[5],EXT1,24,$Te4[0] + LDBU *${TEB}[$Te4[0]],$Te4[0] +|| MV $K[5],A0 +|| EXTU $K[5],EXT2,24,$Te4[1] + LDBU *${TEB}[$Te4[1]],$Te4[1] +|| EXTU A0,EXT3,24,A0 +|| EXTU $K[5],EXT0,24,$Te4[3] + .if .BIG_ENDIAN + LDBU *${TEA}[A0],$Te4[3] +|| LDBU *${TEB}[$Te4[3]],A0 + .else + LDBU *${TEA}[A0],A0 +|| LDBU *${TEB}[$Te4[3]],$Te4[3] + .endif + + STW $K[0],*$KPA++[2] +|| STW $K[1],*$KPB++[2] + STW $K[2],*$KPA++[2] +|| STW $K[3],*$KPB++[2] + STW $K[4],*$KPA++[2] +|| STW $K[5],*$KPB++[2] + + XOR A31,$K[0],$K[0] ; ^=rcon[i] + .if .BIG_ENDIAN + PACK2 $Te4[0],$Te4[1],$Te4[1] +|| PACK2 $Te4[3],A0,$Te4[3] + PACKL4 $Te4[1],$Te4[3],$Te4[3] + .else + PACK2 $Te4[1],$Te4[0],$Te4[1] +|| PACK2 $Te4[3],A0,$Te4[3] + PACKL4 $Te4[3],$Te4[1],$Te4[3] + .endif + BDEC loop192?,B0 +|| XOR $Te4[3],$K[0],$Te4[0] ; K[0] + XOR $Te4[0],$K[1],$K[1] ; K[1] + MV $Te4[0],$K[0] +|| XOR $K[1],$K[2],$Te4[2] ; K[2] + XOR $Te4[2],$K[3],$K[3] ; K[3] + MV $Te4[2],$K[2] +|| XOR $K[3],$K[4],$Te4[2] ; K[4] + XOR $Te4[2],$K[5],$K[5] ; K[5] +;;==================================================================== + BNOP RA + STW $K[0],*$KPA++[2] +|| STW $K[1],*$KPB++[2] + STW 
$K[2],*$KPA++[2] +|| STW $K[3],*$KPB++[2] + MVK 12,B0 ; rounds + STW B0,*++${KPB}[7] + MVK 0,RET +;;==================================================================== +;;==================================================================== +key256?: + .if .BIG_ENDIAN + MV A9,$K[0] +|| MV A8,$K[1] +|| MV B9,$K[2] +|| MV B8,$K[3] + MV B17,$K[4] +|| MV B16,$K[5] +|| MV B19,$Te4[2] +|| MV B18,$K[7] + .else + MV A8,$K[0] +|| MV A9,$K[1] +|| MV B8,$K[2] +|| MV B9,$K[3] + MV B16,$K[4] +|| MV B17,$K[5] +|| MV B18,$Te4[2] +|| MV B19,$K[7] + .endif + + MVK 256,A0 +|| MVK 6,B0 + MV $TEA,$TEB +|| ADD $TEA,A0,A30 ; rcon +;;==================================================================== +loop256?: + LDW *A30++[1],A31 ; rcon[i] +|| MV $Te4[2],$K[6] +|| EXTU $K[7],EXT1,24,$Te4[0] + LDBU *${TEB}[$Te4[0]],$Te4[0] +|| MV $K[7],A0 +|| EXTU $K[7],EXT2,24,$Te4[1] + LDBU *${TEB}[$Te4[1]],$Te4[1] +|| EXTU A0,EXT3,24,A0 +|| EXTU $K[7],EXT0,24,$Te4[3] + .if .BIG_ENDIAN + LDBU *${TEA}[A0],$Te4[3] +|| LDBU *${TEB}[$Te4[3]],A0 + .else + LDBU *${TEA}[A0],A0 +|| LDBU *${TEB}[$Te4[3]],$Te4[3] + .endif + + STW $K[0],*$KPA++[2] +|| STW $K[1],*$KPB++[2] + STW $K[2],*$KPA++[2] +|| STW $K[3],*$KPB++[2] + STW $K[4],*$KPA++[2] +|| STW $K[5],*$KPB++[2] + STW $K[6],*$KPA++[2] +|| STW $K[7],*$KPB++[2] +|| XOR A31,$K[0],$K[0] ; ^=rcon[i] + .if .BIG_ENDIAN + PACK2 $Te4[0],$Te4[1],$Te4[1] +|| PACK2 $Te4[3],A0,$Te4[3] + PACKL4 $Te4[1],$Te4[3],$Te4[3] +||[!B0] B done256? + .else + PACK2 $Te4[1],$Te4[0],$Te4[1] +|| PACK2 $Te4[3],A0,$Te4[3] + PACKL4 $Te4[3],$Te4[1],$Te4[3] +||[!B0] B done256? 
+ .endif + XOR $Te4[3],$K[0],$Te4[0] ; K[0] + XOR $Te4[0],$K[1],$K[1] ; K[1] + MV $Te4[0],$K[0] +|| XOR $K[1],$K[2],$Te4[2] ; K[2] + XOR $Te4[2],$K[3],$K[3] ; K[3] + + MV $Te4[2],$K[2] +|| [B0] EXTU $K[3],EXT0,24,$Te4[0] +|| [B0] SUB B0,1,B0 + LDBU *${TEB}[$Te4[0]],$Te4[0] +|| MV $K[3],A0 +|| EXTU $K[3],EXT1,24,$Te4[1] + LDBU *${TEB}[$Te4[1]],$Te4[1] +|| EXTU A0,EXT2,24,A0 +|| EXTU $K[3],EXT3,24,$Te4[3] + + .if .BIG_ENDIAN + LDBU *${TEA}[A0],$Te4[3] +|| LDBU *${TEB}[$Te4[3]],A0 + NOP 3 + PACK2 $Te4[0],$Te4[1],$Te4[1] + PACK2 $Te4[3],A0,$Te4[3] +|| B loop256? + PACKL4 $Te4[1],$Te4[3],$Te4[3] + .else + LDBU *${TEA}[A0],A0 +|| LDBU *${TEB}[$Te4[3]],$Te4[3] + NOP 3 + PACK2 $Te4[1],$Te4[0],$Te4[1] + PACK2 $Te4[3],A0,$Te4[3] +|| B loop256? + PACKL4 $Te4[3],$Te4[1],$Te4[3] + .endif + + XOR $Te4[3],$K[4],$Te4[0] ; K[4] + XOR $Te4[0],$K[5],$K[5] ; K[5] + MV $Te4[0],$K[4] +|| XOR $K[5],$K[6],$Te4[2] ; K[6] + XOR $Te4[2],$K[7],$K[7] ; K[7] +;;==================================================================== +done256?: + BNOP RA + STW $K[0],*$KPA++[2] +|| STW $K[1],*$KPB++[2] + STW $K[2],*$KPA++[2] +|| STW $K[3],*$KPB++[2] + MVK 14,B0 ; rounds + STW B0,*--${KPB}[1] + MVK 0,RET + .endasmfunc + + .global _AES_set_decrypt_key +_AES_set_decrypt_key: + .asmfunc + B __set_encrypt_key ; guarantee local call + MV KEY,B30 ; B30 is not modified + MV RA, B31 ; B31 is not modified + ADDKPC ret?,RA,2 +ret?: ; B0 holds rounds or zero + [!B0] BNOP B31 ; return if zero + [B0] SHL B0,4,A0 ; offset to last round key + [B0] SHRU B0,1,B1 + [B0] SUB B1,1,B1 + [B0] MVK 0x0000001B,B3 ; AES polynomial + [B0] MVKH 0x07000000,B3 + + SPLOOPD 9 ; flip round keys +|| MVC B1,ILC +|| MV B30,$KPA +|| ADD B30,A0,$KPB +|| MVK 16,A0 ; sizeof(round key) +;;==================================================================== + LDW *${KPA}[0],A16 +|| LDW *${KPB}[0],B16 + LDW *${KPA}[1],A17 +|| LDW *${KPB}[1],B17 + LDW *${KPA}[2],A18 +|| LDW *${KPB}[2],B18 + LDW *${KPA}[3],A19 +|| ADD $KPA,A0,$KPA +|| LDW 
*${KPB}[3],B19 +|| SUB $KPB,A0,$KPB + NOP + STW B16,*${KPA}[-4] +|| STW A16,*${KPB}[4] + STW B17,*${KPA}[-3] +|| STW A17,*${KPB}[5] + STW B18,*${KPA}[-2] +|| STW A18,*${KPB}[6] + STW B19,*${KPA}[-1] +|| STW A19,*${KPB}[7] + SPKERNEL +;;==================================================================== + SUB B0,1,B0 ; skip last round +|| ADD B30,A0,$KPA ; skip first round +|| ADD B30,A0,$KPB +|| MVC GFPGFR,B30 ; save GFPGFR + LDW *${KPA}[0],$K[0] +|| LDW *${KPB}[1],$K[1] +|| MVC B3,GFPGFR + LDW *${KPA}[2],$K[2] +|| LDW *${KPB}[3],$K[3] + MVK 0x00000909,A24 +|| MVK 0x00000B0B,B24 + MVKH 0x09090000,A24 +|| MVKH 0x0B0B0000,B24 + MVC B0,ILC +|| SUB B0,1,B0 + + GMPY4 $K[0],A24,$Kx9[0] ; ·0x09 +|| GMPY4 $K[1],A24,$Kx9[1] +|| MVK 0x00000D0D,A25 +|| MVK 0x00000E0E,B25 + GMPY4 $K[2],A24,$Kx9[2] +|| GMPY4 $K[3],A24,$Kx9[3] +|| MVKH 0x0D0D0000,A25 +|| MVKH 0x0E0E0000,B25 + + GMPY4 $K[0],B24,$KxB[0] ; ·0x0B +|| GMPY4 $K[1],B24,$KxB[1] + GMPY4 $K[2],B24,$KxB[2] +|| GMPY4 $K[3],B24,$KxB[3] + + SPLOOP 11 ; InvMixColumns +;;==================================================================== + GMPY4 $K[0],A25,$KxD[0] ; ·0x0D +|| GMPY4 $K[1],A25,$KxD[1] +|| SWAP2 $Kx9[0],$Kx9[0] ; rotate by 16 +|| SWAP2 $Kx9[1],$Kx9[1] +|| MV $K[0],$s[0] ; this or DINT +|| MV $K[1],$s[1] +|| [B0] LDW *${KPA}[4],$K[0] +|| [B0] LDW *${KPB}[5],$K[1] + GMPY4 $K[2],A25,$KxD[2] +|| GMPY4 $K[3],A25,$KxD[3] +|| SWAP2 $Kx9[2],$Kx9[2] +|| SWAP2 $Kx9[3],$Kx9[3] +|| MV $K[2],$s[2] +|| MV $K[3],$s[3] +|| [B0] LDW *${KPA}[6],$K[2] +|| [B0] LDW *${KPB}[7],$K[3] + + GMPY4 $s[0],B25,$KxE[0] ; ·0x0E +|| GMPY4 $s[1],B25,$KxE[1] +|| XOR $Kx9[0],$KxB[0],$KxB[0] +|| XOR $Kx9[1],$KxB[1],$KxB[1] + GMPY4 $s[2],B25,$KxE[2] +|| GMPY4 $s[3],B25,$KxE[3] +|| XOR $Kx9[2],$KxB[2],$KxB[2] +|| XOR $Kx9[3],$KxB[3],$KxB[3] + + ROTL $KxB[0],TBL3,$KxB[0] +|| ROTL $KxB[1],TBL3,$KxB[1] +|| SWAP2 $KxD[0],$KxD[0] ; rotate by 16 +|| SWAP2 $KxD[1],$KxD[1] + ROTL $KxB[2],TBL3,$KxB[2] +|| ROTL $KxB[3],TBL3,$KxB[3] +|| SWAP2 $KxD[2],$KxD[2] 
+|| SWAP2 $KxD[3],$KxD[3] + + XOR $KxE[0],$KxD[0],$KxE[0] +|| XOR $KxE[1],$KxD[1],$KxE[1] +|| [B0] GMPY4 $K[0],A24,$Kx9[0] ; ·0x09 +|| [B0] GMPY4 $K[1],A24,$Kx9[1] +|| ADDAW $KPA,4,$KPA + XOR $KxE[2],$KxD[2],$KxE[2] +|| XOR $KxE[3],$KxD[3],$KxE[3] +|| [B0] GMPY4 $K[2],A24,$Kx9[2] +|| [B0] GMPY4 $K[3],A24,$Kx9[3] +|| ADDAW $KPB,4,$KPB + + XOR $KxB[0],$KxE[0],$KxE[0] +|| XOR $KxB[1],$KxE[1],$KxE[1] +|| [B0] GMPY4 $K[0],B24,$KxB[0] ; ·0x0B +|| [B0] GMPY4 $K[1],B24,$KxB[1] + XOR $KxB[2],$KxE[2],$KxE[2] +|| XOR $KxB[3],$KxE[3],$KxE[3] +|| [B0] GMPY4 $K[2],B24,$KxB[2] +|| [B0] GMPY4 $K[3],B24,$KxB[3] +|| STW $KxE[0],*${KPA}[-4] +|| STW $KxE[1],*${KPB}[-3] + STW $KxE[2],*${KPA}[-2] +|| STW $KxE[3],*${KPB}[-1] +|| [B0] SUB B0,1,B0 + SPKERNEL +;;==================================================================== + BNOP B31,3 + MVC B30,GFPGFR ; restore GFPGFR(*) + MVK 0,RET + .endasmfunc +___ +# (*) Even though ABI doesn't specify GFPGFR as non-volatile, there +# are code samples out there that *assume* its default value. 
+} +{ +my ($inp,$out,$blocks,$key,$ivp)=("A4","B4","A6","B6","A8"); +$code.=<<___; + .global _AES_ctr32_encrypt +_AES_ctr32_encrypt: + .asmfunc + LDNDW *${ivp}[0],A31:A30 ; load counter value +|| MV $blocks,A2 ; reassign $blocks +|| DMV RA,$key,B27:B26 ; reassign RA and $key + LDNDW *${ivp}[1],B31:B30 +|| MVK 0,B2 ; don't let __encrypt load input +|| MVK 0,A1 ; and postpone writing output + .if .BIG_ENDIAN + NOP + .else + NOP 4 + SWAP2 B31,B31 ; keep least significant 32 bits + SWAP4 B31,B31 ; in host byte order + .endif +ctr32_loop?: + [A2] BNOP __encrypt +|| [A1] XOR A29,A9,A9 ; input^Ek(counter) +|| [A1] XOR A28,A8,A8 +|| [A2] LDNDW *INP++,A29:A28 ; load input + [!A2] BNOP B27 ; return +|| [A1] XOR B29,B9,B9 +|| [A1] XOR B28,B8,B8 +|| [A2] LDNDW *INP++,B29:B28 + .if .BIG_ENDIAN + [A1] STNDW A9:A8,*OUT++ ; save output +|| [A2] DMV A31,A30,A9:A8 ; pass counter value to __encrypt + [A1] STNDW B9:B8,*OUT++ +|| [A2] DMV B31,B30,B9:B8 +|| [A2] ADD B30,1,B30 ; counter++ + .else + [A1] STNDW A9:A8,*OUT++ ; save output +|| [A2] DMV A31,A30,A9:A8 +|| [A2] SWAP2 B31,B0 +|| [A2] ADD B31,1,B31 ; counter++ + [A1] STNDW B9:B8,*OUT++ +|| [A2] MV B30,B8 +|| [A2] SWAP4 B0,B9 + .endif + [A2] ADDKPC ctr32_loop?,RA ; return to ctr32_loop? 
+|| [A2] MV B26,KEY ; pass $key +|| [A2] SUB A2,1,A2 ; $blocks-- +||[!A1] MVK 1,A1 + NOP + NOP + .endasmfunc +___ +} +# Tables are kept in endian-neutral manner +$code.=<<___; + .if __TI_EABI__ + .sect ".text:aes_asm.const" + .else + .sect ".const:aes_asm" + .endif + .align 128 +AES_Te: + .byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 + .byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d + .byte 0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd + .byte 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54 + .byte 0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03 + .byte 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d + .byte 0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62 + .byte 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a + .byte 0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d + .byte 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87 + .byte 0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb + .byte 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b + .byte 0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67 + .byte 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea + .byte 0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7 + .byte 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b + .byte 0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c + .byte 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a + .byte 0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41 + .byte 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f + .byte 0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4 + .byte 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08 + .byte 0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73 + .byte 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f + .byte 0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52 + .byte 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e + .byte 0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1 + .byte 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5 + .byte 0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36 + .byte 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d + .byte 0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69 + .byte 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f + .byte 0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e + .byte 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e + .byte 0x36,0x1b,0x1b,0x2d, 
0xdc,0x6e,0x6e,0xb2 + .byte 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb + .byte 0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d + .byte 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce + .byte 0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e + .byte 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97 + .byte 0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68 + .byte 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c + .byte 0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f + .byte 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed + .byte 0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46 + .byte 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b + .byte 0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4 + .byte 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a + .byte 0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a + .byte 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16 + .byte 0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7 + .byte 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94 + .byte 0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10 + .byte 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81 + .byte 0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44 + .byte 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3 + .byte 0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe + .byte 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a + .byte 0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc + .byte 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04 + .byte 0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1 + .byte 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63 + .byte 0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a + .byte 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d + .byte 0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14 + .byte 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f + .byte 0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2 + .byte 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39 + .byte 0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2 + .byte 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47 + .byte 0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7 + .byte 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95 + .byte 0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98 + .byte 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f + .byte 0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e + .byte 
0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83 + .byte 0x8c,0x46,0x46,0xca, 0xc7,0xee,0xee,0x29 + .byte 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c + .byte 0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2 + .byte 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76 + .byte 0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56 + .byte 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e + .byte 0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a + .byte 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4 + .byte 0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e + .byte 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6 + .byte 0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4 + .byte 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b + .byte 0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43 + .byte 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7 + .byte 0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64 + .byte 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0 + .byte 0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa + .byte 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25 + .byte 0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e + .byte 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18 + .byte 0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88 + .byte 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72 + .byte 0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1 + .byte 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51 + .byte 0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c + .byte 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21 + .byte 0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc + .byte 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85 + .byte 0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42 + .byte 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa + .byte 0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05 + .byte 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12 + .byte 0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f + .byte 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0 + .byte 0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58 + .byte 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9 + .byte 0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13 + .byte 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33 + .byte 0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70 + .byte 0x07,0x8e,0x8e,0x89, 
0x33,0x94,0x94,0xa7 + .byte 0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22 + .byte 0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20 + .byte 0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff + .byte 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a + .byte 0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8 + .byte 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17 + .byte 0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31 + .byte 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8 + .byte 0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0 + .byte 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11 + .byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc + .byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a +AES_Te4: + .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 + .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 + .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 + .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 + .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc + .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 + .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a + .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 + .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 + .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 + .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b + .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf + .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 + .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 + .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 + .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 + .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 + .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 + .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 + .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb + .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c + .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 + .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 + .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 + .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 + 
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a + .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e + .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e + .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 + .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf + .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 + .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 +rcon: + .byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 + .byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00 + .byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00 + .byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00 + .byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00 + .align 128 +AES_Td: + .byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 + .byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96 + .byte 0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1 + .byte 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93 + .byte 0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6 + .byte 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25 + .byte 0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7 + .byte 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f + .byte 0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67 + .byte 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1 + .byte 0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12 + .byte 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6 + .byte 0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95 + .byte 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda + .byte 0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3 + .byte 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44 + .byte 0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78 + .byte 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd + .byte 0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17 + .byte 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4 + .byte 0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82 + .byte 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45 + .byte 0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84 + .byte 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94 + .byte 0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19 + .byte 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7 + .byte 0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2 + .byte 
0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a + .byte 0xb2,0xeb,0x28,0x07, 0x2f,0xb5,0xc2,0x03 + .byte 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5 + .byte 0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2 + .byte 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c + .byte 0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92 + .byte 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1 + .byte 0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5 + .byte 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a + .byte 0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0 + .byte 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75 + .byte 0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa + .byte 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51 + .byte 0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d + .byte 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46 + .byte 0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05 + .byte 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff + .byte 0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97 + .byte 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77 + .byte 0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88 + .byte 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb + .byte 0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9 + .byte 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00 + .byte 0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48 + .byte 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e + .byte 0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56 + .byte 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27 + .byte 0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21 + .byte 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a + .byte 0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f + .byte 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e + .byte 0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2 + .byte 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16 + .byte 0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5 + .byte 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d + .byte 0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad + .byte 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8 + .byte 0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c + .byte 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd + .byte 0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc + .byte 0x44,0x66,0x3b,0xc5, 
0x5b,0xfb,0x7e,0x34 + .byte 0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc + .byte 0xb6,0xed,0xfc,0x68, 0xb8,0xe4,0xf1,0x63 + .byte 0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10 + .byte 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20 + .byte 0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8 + .byte 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d + .byte 0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3 + .byte 0x0d,0x86,0x52,0xec, 0x77,0xc1,0xe3,0xd0 + .byte 0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99 + .byte 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22 + .byte 0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a + .byte 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef + .byte 0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1 + .byte 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36 + .byte 0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28 + .byte 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4 + .byte 0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d + .byte 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62 + .byte 0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8 + .byte 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5 + .byte 0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c + .byte 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3 + .byte 0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7 + .byte 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b + .byte 0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4 + .byte 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8 + .byte 0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e + .byte 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6 + .byte 0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce + .byte 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6 + .byte 0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31 + .byte 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0 + .byte 0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6 + .byte 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15 + .byte 0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7 + .byte 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f + .byte 0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d + .byte 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf + .byte 0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b + .byte 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f + .byte 
0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d + .byte 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e + .byte 0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52 + .byte 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13 + .byte 0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a + .byte 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89 + .byte 0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35 + .byte 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c + .byte 0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f + .byte 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf + .byte 0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b + .byte 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86 + .byte 0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e + .byte 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f + .byte 0x16,0x1d,0xc3,0x72, 0xbc,0xe2,0x25,0x0c + .byte 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41 + .byte 0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde + .byte 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90 + .byte 0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70 + .byte 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42 +AES_Td4: + .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 + .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb + .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 + .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb + .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d + .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e + .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 + .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 + .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 + .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 + .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda + .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 + .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a + .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 + .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 + .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b + .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea + .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 + .byte 0x96, 0xac, 0x74, 
0x22, 0xe7, 0xad, 0x35, 0x85 + .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e + .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 + .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b + .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 + .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 + .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 + .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f + .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d + .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef + .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 + .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 + .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 + .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d + .cstring "AES for C64x+, CRYPTOGAMS by <appro\@openssl.org>" + .align 4 +___ + +print $code; +close STDOUT; diff --git a/crypto/aes/asm/aes-ia64.S b/crypto/aes/asm/aes-ia64.S index 7f6c4c366291..03f79b7ae3b7 100644 --- a/crypto/aes/asm/aes-ia64.S +++ b/crypto/aes/asm/aes-ia64.S @@ -1,5 +1,12 @@ +// Copyright 2004-2016 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the OpenSSL license (the "License"). You may not use +// this file except in compliance with the License. You can obtain a copy +// in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html +// // ==================================================================== -// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL // project. Rights for redistribution and usage in source and binary // forms are granted according to the OpenSSL license. // ==================================================================== @@ -10,7 +17,7 @@ // 'and' which in turn can be assigned to M-port [there're double as // much M-ports as there're I-ports on Itanium 2]. 
By sacrificing few // registers for small constants (255, 24 and 16) to be used with -// 'shr' and 'and' instructions I can achieve better ILP, Intruction +// 'shr' and 'and' instructions I can achieve better ILP, Instruction // Level Parallelism, and performance. This code outperforms GCC 3.3 // generated code by over factor of 2 (two), GCC 3.4 - by 70% and // HP C - by 40%. Measured best-case scenario, i.e. aligned @@ -26,7 +33,7 @@ // 64 bytes line size and L2 - 128 bytes... .ident "aes-ia64.S, version 1.2" -.ident "IA-64 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" +.ident "IA-64 ISA artwork by Andy Polyakov <appro@openssl.org>" .explicit .text diff --git a/crypto/aes/asm/aes-mips.pl b/crypto/aes/asm/aes-mips.pl index 4de3ee26bb74..716c3356ead9 100755 --- a/crypto/aes/asm/aes-mips.pl +++ b/crypto/aes/asm/aes-mips.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. 
You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -57,16 +64,18 @@ $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64 if ($flavour =~ /64|n32/i) { - $PTR_ADD="dadd"; # incidentally works even on n32 - $PTR_SUB="dsub"; # incidentally works even on n32 + $PTR_LA="dla"; + $PTR_ADD="daddu"; # incidentally works even on n32 + $PTR_SUB="dsubu"; # incidentally works even on n32 $PTR_INS="dins"; $REG_S="sd"; $REG_L="ld"; $PTR_SLL="dsll"; # incidentally works even on n32 $SZREG=8; } else { - $PTR_ADD="add"; - $PTR_SUB="sub"; + $PTR_LA="la"; + $PTR_ADD="addu"; + $PTR_SUB="subu"; $PTR_INS="ins"; $REG_S="sw"; $REG_L="lw"; @@ -79,29 +88,23 @@ $pf = ($flavour =~ /nubi/i) ? $t0 : $t2; # ###################################################################### -$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC}); +$big_endian=(`echo MIPSEB | $ENV{CC} -E -`=~/MIPSEB/)?0:1 if ($ENV{CC}); -for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); } +for (@ARGV) { $output=$_ if (/\w[\w\-]*\.\w+$/); } open STDOUT,">$output"; if (!defined($big_endian)) { $big_endian=(unpack('L',pack('N',1))==1); } -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; my ($MSB,$LSB)=(0,3); # automatically converted to little-endian $code.=<<___; -.text -#ifdef OPENSSL_FIPSCANISTER -# include <openssl/fipssyms.h> -#endif - -#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2) -#define _MIPS_ARCH_MIPS32R2 -#endif +#include "mips_arch.h" +.text #if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__)) .option pic2 #endif @@ -110,14 +113,14 @@ ___ {{{ my $FRAMESIZE=16*$SZREG; -my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 
0xc0fff008 : 0xc0ff0000; +my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000"; my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7); my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2); my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23)); my ($key0,$cnt)=($gp,$fp); -# instuction ordering is "stolen" from output from MIPSpro assembler +# instruction ordering is "stolen" from output from MIPSpro assembler # invoked with -mips3 -O3 arguments... $code.=<<___; .align 5 @@ -137,7 +140,7 @@ _mips_AES_encrypt: xor $s2,$t2 xor $s3,$t3 - sub $cnt,1 + subu $cnt,1 #if defined(__mips_smartmips) ext $i0,$s1,16,8 .Loop_enc: @@ -209,7 +212,7 @@ _mips_AES_encrypt: xor $t2,$t6 xor $t3,$t7 - sub $cnt,1 + subu $cnt,1 $PTR_ADD $key0,16 xor $s0,$t0 xor $s1,$t1 @@ -400,7 +403,7 @@ _mips_AES_encrypt: xor $t2,$t6 xor $t3,$t7 - sub $cnt,1 + subu $cnt,1 $PTR_ADD $key0,16 xor $s0,$t0 xor $s1,$t1 @@ -646,8 +649,14 @@ $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification ___ $code.=<<___; .set reorder - la $Tbl,AES_Te # PIC-ified 'load address' + $PTR_LA $Tbl,AES_Te # PIC-ified 'load address' +#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6) + lw $s0,0($inp) + lw $s1,4($inp) + lw $s2,8($inp) + lw $s3,12($inp) +#else lwl $s0,0+$MSB($inp) lwl $s1,4+$MSB($inp) lwl $s2,8+$MSB($inp) @@ -656,9 +665,16 @@ $code.=<<___; lwr $s1,4+$LSB($inp) lwr $s2,8+$LSB($inp) lwr $s3,12+$LSB($inp) +#endif bal _mips_AES_encrypt +#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6) + sw $s0,0($out) + sw $s1,4($out) + sw $s2,8($out) + sw $s3,12($out) +#else swr $s0,0+$LSB($out) swr $s1,4+$LSB($out) swr $s2,8+$LSB($out) @@ -667,6 +683,7 @@ $code.=<<___; swl $s1,4+$MSB($out) swl $s2,8+$MSB($out) swl $s3,12+$MSB($out) +#endif .set noreorder $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) @@ -711,7 +728,7 @@ _mips_AES_decrypt: xor $s2,$t2 xor $s3,$t3 - sub $cnt,1 + subu $cnt,1 #if defined(__mips_smartmips) ext $i0,$s3,16,8 .Loop_dec: @@ -783,7 
+800,7 @@ _mips_AES_decrypt: xor $t2,$t6 xor $t3,$t7 - sub $cnt,1 + subu $cnt,1 $PTR_ADD $key0,16 xor $s0,$t0 xor $s1,$t1 @@ -976,7 +993,7 @@ _mips_AES_decrypt: xor $t2,$t6 xor $t3,$t7 - sub $cnt,1 + subu $cnt,1 $PTR_ADD $key0,16 xor $s0,$t0 xor $s1,$t1 @@ -1217,8 +1234,14 @@ $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification ___ $code.=<<___; .set reorder - la $Tbl,AES_Td # PIC-ified 'load address' + $PTR_LA $Tbl,AES_Td # PIC-ified 'load address' +#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6) + lw $s0,0($inp) + lw $s1,4($inp) + lw $s2,8($inp) + lw $s3,12($inp) +#else lwl $s0,0+$MSB($inp) lwl $s1,4+$MSB($inp) lwl $s2,8+$MSB($inp) @@ -1227,9 +1250,16 @@ $code.=<<___; lwr $s1,4+$LSB($inp) lwr $s2,8+$LSB($inp) lwr $s3,12+$LSB($inp) +#endif bal _mips_AES_decrypt +#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6) + sw $s0,0($out) + sw $s1,4($out) + sw $s2,8($out) + sw $s3,12($out) +#else swr $s0,0+$LSB($out) swr $s1,4+$LSB($out) swr $s2,8+$LSB($out) @@ -1238,6 +1268,7 @@ $code.=<<___; swl $s1,4+$MSB($out) swl $s2,8+$MSB($out) swl $s3,12+$MSB($out) +#endif .set noreorder $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) @@ -1267,7 +1298,7 @@ ___ {{{ my $FRAMESIZE=8*$SZREG; -my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000; +my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 
"0xc000f008" : "0xc0000000"; my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3); my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3); @@ -1286,35 +1317,52 @@ _mips_AES_set_encrypt_key: $PTR_ADD $rcon,$Tbl,256 .set reorder +#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6) + lw $rk0,0($inp) # load 128 bits + lw $rk1,4($inp) + lw $rk2,8($inp) + lw $rk3,12($inp) +#else lwl $rk0,0+$MSB($inp) # load 128 bits lwl $rk1,4+$MSB($inp) lwl $rk2,8+$MSB($inp) lwl $rk3,12+$MSB($inp) - li $at,128 lwr $rk0,0+$LSB($inp) lwr $rk1,4+$LSB($inp) lwr $rk2,8+$LSB($inp) lwr $rk3,12+$LSB($inp) +#endif + li $at,128 .set noreorder beq $bits,$at,.L128bits li $cnt,10 .set reorder +#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6) + lw $rk4,16($inp) # load 192 bits + lw $rk5,20($inp) +#else lwl $rk4,16+$MSB($inp) # load 192 bits lwl $rk5,20+$MSB($inp) - li $at,192 lwr $rk4,16+$LSB($inp) lwr $rk5,20+$LSB($inp) +#endif + li $at,192 .set noreorder beq $bits,$at,.L192bits li $cnt,8 .set reorder +#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6) + lw $rk6,24($inp) # load 256 bits + lw $rk7,28($inp) +#else lwl $rk6,24+$MSB($inp) # load 256 bits lwl $rk7,28+$MSB($inp) - li $at,256 lwr $rk6,24+$LSB($inp) lwr $rk7,28+$LSB($inp) +#endif + li $at,256 .set noreorder beq $bits,$at,.L256bits li $cnt,7 @@ -1344,7 +1392,7 @@ _mips_AES_set_encrypt_key: sw $rk1,4($key) sw $rk2,8($key) sw $rk3,12($key) - sub $cnt,1 + subu $cnt,1 $PTR_ADD $key,16 _bias $i0,24 @@ -1401,7 +1449,7 @@ _mips_AES_set_encrypt_key: sw $rk3,12($key) sw $rk4,16($key) sw $rk5,20($key) - sub $cnt,1 + subu $cnt,1 $PTR_ADD $key,24 _bias $i0,24 @@ -1462,7 +1510,7 @@ _mips_AES_set_encrypt_key: sw $rk5,20($key) sw $rk6,24($key) sw $rk7,28($key) - sub $cnt,1 + subu $cnt,1 _bias $i0,24 _bias $i1,16 @@ -1528,9 +1576,9 @@ _mips_AES_set_encrypt_key: nop .end _mips_AES_set_encrypt_key -.globl private_AES_set_encrypt_key -.ent private_AES_set_encrypt_key -private_AES_set_encrypt_key: 
+.globl AES_set_encrypt_key +.ent AES_set_encrypt_key +AES_set_encrypt_key: .frame $sp,$FRAMESIZE,$ra .mask $SAVED_REGS_MASK,-$SZREG .set noreorder @@ -1552,11 +1600,11 @@ $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue ___ $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification .cplocal $Tbl - .cpsetup $pf,$zero,private_AES_set_encrypt_key + .cpsetup $pf,$zero,AES_set_encrypt_key ___ $code.=<<___; .set reorder - la $Tbl,AES_Te4 # PIC-ified 'load address' + $PTR_LA $Tbl,AES_Te4 # PIC-ified 'load address' bal _mips_AES_set_encrypt_key @@ -1575,7 +1623,7 @@ ___ $code.=<<___; jr $ra $PTR_ADD $sp,$FRAMESIZE -.end private_AES_set_encrypt_key +.end AES_set_encrypt_key ___ my ($head,$tail)=($inp,$bits); @@ -1583,9 +1631,9 @@ my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3); my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2); $code.=<<___; .align 5 -.globl private_AES_set_decrypt_key -.ent private_AES_set_decrypt_key -private_AES_set_decrypt_key: +.globl AES_set_decrypt_key +.ent AES_set_decrypt_key +AES_set_decrypt_key: .frame $sp,$FRAMESIZE,$ra .mask $SAVED_REGS_MASK,-$SZREG .set noreorder @@ -1607,11 +1655,11 @@ $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue ___ $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification .cplocal $Tbl - .cpsetup $pf,$zero,private_AES_set_decrypt_key + .cpsetup $pf,$zero,AES_set_decrypt_key ___ $code.=<<___; .set reorder - la $Tbl,AES_Te4 # PIC-ified 'load address' + $PTR_LA $Tbl,AES_Te4 # PIC-ified 'load address' bal _mips_AES_set_encrypt_key @@ -1644,7 +1692,7 @@ $code.=<<___; lw $tp1,16($key) # modulo-scheduled lui $x80808080,0x8080 - sub $cnt,1 + subu $cnt,1 or $x80808080,0x8080 sll $cnt,2 $PTR_ADD $key,16 @@ -1707,7 +1755,7 @@ $code.=<<___; lw $tp1,4($key) # modulo-scheduled xor $tpe,$tp2 #endif - sub $cnt,1 + subu $cnt,1 sw $tpe,0($key) $PTR_ADD $key,4 bnez $cnt,.Lmix @@ -1729,7 +1777,7 @@ ___ $code.=<<___; jr $ra $PTR_ADD $sp,$FRAMESIZE -.end 
private_AES_set_decrypt_key +.end AES_set_decrypt_key ___ }}} diff --git a/crypto/aes/asm/aes-parisc.pl b/crypto/aes/asm/aes-parisc.pl index 714dcfbbe3b7..e817c757f8c2 100755 --- a/crypto/aes/asm/aes-parisc.pl +++ b/crypto/aes/asm/aes-parisc.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2009-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -1005,18 +1012,27 @@ L\$AES_Td .STRINGZ "AES for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>" ___ +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler/) { + $gnuas = 1; +} + foreach (split("\n",$code)) { s/\`([^\`]*)\`/eval $1/ge; - # translate made up instructons: _ror, _srm + # translate made up instructions: _ror, _srm s/_ror(\s+)(%r[0-9]+),/shd$1$2,$2,/ or s/_srm(\s+%r[0-9]+),([0-9]+),/ $SIZE_T==4 ? 
sprintf("extru%s,%d,8,",$1,31-$2) : sprintf("extrd,u%s,%d,8,",$1,63-$2)/e; + s/(\.LEVEL\s+2\.0)W/$1w/ if ($gnuas && $SIZE_T==8); + s/\.SPACE\s+\$TEXT\$/.text/ if ($gnuas && $SIZE_T==8); + s/\.SUBSPA.*// if ($gnuas && $SIZE_T==8); s/,\*/,/ if ($SIZE_T==4); s/\bbv\b(.*\(%r2\))/bve$1/ if ($SIZE_T==8); + print $_,"\n"; } close STDOUT; diff --git a/crypto/aes/asm/aes-ppc.pl b/crypto/aes/asm/aes-ppc.pl index 5b83016efa98..ca69df4c3e95 100755 --- a/crypto/aes/asm/aes-ppc.pl +++ b/crypto/aes/asm/aes-ppc.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -19,7 +26,7 @@ # February 2010 # # Rescheduling instructions to favour Power6 pipeline gave 10% -# performance improvement on the platfrom in question (and marginal +# performance improvement on the platform in question (and marginal # improvement even on others). It should be noted that Power6 fails # to process byte in 18 cycles, only in 23, because it fails to issue # 4 load instructions in two cycles, only in 3. 
As result non-compact @@ -1426,10 +1433,10 @@ $code.=<<___; xor $s1,$s1,$acc05 xor $s2,$s2,$acc06 xor $s3,$s3,$acc07 - xor $s0,$s0,$acc08 # ^= ROTATE(r8,8) - xor $s1,$s1,$acc09 - xor $s2,$s2,$acc10 - xor $s3,$s3,$acc11 + xor $s0,$s0,$acc08 # ^= ROTATE(r8,8) + xor $s1,$s1,$acc09 + xor $s2,$s2,$acc10 + xor $s3,$s3,$acc11 b Ldec_compact_loop .align 4 diff --git a/crypto/aes/asm/aes-s390x.pl b/crypto/aes/asm/aes-s390x.pl index a8f4d29d1ce5..0c4005906650 100755 --- a/crypto/aes/asm/aes-s390x.pl +++ b/crypto/aes/asm/aes-s390x.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -37,7 +44,7 @@ # minimize/avoid Address Generation Interlock hazard and to favour # dual-issue z10 pipeline. This gave ~25% improvement on z10 and # almost 50% on z9. The gain is smaller on z10, because being dual- -# issue z10 makes it improssible to eliminate the interlock condition: +# issue z10 makes it impossible to eliminate the interlock condition: # critial path is not long enough. Yet it spends ~24 cycles per byte # processed with 128-bit key. 
# @@ -92,7 +99,7 @@ if ($flavour =~ /3[12]/) { $g="g"; } -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; $softonly=0; # allow hardware support @@ -122,6 +129,8 @@ sub _data_word() } $code=<<___; +#include "s390x_arch.h" + .text .type AES_Te,\@object @@ -397,7 +406,7 @@ _s390x_AES_encrypt: or $s1,$t1 or $t2,$i2 or $t3,$i3 - + srlg $i1,$s2,`8-3` # i0 srlg $i2,$s2,`16-3` # i1 nr $i1,$mask @@ -450,7 +459,7 @@ _s390x_AES_encrypt: x $s2,24($key) x $s3,28($key) - br $ra + br $ra .size _s390x_AES_encrypt,.-_s390x_AES_encrypt ___ @@ -772,17 +781,17 @@ _s390x_AES_decrypt: x $s2,24($key) x $s3,28($key) - br $ra + br $ra .size _s390x_AES_decrypt,.-_s390x_AES_decrypt ___ $code.=<<___; # void AES_set_encrypt_key(const unsigned char *in, int bits, # AES_KEY *key) { -.globl private_AES_set_encrypt_key -.type private_AES_set_encrypt_key,\@function +.globl AES_set_encrypt_key +.type AES_set_encrypt_key,\@function .align 16 -private_AES_set_encrypt_key: +AES_set_encrypt_key: _s390x_AES_set_encrypt_key: lghi $t0,0 cl${g}r $inp,$t0 @@ -806,7 +815,7 @@ _s390x_AES_set_encrypt_key: .Lproceed: ___ $code.=<<___ if (!$softonly); - # convert bits to km code, [128,192,256]->[18,19,20] + # convert bits to km(c) code, [128,192,256]->[18,19,20] lhi %r5,-128 lhi %r0,18 ar %r5,$bits @@ -814,13 +823,10 @@ $code.=<<___ if (!$softonly); ar %r5,%r0 larl %r1,OPENSSL_s390xcap_P - lg %r0,0(%r1) - tmhl %r0,0x4000 # check for message-security assist - jz .Lekey_internal - llihh %r0,0x8000 srlg %r0,%r0,0(%r5) - ng %r0,48(%r1) # check kmc capability vector + ng %r0,S390X_KM(%r1) # check availability of both km... + ng %r0,S390X_KMC(%r1) # ...and kmc support for given key length jz .Lekey_internal lmg %r0,%r1,0($inp) # just copy 128 bits... 
@@ -835,7 +841,7 @@ $code.=<<___ if (!$softonly); stg %r1,24($key) 1: st $bits,236($key) # save bits [for debugging purposes] lgr $t0,%r5 - st %r5,240($key) # save km code + st %r5,240($key) # save km(c) code lghi %r2,0 br %r14 ___ @@ -1059,14 +1065,14 @@ $code.=<<___; .Lminus1: lghi %r2,-1 br $ra -.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key +.size AES_set_encrypt_key,.-AES_set_encrypt_key # void AES_set_decrypt_key(const unsigned char *in, int bits, # AES_KEY *key) { -.globl private_AES_set_decrypt_key -.type private_AES_set_decrypt_key,\@function +.globl AES_set_decrypt_key +.type AES_set_decrypt_key,\@function .align 16 -private_AES_set_decrypt_key: +AES_set_decrypt_key: #st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers and $key! bras $ra,_s390x_AES_set_encrypt_key @@ -1080,7 +1086,7 @@ $code.=<<___ if (!$softonly); lhi $t1,16 cr $t0,$t1 jl .Lgo - oill $t0,0x80 # set "decrypt" bit + oill $t0,S390X_DECRYPT # set "decrypt" bit st $t0,240($key) br $ra ___ @@ -1166,7 +1172,7 @@ $code.=<<___; lm${g} %r6,%r13,6*$SIZE_T($sp)# as was saved by AES_set_encrypt_key! 
lghi %r2,0 br $ra -.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key +.size AES_set_decrypt_key,.-AES_set_decrypt_key ___ ######################################################################## @@ -1219,7 +1225,7 @@ $code.=<<___ if (!$softonly); .align 16 .Lkmc_truncated: ahi $key,-1 # it's the way it's encoded in mvc - tmll %r0,0x80 + tmll %r0,S390X_DECRYPT jnz .Lkmc_truncated_dec lghi %r1,0 stg %r1,16*$SIZE_T($sp) @@ -1290,7 +1296,7 @@ $code.=<<___; .Lcbc_enc_done: l${g} $ivp,6*$SIZE_T($sp) st $s0,0($ivp) - st $s1,4($ivp) + st $s1,4($ivp) st $s2,8($ivp) st $s3,12($ivp) @@ -1399,7 +1405,61 @@ $code.=<<___ if (!$softonly); clr %r0,%r1 jl .Lctr32_software - stm${g} %r6,$s3,6*$SIZE_T($sp) + st${g} $s2,10*$SIZE_T($sp) + st${g} $s3,11*$SIZE_T($sp) + + clr $len,%r1 # does work even in 64-bit mode + jle .Lctr32_nokma # kma is slower for <= 16 blocks + + larl %r1,OPENSSL_s390xcap_P + lr $s2,%r0 + llihh $s3,0x8000 + srlg $s3,$s3,0($s2) + ng $s3,S390X_KMA(%r1) # check kma capability vector + jz .Lctr32_nokma + + l${g}hi %r1,-$stdframe-112 + l${g}r $s3,$sp + la $sp,0(%r1,$sp) # prepare parameter block + + lhi %r1,0x0600 + sllg $len,$len,4 + or %r0,%r1 # set HS and LAAD flags + + st${g} $s3,0($sp) # backchain + la %r1,$stdframe($sp) + + lmg $s2,$s3,0($key) # copy key + stg $s2,$stdframe+80($sp) + stg $s3,$stdframe+88($sp) + lmg $s2,$s3,16($key) + stg $s2,$stdframe+96($sp) + stg $s3,$stdframe+104($sp) + + lmg $s2,$s3,0($ivp) # copy iv + stg $s2,$stdframe+64($sp) + ahi $s3,-1 # kma requires counter-1 + stg $s3,$stdframe+72($sp) + st $s3,$stdframe+12($sp) # copy counter + + lghi $s2,0 # no AAD + lghi $s3,0 + + .long 0xb929a042 # kma $out,$s2,$inp + brc 1,.-4 # pay attention to "partial completion" + + stg %r0,$stdframe+80($sp) # wipe key + stg %r0,$stdframe+88($sp) + stg %r0,$stdframe+96($sp) + stg %r0,$stdframe+104($sp) + la $sp,$stdframe+112($sp) + + lm${g} $s2,$s3,10*$SIZE_T($sp) + br $ra + +.align 16 +.Lctr32_nokma: + stm${g} %r6,$s1,6*$SIZE_T($sp) slgr 
$out,$inp la %r1,0($key) # %r1 is permanent copy of $key @@ -1432,18 +1492,13 @@ $code.=<<___ if (!$softonly); .Lctr32_hw_switch: ___ -$code.=<<___ if (0); ######### kmctr code was measured to be ~12% slower - larl $s0,OPENSSL_s390xcap_P - lg $s0,8($s0) - tmhh $s0,0x0004 # check for message_security-assist-4 - jz .Lctr32_km_loop - +$code.=<<___ if (!$softonly && 0);# kmctr code was measured to be ~12% slower llgfr $s0,%r0 lgr $s1,%r1 larl %r1,OPENSSL_s390xcap_P llihh %r0,0x8000 # check if kmctr supports the function code srlg %r0,%r0,0($s0) - ng %r0,64(%r1) # check kmctr capability vector + ng %r0,S390X_KMCTR(%r1) # check kmctr capability vector lgr %r0,$s0 lgr %r1,$s1 jz .Lctr32_km_loop @@ -1481,7 +1536,7 @@ $code.=<<___ if (0); ######### kmctr code was measured to be ~12% slower br $ra .align 16 ___ -$code.=<<___; +$code.=<<___ if (!$softonly); .Lctr32_km_loop: la $s2,16($sp) lgr $s3,$fp @@ -1593,7 +1648,7 @@ $code.=<<___ if(1); larl %r1,OPENSSL_s390xcap_P llihh %r0,0x8000 srlg %r0,%r0,32($s1) # check for 32+function code - ng %r0,32(%r1) # check km capability vector + ng %r0,S390X_KM(%r1) # check km capability vector lgr %r0,$s0 # restore the function code la %r1,0($key1) # restore $key1 jz .Lxts_km_vanilla @@ -1628,7 +1683,7 @@ $code.=<<___ if(1); llgc $len,2*$SIZE_T-1($sp) nill $len,0x0f # $len%=16 br $ra - + .align 16 .Lxts_km_vanilla: ___ @@ -1855,7 +1910,7 @@ $code.=<<___; xgr $s1,%r1 lrvgr $s1,$s1 # flip byte order lrvgr $s3,$s3 - srlg $s0,$s1,32 # smash the tweak to 4x32-bits + srlg $s0,$s1,32 # smash the tweak to 4x32-bits stg $s1,$tweak+0($sp) # save the tweak llgfr $s1,$s1 srlg $s2,$s3,32 @@ -1906,7 +1961,7 @@ $code.=<<___; xgr $s1,%r1 lrvgr $s1,$s1 # flip byte order lrvgr $s3,$s3 - srlg $s0,$s1,32 # smash the tweak to 4x32-bits + srlg $s0,$s1,32 # smash the tweak to 4x32-bits stg $s1,$tweak+0($sp) # save the tweak llgfr $s1,$s1 srlg $s2,$s3,32 @@ -2098,7 +2153,7 @@ $code.=<<___; xgr $s1,%r1 lrvgr $s1,$s1 # flip byte order lrvgr $s3,$s3 - srlg 
$s0,$s1,32 # smash the tweak to 4x32-bits + srlg $s0,$s1,32 # smash the tweak to 4x32-bits stg $s1,$tweak+0($sp) # save the tweak llgfr $s1,$s1 srlg $s2,$s3,32 @@ -2220,7 +2275,6 @@ ___ } $code.=<<___; .string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>" -.comm OPENSSL_s390xcap_P,80,8 ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; diff --git a/crypto/aes/asm/aes-sparcv9.pl b/crypto/aes/asm/aes-sparcv9.pl index 403c4d129048..40d1f94ccd72 100755 --- a/crypto/aes/asm/aes-sparcv9.pl +++ b/crypto/aes/asm/aes-sparcv9.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. Rights for redistribution and usage in source and binary # forms are granted according to the OpenSSL license. # ==================================================================== @@ -30,10 +37,11 @@ # optimal decrypt procedure]. 
Compared to GNU C generated code both # procedures are more than 60% faster:-) -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } -if ($bits==64) { $bias=2047; $frame=192; } -else { $bias=0; $frame=112; } +$output = pop; +open STDOUT,">$output"; + +$frame="STACK_FRAME"; +$bias="STACK_BIAS"; $locals=16; $acc0="%l0"; @@ -74,11 +82,13 @@ sub _data_word() while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; } } -$code.=<<___ if ($bits==64); +$code.=<<___; +#include "sparc_arch.h" + +#ifdef __arch64__ .register %g2,#scratch .register %g3,#scratch -___ -$code.=<<___; +#endif .section ".text",#alloc,#execinstr .align 256 diff --git a/crypto/aes/asm/aes-x86_64.pl b/crypto/aes/asm/aes-x86_64.pl index 47f416375d1e..4d1dc9c70199 100755 --- a/crypto/aes/asm/aes-x86_64.pl +++ b/crypto/aes/asm/aes-x86_64.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. 
@@ -37,7 +44,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT=*OUT; $verticalspin=1; # unlike 32-bit version $verticalspin performs @@ -592,15 +599,23 @@ $code.=<<___; .hidden asm_AES_encrypt asm_AES_encrypt: AES_encrypt: +.cfi_startproc + mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 # allocate frame "above" key schedule - mov %rsp,%r10 lea -63(%rdx),%rcx # %rdx is key argument and \$-64,%rsp sub %rsp,%rcx @@ -610,7 +625,8 @@ AES_encrypt: sub \$32,%rsp mov %rsi,16(%rsp) # save out - mov %r10,24(%rsp) # save real stack pointer + mov %rax,24(%rsp) # save original stack pointer +.cfi_cfa_expression %rsp+24,deref,+8 .Lenc_prologue: mov %rdx,$key @@ -637,20 +653,29 @@ AES_encrypt: mov 16(%rsp),$out # restore out mov 24(%rsp),%rsi # restore saved stack pointer +.cfi_def_cfa %rsi,8 mov $s0,0($out) # write output vector mov $s1,4($out) mov $s2,8($out) mov $s3,12($out) - mov (%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp + mov -48(%rsi),%r15 +.cfi_restore %r15 + mov -40(%rsi),%r14 +.cfi_restore %r14 + mov -32(%rsi),%r13 +.cfi_restore %r13 + mov -24(%rsi),%r12 +.cfi_restore %r12 + mov -16(%rsi),%rbp +.cfi_restore %rbp + mov -8(%rsi),%rbx +.cfi_restore %rbx + lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lenc_epilogue: ret +.cfi_endproc .size AES_encrypt,.-AES_encrypt ___ @@ -1190,15 +1215,23 @@ $code.=<<___; .hidden asm_AES_decrypt asm_AES_decrypt: AES_decrypt: +.cfi_startproc + mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 
+.cfi_push %r14 push %r15 +.cfi_push %r15 # allocate frame "above" key schedule - mov %rsp,%r10 lea -63(%rdx),%rcx # %rdx is key argument and \$-64,%rsp sub %rsp,%rcx @@ -1208,7 +1241,8 @@ AES_decrypt: sub \$32,%rsp mov %rsi,16(%rsp) # save out - mov %r10,24(%rsp) # save real stack pointer + mov %rax,24(%rsp) # save original stack pointer +.cfi_cfa_expression %rsp+24,deref,+8 .Ldec_prologue: mov %rdx,$key @@ -1237,20 +1271,29 @@ AES_decrypt: mov 16(%rsp),$out # restore out mov 24(%rsp),%rsi # restore saved stack pointer +.cfi_def_cfa %rsi,8 mov $s0,0($out) # write output vector mov $s1,4($out) mov $s2,8($out) mov $s3,12($out) - mov (%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp + mov -48(%rsi),%r15 +.cfi_restore %r15 + mov -40(%rsi),%r14 +.cfi_restore %r14 + mov -32(%rsi),%r13 +.cfi_restore %r13 + mov -24(%rsi),%r12 +.cfi_restore %r12 + mov -16(%rsi),%rbp +.cfi_restore %rbp + mov -8(%rsi),%rbx +.cfi_restore %rbx + lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Ldec_epilogue: ret +.cfi_endproc .size AES_decrypt,.-AES_decrypt ___ #------------------------------------------------------------------# @@ -1282,30 +1325,42 @@ $code.=<<___; ___ } -# int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, +# int AES_set_encrypt_key(const unsigned char *userKey, const int bits, # AES_KEY *key) $code.=<<___; -.globl private_AES_set_encrypt_key -.type private_AES_set_encrypt_key,\@function,3 +.globl AES_set_encrypt_key +.type AES_set_encrypt_key,\@function,3 .align 16 -private_AES_set_encrypt_key: +AES_set_encrypt_key: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp - push %r12 # redundant, but allows to share +.cfi_push %rbp + push %r12 # redundant, but allows to share +.cfi_push %r12 push %r13 # exception handler... 
+.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 sub \$8,%rsp +.cfi_adjust_cfa_offset 8 .Lenc_key_prologue: call _x86_64_AES_set_encrypt_key mov 40(%rsp),%rbp +.cfi_restore %rbp mov 48(%rsp),%rbx +.cfi_restore %rbx add \$56,%rsp +.cfi_adjust_cfa_offset -56 .Lenc_key_epilogue: ret -.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key +.cfi_endproc +.size AES_set_encrypt_key,.-AES_set_encrypt_key .type _x86_64_AES_set_encrypt_key,\@abi-omnipotent .align 16 @@ -1417,7 +1472,7 @@ $code.=<<___; xor %rax,%rax jmp .Lexit -.L14rounds: +.L14rounds: mov 0(%rsi),%rax # copy first 8 dwords mov 8(%rsi),%rbx mov 16(%rsi),%rcx @@ -1548,20 +1603,28 @@ $code.=<<___; ___ } -# int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, +# int AES_set_decrypt_key(const unsigned char *userKey, const int bits, # AES_KEY *key) $code.=<<___; -.globl private_AES_set_decrypt_key -.type private_AES_set_decrypt_key,\@function,3 +.globl AES_set_decrypt_key +.type AES_set_decrypt_key,\@function,3 .align 16 -private_AES_set_decrypt_key: +AES_set_decrypt_key: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 push %rdx # save key schedule +.cfi_adjust_cfa_offset 8 .Ldec_key_prologue: call _x86_64_AES_set_encrypt_key @@ -1615,15 +1678,23 @@ $code.=<<___; xor %rax,%rax .Labort: mov 8(%rsp),%r15 +.cfi_restore %r15 mov 16(%rsp),%r14 +.cfi_restore %r14 mov 24(%rsp),%r13 +.cfi_restore %r13 mov 32(%rsp),%r12 +.cfi_restore %r12 mov 40(%rsp),%rbp +.cfi_restore %rbp mov 48(%rsp),%rbx +.cfi_restore %rbx add \$56,%rsp +.cfi_adjust_cfa_offset -56 .Ldec_key_epilogue: ret -.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key +.cfi_endproc +.size AES_set_decrypt_key,.-AES_set_decrypt_key ___ # void AES_cbc_encrypt (const void char *inp, unsigned char *out, @@ -1653,25 +1724,32 @@ $code.=<<___; .hidden asm_AES_cbc_encrypt 
asm_AES_cbc_encrypt: AES_cbc_encrypt: +.cfi_startproc cmp \$0,%rdx # check length je .Lcbc_epilogue pushfq +.cfi_push 49 # %rflags push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lcbc_prologue: cld mov %r9d,%r9d # clear upper half of enc lea .LAES_Te(%rip),$sbox + lea .LAES_Td(%rip),%r10 cmp \$0,%r9 - jne .Lcbc_picked_te - lea .LAES_Td(%rip),$sbox -.Lcbc_picked_te: + cmoveq %r10,$sbox mov OPENSSL_ia32cap_P(%rip),%r10d cmp \$$speed_limit,%rdx @@ -1707,8 +1785,10 @@ AES_cbc_encrypt: .Lcbc_te_ok: xchg %rsp,$key +.cfi_def_cfa_register $key #add \$8,%rsp # reserve for return address! mov $key,$_rsp # save %rsp +.cfi_cfa_expression $_rsp,deref,+64 .Lcbc_fast_body: mov %rdi,$_inp # save copy of inp mov %rsi,$_out # save copy of out @@ -1938,7 +2018,7 @@ AES_cbc_encrypt: lea ($key,%rax),%rax mov %rax,$keyend - # pick Te4 copy which can't "overlap" with stack frame or key scdedule + # pick Te4 copy which can't "overlap" with stack frame or key schedule lea 2048($sbox),$sbox lea 768-8(%rsp),%rax sub $sbox,%rax @@ -2090,17 +2170,27 @@ AES_cbc_encrypt: .align 16 .Lcbc_exit: mov $_rsp,%rsi +.cfi_def_cfa %rsi,64 mov (%rsi),%r15 +.cfi_restore %r15 mov 8(%rsi),%r14 +.cfi_restore %r14 mov 16(%rsi),%r13 +.cfi_restore %r13 mov 24(%rsi),%r12 +.cfi_restore %r12 mov 32(%rsi),%rbp +.cfi_restore %rbp mov 40(%rsi),%rbx +.cfi_restore %rbx lea 48(%rsi),%rsp +.cfi_def_cfa %rsp,16 .Lcbc_popfq: popfq +.cfi_pop 49 # %rflags .Lcbc_epilogue: ret +.cfi_endproc .size AES_cbc_encrypt,.-AES_cbc_encrypt ___ } @@ -2573,7 +2663,6 @@ block_se_handler: jae .Lin_block_prologue mov 24(%rax),%rax # pull saved real stack pointer - lea 48(%rax),%rax # adjust... 
mov -8(%rax),%rbx mov -16(%rax),%rbp @@ -2770,13 +2859,13 @@ cbc_se_handler: .rva .LSEH_end_AES_decrypt .rva .LSEH_info_AES_decrypt - .rva .LSEH_begin_private_AES_set_encrypt_key - .rva .LSEH_end_private_AES_set_encrypt_key - .rva .LSEH_info_private_AES_set_encrypt_key + .rva .LSEH_begin_AES_set_encrypt_key + .rva .LSEH_end_AES_set_encrypt_key + .rva .LSEH_info_AES_set_encrypt_key - .rva .LSEH_begin_private_AES_set_decrypt_key - .rva .LSEH_end_private_AES_set_decrypt_key - .rva .LSEH_info_private_AES_set_decrypt_key + .rva .LSEH_begin_AES_set_decrypt_key + .rva .LSEH_end_AES_set_decrypt_key + .rva .LSEH_info_AES_set_decrypt_key .rva .LSEH_begin_AES_cbc_encrypt .rva .LSEH_end_AES_cbc_encrypt @@ -2792,11 +2881,11 @@ cbc_se_handler: .byte 9,0,0,0 .rva block_se_handler .rva .Ldec_prologue,.Ldec_epilogue # HandlerData[] -.LSEH_info_private_AES_set_encrypt_key: +.LSEH_info_AES_set_encrypt_key: .byte 9,0,0,0 .rva key_se_handler .rva .Lenc_key_prologue,.Lenc_key_epilogue # HandlerData[] -.LSEH_info_private_AES_set_decrypt_key: +.LSEH_info_AES_set_decrypt_key: .byte 9,0,0,0 .rva key_se_handler .rva .Ldec_key_prologue,.Ldec_key_epilogue # HandlerData[] diff --git a/crypto/aes/asm/aesfx-sparcv9.pl b/crypto/aes/asm/aesfx-sparcv9.pl new file mode 100755 index 000000000000..9ddf0b4b00ec --- /dev/null +++ b/crypto/aes/asm/aesfx-sparcv9.pl @@ -0,0 +1,1270 @@ +#! /usr/bin/env perl +# Copyright 2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + +# +# ==================================================================== +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. 
For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# March 2016 +# +# Initial support for Fujitsu SPARC64 X/X+ comprises minimally +# required key setup and single-block procedures. +# +# April 2016 +# +# Add "teaser" CBC and CTR mode-specific subroutines. "Teaser" means +# that parallelizable nature of CBC decrypt and CTR is not utilized +# yet. CBC encrypt on the other hand is as good as it can possibly +# get processing one byte in 4.1 cycles with 128-bit key on SPARC64 X. +# This is ~6x faster than pure software implementation... +# +# July 2016 +# +# Switch from faligndata to fshiftorx, which allows to omit alignaddr +# instructions and improve single-block and short-input performance +# with misaligned data. + +$output = pop; +open STDOUT,">$output"; + +{ +my ($inp,$out,$key,$rounds,$tmp,$mask) = map("%o$_",(0..5)); + +$code.=<<___; +#include "sparc_arch.h" + +#define LOCALS (STACK_BIAS+STACK_FRAME) + +.text + +.globl aes_fx_encrypt +.align 32 +aes_fx_encrypt: + and $inp, 7, $tmp ! is input aligned? + andn $inp, 7, $inp + ldd [$key + 0], %f6 ! round[0] + ldd [$key + 8], %f8 + mov %o7, %g1 + ld [$key + 240], $rounds + +1: call .+8 + add %o7, .Linp_align-1b, %o7 + + sll $tmp, 3, $tmp + ldd [$inp + 0], %f0 ! load input + brz,pt $tmp, .Lenc_inp_aligned + ldd [$inp + 8], %f2 + + ldd [%o7 + $tmp], %f14 ! shift left params + ldd [$inp + 16], %f4 + fshiftorx %f0, %f2, %f14, %f0 + fshiftorx %f2, %f4, %f14, %f2 + +.Lenc_inp_aligned: + ldd [$key + 16], %f10 ! round[1] + ldd [$key + 24], %f12 + + fxor %f0, %f6, %f0 ! ^=round[0] + fxor %f2, %f8, %f2 + ldd [$key + 32], %f6 ! 
round[2] + ldd [$key + 40], %f8 + add $key, 32, $key + sub $rounds, 4, $rounds + +.Loop_enc: + fmovd %f0, %f4 + faesencx %f2, %f10, %f0 + faesencx %f4, %f12, %f2 + ldd [$key + 16], %f10 + ldd [$key + 24], %f12 + add $key, 32, $key + + fmovd %f0, %f4 + faesencx %f2, %f6, %f0 + faesencx %f4, %f8, %f2 + ldd [$key + 0], %f6 + ldd [$key + 8], %f8 + + brnz,a $rounds, .Loop_enc + sub $rounds, 2, $rounds + + andcc $out, 7, $tmp ! is output aligned? + andn $out, 7, $out + mov 0xff, $mask + srl $mask, $tmp, $mask + add %o7, 64, %o7 + sll $tmp, 3, $tmp + + fmovd %f0, %f4 + faesencx %f2, %f10, %f0 + faesencx %f4, %f12, %f2 + ldd [%o7 + $tmp], %f14 ! shift right params + + fmovd %f0, %f4 + faesenclx %f2, %f6, %f0 + faesenclx %f4, %f8, %f2 + + bnz,pn %icc, .Lenc_out_unaligned + mov %g1, %o7 + + std %f0, [$out + 0] + retl + std %f2, [$out + 8] + +.align 16 +.Lenc_out_unaligned: + add $out, 16, $inp + orn %g0, $mask, $tmp + fshiftorx %f0, %f0, %f14, %f4 + fshiftorx %f0, %f2, %f14, %f6 + fshiftorx %f2, %f2, %f14, %f8 + + stda %f4, [$out + $mask]0xc0 ! partial store + std %f6, [$out + 8] + stda %f8, [$inp + $tmp]0xc0 ! partial store + retl + nop +.type aes_fx_encrypt,#function +.size aes_fx_encrypt,.-aes_fx_encrypt + +.globl aes_fx_decrypt +.align 32 +aes_fx_decrypt: + and $inp, 7, $tmp ! is input aligned? + andn $inp, 7, $inp + ldd [$key + 0], %f6 ! round[0] + ldd [$key + 8], %f8 + mov %o7, %g1 + ld [$key + 240], $rounds + +1: call .+8 + add %o7, .Linp_align-1b, %o7 + + sll $tmp, 3, $tmp + ldd [$inp + 0], %f0 ! load input + brz,pt $tmp, .Ldec_inp_aligned + ldd [$inp + 8], %f2 + + ldd [%o7 + $tmp], %f14 ! shift left params + ldd [$inp + 16], %f4 + fshiftorx %f0, %f2, %f14, %f0 + fshiftorx %f2, %f4, %f14, %f2 + +.Ldec_inp_aligned: + ldd [$key + 16], %f10 ! round[1] + ldd [$key + 24], %f12 + + fxor %f0, %f6, %f0 ! ^=round[0] + fxor %f2, %f8, %f2 + ldd [$key + 32], %f6 ! 
round[2] + ldd [$key + 40], %f8 + add $key, 32, $key + sub $rounds, 4, $rounds + +.Loop_dec: + fmovd %f0, %f4 + faesdecx %f2, %f10, %f0 + faesdecx %f4, %f12, %f2 + ldd [$key + 16], %f10 + ldd [$key + 24], %f12 + add $key, 32, $key + + fmovd %f0, %f4 + faesdecx %f2, %f6, %f0 + faesdecx %f4, %f8, %f2 + ldd [$key + 0], %f6 + ldd [$key + 8], %f8 + + brnz,a $rounds, .Loop_dec + sub $rounds, 2, $rounds + + andcc $out, 7, $tmp ! is output aligned? + andn $out, 7, $out + mov 0xff, $mask + srl $mask, $tmp, $mask + add %o7, 64, %o7 + sll $tmp, 3, $tmp + + fmovd %f0, %f4 + faesdecx %f2, %f10, %f0 + faesdecx %f4, %f12, %f2 + ldd [%o7 + $tmp], %f14 ! shift right params + + fmovd %f0, %f4 + faesdeclx %f2, %f6, %f0 + faesdeclx %f4, %f8, %f2 + + bnz,pn %icc, .Ldec_out_unaligned + mov %g1, %o7 + + std %f0, [$out + 0] + retl + std %f2, [$out + 8] + +.align 16 +.Ldec_out_unaligned: + add $out, 16, $inp + orn %g0, $mask, $tmp + fshiftorx %f0, %f0, %f14, %f4 + fshiftorx %f0, %f2, %f14, %f6 + fshiftorx %f2, %f2, %f14, %f8 + + stda %f4, [$out + $mask]0xc0 ! partial store + std %f6, [$out + 8] + stda %f8, [$inp + $tmp]0xc0 ! partial store + retl + nop +.type aes_fx_decrypt,#function +.size aes_fx_decrypt,.-aes_fx_decrypt +___ +} +{ +my ($inp,$bits,$out,$tmp,$inc) = map("%o$_",(0..5)); +$code.=<<___; +.globl aes_fx_set_decrypt_key +.align 32 +aes_fx_set_decrypt_key: + b .Lset_encrypt_key + mov -1, $inc + retl + nop +.type aes_fx_set_decrypt_key,#function +.size aes_fx_set_decrypt_key,.-aes_fx_set_decrypt_key + +.globl aes_fx_set_encrypt_key +.align 32 +aes_fx_set_encrypt_key: + mov 1, $inc + nop +.Lset_encrypt_key: + and $inp, 7, $tmp + andn $inp, 7, $inp + sll $tmp, 3, $tmp + mov %o7, %g1 + +1: call .+8 + add %o7, .Linp_align-1b, %o7 + + ldd [%o7 + $tmp], %f10 ! 
shift left params + mov %g1, %o7 + + cmp $bits, 192 + ldd [$inp + 0], %f0 + bl,pt %icc, .L128 + ldd [$inp + 8], %f2 + + be,pt %icc, .L192 + ldd [$inp + 16], %f4 + brz,pt $tmp, .L256aligned + ldd [$inp + 24], %f6 + + ldd [$inp + 32], %f8 + fshiftorx %f0, %f2, %f10, %f0 + fshiftorx %f2, %f4, %f10, %f2 + fshiftorx %f4, %f6, %f10, %f4 + fshiftorx %f6, %f8, %f10, %f6 + +.L256aligned: + mov 14, $bits + and $inc, `14*16`, $tmp + st $bits, [$out + 240] ! store rounds + add $out, $tmp, $out ! start or end of key schedule + sllx $inc, 4, $inc ! 16 or -16 +___ +for ($i=0; $i<6; $i++) { + $code.=<<___; + std %f0, [$out + 0] + faeskeyx %f6, `0x10+$i`, %f0 + std %f2, [$out + 8] + add $out, $inc, $out + faeskeyx %f0, 0x00, %f2 + std %f4, [$out + 0] + faeskeyx %f2, 0x01, %f4 + std %f6, [$out + 8] + add $out, $inc, $out + faeskeyx %f4, 0x00, %f6 +___ +} +$code.=<<___; + std %f0, [$out + 0] + faeskeyx %f6, `0x10+$i`, %f0 + std %f2, [$out + 8] + add $out, $inc, $out + faeskeyx %f0, 0x00, %f2 + std %f4,[$out + 0] + std %f6,[$out + 8] + add $out, $inc, $out + std %f0,[$out + 0] + std %f2,[$out + 8] + retl + xor %o0, %o0, %o0 ! return 0 + +.align 16 +.L192: + brz,pt $tmp, .L192aligned + nop + + ldd [$inp + 24], %f6 + fshiftorx %f0, %f2, %f10, %f0 + fshiftorx %f2, %f4, %f10, %f2 + fshiftorx %f4, %f6, %f10, %f4 + +.L192aligned: + mov 12, $bits + and $inc, `12*16`, $tmp + st $bits, [$out + 240] ! store rounds + add $out, $tmp, $out ! start or end of key schedule + sllx $inc, 4, $inc ! 
16 or -16 +___ +for ($i=0; $i<8; $i+=2) { + $code.=<<___; + std %f0, [$out + 0] + faeskeyx %f4, `0x10+$i`, %f0 + std %f2, [$out + 8] + add $out, $inc, $out + faeskeyx %f0, 0x00, %f2 + std %f4, [$out + 0] + faeskeyx %f2, 0x00, %f4 + std %f0, [$out + 8] + add $out, $inc, $out + faeskeyx %f4, `0x10+$i+1`, %f0 + std %f2, [$out + 0] + faeskeyx %f0, 0x00, %f2 + std %f4, [$out + 8] + add $out, $inc, $out +___ +$code.=<<___ if ($i<6); + faeskeyx %f2, 0x00, %f4 +___ +} +$code.=<<___; + std %f0, [$out + 0] + std %f2, [$out + 8] + retl + xor %o0, %o0, %o0 ! return 0 + +.align 16 +.L128: + brz,pt $tmp, .L128aligned + nop + + ldd [$inp + 16], %f4 + fshiftorx %f0, %f2, %f10, %f0 + fshiftorx %f2, %f4, %f10, %f2 + +.L128aligned: + mov 10, $bits + and $inc, `10*16`, $tmp + st $bits, [$out + 240] ! store rounds + add $out, $tmp, $out ! start or end of key schedule + sllx $inc, 4, $inc ! 16 or -16 +___ +for ($i=0; $i<10; $i++) { + $code.=<<___; + std %f0, [$out + 0] + faeskeyx %f2, `0x10+$i`, %f0 + std %f2, [$out + 8] + add $out, $inc, $out + faeskeyx %f0, 0x00, %f2 +___ +} +$code.=<<___; + std %f0, [$out + 0] + std %f2, [$out + 8] + retl + xor %o0, %o0, %o0 ! return 0 +.type aes_fx_set_encrypt_key,#function +.size aes_fx_set_encrypt_key,.-aes_fx_set_encrypt_key +___ +} +{ +my ($inp,$out,$len,$key,$ivp,$dir) = map("%i$_",(0..5)); +my ($rounds,$inner,$end,$inc,$ialign,$oalign,$mask) = map("%l$_",(0..7)); +my ($iv0,$iv1,$r0hi,$r0lo,$rlhi,$rllo,$in0,$in1,$intail,$outhead,$fshift) + = map("%f$_",grep { !($_ & 1) } (16 .. 62)); +my ($ileft,$iright) = ($ialign,$oalign); + +$code.=<<___; +.globl aes_fx_cbc_encrypt +.align 32 +aes_fx_cbc_encrypt: + save %sp, -STACK_FRAME-16, %sp + srln $len, 4, $len + and $inp, 7, $ialign + andn $inp, 7, $inp + brz,pn $len, .Lcbc_no_data + sll $ialign, 3, $ileft + +1: call .+8 + add %o7, .Linp_align-1b, %o7 + + ld [$key + 240], $rounds + and $out, 7, $oalign + ld [$ivp + 0], %f0 ! 
load ivec + andn $out, 7, $out + ld [$ivp + 4], %f1 + sll $oalign, 3, $mask + ld [$ivp + 8], %f2 + ld [$ivp + 12], %f3 + + sll $rounds, 4, $rounds + add $rounds, $key, $end + ldd [$key + 0], $r0hi ! round[0] + ldd [$key + 8], $r0lo + + add $inp, 16, $inp + sub $len, 1, $len + ldd [$end + 0], $rlhi ! round[last] + ldd [$end + 8], $rllo + + mov 16, $inc + movrz $len, 0, $inc + ldd [$key + 16], %f10 ! round[1] + ldd [$key + 24], %f12 + + ldd [%o7 + $ileft], $fshift ! shift left params + add %o7, 64, %o7 + ldd [$inp - 16], $in0 ! load input + ldd [$inp - 8], $in1 + ldda [$inp]0x82, $intail ! non-faulting load + brz $dir, .Lcbc_decrypt + add $inp, $inc, $inp ! inp+=16 + + fxor $r0hi, %f0, %f0 ! ivec^=round[0] + fxor $r0lo, %f2, %f2 + fshiftorx $in0, $in1, $fshift, $in0 + fshiftorx $in1, $intail, $fshift, $in1 + nop + +.Loop_cbc_enc: + fxor $in0, %f0, %f0 ! inp^ivec^round[0] + fxor $in1, %f2, %f2 + ldd [$key + 32], %f6 ! round[2] + ldd [$key + 40], %f8 + add $key, 32, $end + sub $rounds, 16*6, $inner + +.Lcbc_enc: + fmovd %f0, %f4 + faesencx %f2, %f10, %f0 + faesencx %f4, %f12, %f2 + ldd [$end + 16], %f10 + ldd [$end + 24], %f12 + add $end, 32, $end + + fmovd %f0, %f4 + faesencx %f2, %f6, %f0 + faesencx %f4, %f8, %f2 + ldd [$end + 0], %f6 + ldd [$end + 8], %f8 + + brnz,a $inner, .Lcbc_enc + sub $inner, 16*2, $inner + + fmovd %f0, %f4 + faesencx %f2, %f10, %f0 + faesencx %f4, %f12, %f2 + ldd [$end + 16], %f10 ! round[last-1] + ldd [$end + 24], %f12 + + movrz $len, 0, $inc + fmovd $intail, $in0 + ldd [$inp - 8], $in1 ! load next input block + ldda [$inp]0x82, $intail ! non-faulting load + add $inp, $inc, $inp ! inp+=16 + + fmovd %f0, %f4 + faesencx %f2, %f6, %f0 + faesencx %f4, %f8, %f2 + + fshiftorx $in0, $in1, $fshift, $in0 + fshiftorx $in1, $intail, $fshift, $in1 + + fmovd %f0, %f4 + faesencx %f2, %f10, %f0 + faesencx %f4, %f12, %f2 + ldd [$key + 16], %f10 ! round[1] + ldd [$key + 24], %f12 + + fxor $r0hi, $in0, $in0 ! 
inp^=round[0] + fxor $r0lo, $in1, $in1 + + fmovd %f0, %f4 + faesenclx %f2, $rlhi, %f0 + faesenclx %f4, $rllo, %f2 + + brnz,pn $oalign, .Lcbc_enc_unaligned_out + nop + + std %f0, [$out + 0] + std %f2, [$out + 8] + add $out, 16, $out + + brnz,a $len, .Loop_cbc_enc + sub $len, 1, $len + + st %f0, [$ivp + 0] ! output ivec + st %f1, [$ivp + 4] + st %f2, [$ivp + 8] + st %f3, [$ivp + 12] + +.Lcbc_no_data: + ret + restore + +.align 32 +.Lcbc_enc_unaligned_out: + ldd [%o7 + $mask], $fshift ! shift right params + mov 0xff, $mask + srl $mask, $oalign, $mask + sub %g0, $ileft, $iright + + fshiftorx %f0, %f0, $fshift, %f6 + fshiftorx %f0, %f2, $fshift, %f8 + + stda %f6, [$out + $mask]0xc0 ! partial store + orn %g0, $mask, $mask + std %f8, [$out + 8] + add $out, 16, $out + brz $len, .Lcbc_enc_unaligned_out_done + sub $len, 1, $len + b .Loop_cbc_enc_unaligned_out + nop + +.align 32 +.Loop_cbc_enc_unaligned_out: + fmovd %f2, $outhead + fxor $in0, %f0, %f0 ! inp^ivec^round[0] + fxor $in1, %f2, %f2 + ldd [$key + 32], %f6 ! round[2] + ldd [$key + 40], %f8 + + fmovd %f0, %f4 + faesencx %f2, %f10, %f0 + faesencx %f4, %f12, %f2 + ldd [$key + 48], %f10 ! round[3] + ldd [$key + 56], %f12 + + ldx [$inp - 16], %o0 + ldx [$inp - 8], %o1 + brz $ileft, .Lcbc_enc_aligned_inp + movrz $len, 0, $inc + + ldx [$inp], %o2 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + sllx %o1, $ileft, %o1 + or %g1, %o0, %o0 + srlx %o2, $iright, %o2 + or %o2, %o1, %o1 + +.Lcbc_enc_aligned_inp: + fmovd %f0, %f4 + faesencx %f2, %f6, %f0 + faesencx %f4, %f8, %f2 + ldd [$key + 64], %f6 ! round[4] + ldd [$key + 72], %f8 + add $key, 64, $end + sub $rounds, 16*8, $inner + + stx %o0, [%sp + LOCALS + 0] + stx %o1, [%sp + LOCALS + 8] + add $inp, $inc, $inp ! 
inp+=16 + nop + +.Lcbc_enc_unaligned: + fmovd %f0, %f4 + faesencx %f2, %f10, %f0 + faesencx %f4, %f12, %f2 + ldd [$end + 16], %f10 + ldd [$end + 24], %f12 + add $end, 32, $end + + fmovd %f0, %f4 + faesencx %f2, %f6, %f0 + faesencx %f4, %f8, %f2 + ldd [$end + 0], %f6 + ldd [$end + 8], %f8 + + brnz,a $inner, .Lcbc_enc_unaligned + sub $inner, 16*2, $inner + + fmovd %f0, %f4 + faesencx %f2, %f10, %f0 + faesencx %f4, %f12, %f2 + ldd [$end + 16], %f10 ! round[last-1] + ldd [$end + 24], %f12 + + fmovd %f0, %f4 + faesencx %f2, %f6, %f0 + faesencx %f4, %f8, %f2 + + ldd [%sp + LOCALS + 0], $in0 + ldd [%sp + LOCALS + 8], $in1 + + fmovd %f0, %f4 + faesencx %f2, %f10, %f0 + faesencx %f4, %f12, %f2 + ldd [$key + 16], %f10 ! round[1] + ldd [$key + 24], %f12 + + fxor $r0hi, $in0, $in0 ! inp^=round[0] + fxor $r0lo, $in1, $in1 + + fmovd %f0, %f4 + faesenclx %f2, $rlhi, %f0 + faesenclx %f4, $rllo, %f2 + + fshiftorx $outhead, %f0, $fshift, %f6 + fshiftorx %f0, %f2, $fshift, %f8 + std %f6, [$out + 0] + std %f8, [$out + 8] + add $out, 16, $out + + brnz,a $len, .Loop_cbc_enc_unaligned_out + sub $len, 1, $len + +.Lcbc_enc_unaligned_out_done: + fshiftorx %f2, %f2, $fshift, %f8 + stda %f8, [$out + $mask]0xc0 ! partial store + + st %f0, [$ivp + 0] ! output ivec + st %f1, [$ivp + 4] + st %f2, [$ivp + 8] + st %f3, [$ivp + 12] + + ret + restore + +.align 32 +.Lcbc_decrypt: + fshiftorx $in0, $in1, $fshift, $in0 + fshiftorx $in1, $intail, $fshift, $in1 + fmovd %f0, $iv0 + fmovd %f2, $iv1 + +.Loop_cbc_dec: + fxor $in0, $r0hi, %f0 ! inp^round[0] + fxor $in1, $r0lo, %f2 + ldd [$key + 32], %f6 ! 
round[2] + ldd [$key + 40], %f8 + add $key, 32, $end + sub $rounds, 16*6, $inner + +.Lcbc_dec: + fmovd %f0, %f4 + faesdecx %f2, %f10, %f0 + faesdecx %f4, %f12, %f2 + ldd [$end + 16], %f10 + ldd [$end + 24], %f12 + add $end, 32, $end + + fmovd %f0, %f4 + faesdecx %f2, %f6, %f0 + faesdecx %f4, %f8, %f2 + ldd [$end + 0], %f6 + ldd [$end + 8], %f8 + + brnz,a $inner, .Lcbc_dec + sub $inner, 16*2, $inner + + fmovd %f0, %f4 + faesdecx %f2, %f10, %f0 + faesdecx %f4, %f12, %f2 + ldd [$end + 16], %f10 ! round[last-1] + ldd [$end + 24], %f12 + + fmovd %f0, %f4 + faesdecx %f2, %f6, %f0 + faesdecx %f4, %f8, %f2 + fxor $iv0, $rlhi, %f6 ! ivec^round[last] + fxor $iv1, $rllo, %f8 + fmovd $in0, $iv0 + fmovd $in1, $iv1 + + movrz $len, 0, $inc + fmovd $intail, $in0 + ldd [$inp - 8], $in1 ! load next input block + ldda [$inp]0x82, $intail ! non-faulting load + add $inp, $inc, $inp ! inp+=16 + + fmovd %f0, %f4 + faesdecx %f2, %f10, %f0 + faesdecx %f4, %f12, %f2 + ldd [$key + 16], %f10 ! round[1] + ldd [$key + 24], %f12 + + fshiftorx $in0, $in1, $fshift, $in0 + fshiftorx $in1, $intail, $fshift, $in1 + + fmovd %f0, %f4 + faesdeclx %f2, %f6, %f0 + faesdeclx %f4, %f8, %f2 + + brnz,pn $oalign, .Lcbc_dec_unaligned_out + nop + + std %f0, [$out + 0] + std %f2, [$out + 8] + add $out, 16, $out + + brnz,a $len, .Loop_cbc_dec + sub $len, 1, $len + + st $iv0, [$ivp + 0] ! output ivec + st $iv0#lo, [$ivp + 4] + st $iv1, [$ivp + 8] + st $iv1#lo, [$ivp + 12] + + ret + restore + +.align 32 +.Lcbc_dec_unaligned_out: + ldd [%o7 + $mask], $fshift ! shift right params + mov 0xff, $mask + srl $mask, $oalign, $mask + sub %g0, $ileft, $iright + + fshiftorx %f0, %f0, $fshift, %f6 + fshiftorx %f0, %f2, $fshift, %f8 + + stda %f6, [$out + $mask]0xc0 ! 
partial store + orn %g0, $mask, $mask + std %f8, [$out + 8] + add $out, 16, $out + brz $len, .Lcbc_dec_unaligned_out_done + sub $len, 1, $len + b .Loop_cbc_dec_unaligned_out + nop + +.align 32 +.Loop_cbc_dec_unaligned_out: + fmovd %f2, $outhead + fxor $in0, $r0hi, %f0 ! inp^round[0] + fxor $in1, $r0lo, %f2 + ldd [$key + 32], %f6 ! round[2] + ldd [$key + 40], %f8 + + fmovd %f0, %f4 + faesdecx %f2, %f10, %f0 + faesdecx %f4, %f12, %f2 + ldd [$key + 48], %f10 ! round[3] + ldd [$key + 56], %f12 + + ldx [$inp - 16], %o0 + ldx [$inp - 8], %o1 + brz $ileft, .Lcbc_dec_aligned_inp + movrz $len, 0, $inc + + ldx [$inp], %o2 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + sllx %o1, $ileft, %o1 + or %g1, %o0, %o0 + srlx %o2, $iright, %o2 + or %o2, %o1, %o1 + +.Lcbc_dec_aligned_inp: + fmovd %f0, %f4 + faesdecx %f2, %f6, %f0 + faesdecx %f4, %f8, %f2 + ldd [$key + 64], %f6 ! round[4] + ldd [$key + 72], %f8 + add $key, 64, $end + sub $rounds, 16*8, $inner + + stx %o0, [%sp + LOCALS + 0] + stx %o1, [%sp + LOCALS + 8] + add $inp, $inc, $inp ! inp+=16 + nop + +.Lcbc_dec_unaligned: + fmovd %f0, %f4 + faesdecx %f2, %f10, %f0 + faesdecx %f4, %f12, %f2 + ldd [$end + 16], %f10 + ldd [$end + 24], %f12 + add $end, 32, $end + + fmovd %f0, %f4 + faesdecx %f2, %f6, %f0 + faesdecx %f4, %f8, %f2 + ldd [$end + 0], %f6 + ldd [$end + 8], %f8 + + brnz,a $inner, .Lcbc_dec_unaligned + sub $inner, 16*2, $inner + + fmovd %f0, %f4 + faesdecx %f2, %f10, %f0 + faesdecx %f4, %f12, %f2 + ldd [$end + 16], %f10 ! round[last-1] + ldd [$end + 24], %f12 + + fmovd %f0, %f4 + faesdecx %f2, %f6, %f0 + faesdecx %f4, %f8, %f2 + + fxor $iv0, $rlhi, %f6 ! ivec^round[last] + fxor $iv1, $rllo, %f8 + fmovd $in0, $iv0 + fmovd $in1, $iv1 + ldd [%sp + LOCALS + 0], $in0 + ldd [%sp + LOCALS + 8], $in1 + + fmovd %f0, %f4 + faesdecx %f2, %f10, %f0 + faesdecx %f4, %f12, %f2 + ldd [$key + 16], %f10 ! 
round[1] + ldd [$key + 24], %f12 + + fmovd %f0, %f4 + faesdeclx %f2, %f6, %f0 + faesdeclx %f4, %f8, %f2 + + fshiftorx $outhead, %f0, $fshift, %f6 + fshiftorx %f0, %f2, $fshift, %f8 + std %f6, [$out + 0] + std %f8, [$out + 8] + add $out, 16, $out + + brnz,a $len, .Loop_cbc_dec_unaligned_out + sub $len, 1, $len + +.Lcbc_dec_unaligned_out_done: + fshiftorx %f2, %f2, $fshift, %f8 + stda %f8, [$out + $mask]0xc0 ! partial store + + st $iv0, [$ivp + 0] ! output ivec + st $iv0#lo, [$ivp + 4] + st $iv1, [$ivp + 8] + st $iv1#lo, [$ivp + 12] + + ret + restore +.type aes_fx_cbc_encrypt,#function +.size aes_fx_cbc_encrypt,.-aes_fx_cbc_encrypt +___ +} +{ +my ($inp,$out,$len,$key,$ivp) = map("%i$_",(0..5)); +my ($rounds,$inner,$end,$inc,$ialign,$oalign,$mask) = map("%l$_",(0..7)); +my ($ctr0,$ctr1,$r0hi,$r0lo,$rlhi,$rllo,$in0,$in1,$intail,$outhead,$fshift) + = map("%f$_",grep { !($_ & 1) } (16 .. 62)); +my ($ileft,$iright) = ($ialign, $oalign); +my $one = "%f14"; + +$code.=<<___; +.globl aes_fx_ctr32_encrypt_blocks +.align 32 +aes_fx_ctr32_encrypt_blocks: + save %sp, -STACK_FRAME-16, %sp + srln $len, 0, $len + and $inp, 7, $ialign + andn $inp, 7, $inp + brz,pn $len, .Lctr32_no_data + sll $ialign, 3, $ileft + +.Lpic: call .+8 + add %o7, .Linp_align - .Lpic, %o7 + + ld [$key + 240], $rounds + and $out, 7, $oalign + ld [$ivp + 0], $ctr0 ! load counter + andn $out, 7, $out + ld [$ivp + 4], $ctr0#lo + sll $oalign, 3, $mask + ld [$ivp + 8], $ctr1 + ld [$ivp + 12], $ctr1#lo + ldd [%o7 + 128], $one + + sll $rounds, 4, $rounds + add $rounds, $key, $end + ldd [$key + 0], $r0hi ! round[0] + ldd [$key + 8], $r0lo + + add $inp, 16, $inp + sub $len, 1, $len + ldd [$key + 16], %f10 ! round[1] + ldd [$key + 24], %f12 + + mov 16, $inc + movrz $len, 0, $inc + ldd [$end + 0], $rlhi ! round[last] + ldd [$end + 8], $rllo + + ldd [%o7 + $ileft], $fshift ! shiftleft params + add %o7, 64, %o7 + ldd [$inp - 16], $in0 ! load input + ldd [$inp - 8], $in1 + ldda [$inp]0x82, $intail ! 
non-faulting load + add $inp, $inc, $inp ! inp+=16 + + fshiftorx $in0, $in1, $fshift, $in0 + fshiftorx $in1, $intail, $fshift, $in1 + +.Loop_ctr32: + fxor $ctr0, $r0hi, %f0 ! counter^round[0] + fxor $ctr1, $r0lo, %f2 + ldd [$key + 32], %f6 ! round[2] + ldd [$key + 40], %f8 + add $key, 32, $end + sub $rounds, 16*6, $inner + +.Lctr32_enc: + fmovd %f0, %f4 + faesencx %f2, %f10, %f0 + faesencx %f4, %f12, %f2 + ldd [$end + 16], %f10 + ldd [$end + 24], %f12 + add $end, 32, $end + + fmovd %f0, %f4 + faesencx %f2, %f6, %f0 + faesencx %f4, %f8, %f2 + ldd [$end + 0], %f6 + ldd [$end + 8], %f8 + + brnz,a $inner, .Lctr32_enc + sub $inner, 16*2, $inner + + fmovd %f0, %f4 + faesencx %f2, %f10, %f0 + faesencx %f4, %f12, %f2 + ldd [$end + 16], %f10 ! round[last-1] + ldd [$end + 24], %f12 + + fmovd %f0, %f4 + faesencx %f2, %f6, %f0 + faesencx %f4, %f8, %f2 + fxor $in0, $rlhi, %f6 ! inp^round[last] + fxor $in1, $rllo, %f8 + + movrz $len, 0, $inc + fmovd $intail, $in0 + ldd [$inp - 8], $in1 ! load next input block + ldda [$inp]0x82, $intail ! non-faulting load + add $inp, $inc, $inp ! inp+=16 + + fmovd %f0, %f4 + faesencx %f2, %f10, %f0 + faesencx %f4, %f12, %f2 + ldd [$key + 16], %f10 ! round[1] + ldd [$key + 24], %f12 + + fshiftorx $in0, $in1, $fshift, $in0 + fshiftorx $in1, $intail, $fshift, $in1 + fpadd32 $ctr1, $one, $ctr1 ! increment counter + + fmovd %f0, %f4 + faesenclx %f2, %f6, %f0 + faesenclx %f4, %f8, %f2 + + brnz,pn $oalign, .Lctr32_unaligned_out + nop + + std %f0, [$out + 0] + std %f2, [$out + 8] + add $out, 16, $out + + brnz,a $len, .Loop_ctr32 + sub $len, 1, $len + +.Lctr32_no_data: + ret + restore + +.align 32 +.Lctr32_unaligned_out: + ldd [%o7 + $mask], $fshift ! shift right params + mov 0xff, $mask + srl $mask, $oalign, $mask + sub %g0, $ileft, $iright + + fshiftorx %f0, %f0, $fshift, %f6 + fshiftorx %f0, %f2, $fshift, %f8 + + stda %f6, [$out + $mask]0xc0 ! 
partial store + orn %g0, $mask, $mask + std %f8, [$out + 8] + add $out, 16, $out + brz $len, .Lctr32_unaligned_out_done + sub $len, 1, $len + b .Loop_ctr32_unaligned_out + nop + +.align 32 +.Loop_ctr32_unaligned_out: + fmovd %f2, $outhead + fxor $ctr0, $r0hi, %f0 ! counter^round[0] + fxor $ctr1, $r0lo, %f2 + ldd [$key + 32], %f6 ! round[2] + ldd [$key + 40], %f8 + + fmovd %f0, %f4 + faesencx %f2, %f10, %f0 + faesencx %f4, %f12, %f2 + ldd [$key + 48], %f10 ! round[3] + ldd [$key + 56], %f12 + + ldx [$inp - 16], %o0 + ldx [$inp - 8], %o1 + brz $ileft, .Lctr32_aligned_inp + movrz $len, 0, $inc + + ldx [$inp], %o2 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + sllx %o1, $ileft, %o1 + or %g1, %o0, %o0 + srlx %o2, $iright, %o2 + or %o2, %o1, %o1 + +.Lctr32_aligned_inp: + fmovd %f0, %f4 + faesencx %f2, %f6, %f0 + faesencx %f4, %f8, %f2 + ldd [$key + 64], %f6 ! round[4] + ldd [$key + 72], %f8 + add $key, 64, $end + sub $rounds, 16*8, $inner + + stx %o0, [%sp + LOCALS + 0] + stx %o1, [%sp + LOCALS + 8] + add $inp, $inc, $inp ! inp+=16 + nop + +.Lctr32_enc_unaligned: + fmovd %f0, %f4 + faesencx %f2, %f10, %f0 + faesencx %f4, %f12, %f2 + ldd [$end + 16], %f10 + ldd [$end + 24], %f12 + add $end, 32, $end + + fmovd %f0, %f4 + faesencx %f2, %f6, %f0 + faesencx %f4, %f8, %f2 + ldd [$end + 0], %f6 + ldd [$end + 8], %f8 + + brnz,a $inner, .Lctr32_enc_unaligned + sub $inner, 16*2, $inner + + fmovd %f0, %f4 + faesencx %f2, %f10, %f0 + faesencx %f4, %f12, %f2 + ldd [$end + 16], %f10 ! round[last-1] + ldd [$end + 24], %f12 + fpadd32 $ctr1, $one, $ctr1 ! increment counter + + fmovd %f0, %f4 + faesencx %f2, %f6, %f0 + faesencx %f4, %f8, %f2 + fxor $in0, $rlhi, %f6 ! inp^round[last] + fxor $in1, $rllo, %f8 + ldd [%sp + LOCALS + 0], $in0 + ldd [%sp + LOCALS + 8], $in1 + + fmovd %f0, %f4 + faesencx %f2, %f10, %f0 + faesencx %f4, %f12, %f2 + ldd [$key + 16], %f10 ! 
round[1] + ldd [$key + 24], %f12 + + fmovd %f0, %f4 + faesenclx %f2, %f6, %f0 + faesenclx %f4, %f8, %f2 + + fshiftorx $outhead, %f0, $fshift, %f6 + fshiftorx %f0, %f2, $fshift, %f8 + std %f6, [$out + 0] + std %f8, [$out + 8] + add $out, 16, $out + + brnz,a $len, .Loop_ctr32_unaligned_out + sub $len, 1, $len + +.Lctr32_unaligned_out_done: + fshiftorx %f2, %f2, $fshift, %f8 + stda %f8, [$out + $mask]0xc0 ! partial store + + ret + restore +.type aes_fx_ctr32_encrypt_blocks,#function +.size aes_fx_ctr32_encrypt_blocks,.-aes_fx_ctr32_encrypt_blocks + +.align 32 +.Linp_align: ! fshiftorx parameters for left shift toward %rs1 + .byte 0, 0, 64, 0, 0, 64, 0, -64 + .byte 0, 0, 56, 8, 0, 56, 8, -56 + .byte 0, 0, 48, 16, 0, 48, 16, -48 + .byte 0, 0, 40, 24, 0, 40, 24, -40 + .byte 0, 0, 32, 32, 0, 32, 32, -32 + .byte 0, 0, 24, 40, 0, 24, 40, -24 + .byte 0, 0, 16, 48, 0, 16, 48, -16 + .byte 0, 0, 8, 56, 0, 8, 56, -8 +.Lout_align: ! fshiftorx parameters for right shift toward %rs2 + .byte 0, 0, 0, 64, 0, 0, 64, 0 + .byte 0, 0, 8, 56, 0, 8, 56, -8 + .byte 0, 0, 16, 48, 0, 16, 48, -16 + .byte 0, 0, 24, 40, 0, 24, 40, -24 + .byte 0, 0, 32, 32, 0, 32, 32, -32 + .byte 0, 0, 40, 24, 0, 40, 24, -40 + .byte 0, 0, 48, 16, 0, 48, 16, -48 + .byte 0, 0, 56, 8, 0, 56, 8, -56 +.Lone: + .word 0, 1 +.asciz "AES for Fujitsu SPARC64 X, CRYPTOGAMS by <appro\@openssl.org>" +.align 4 +___ +} +# Purpose of these subroutines is to explicitly encode VIS instructions, +# so that one can compile the module without having to specify VIS +# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. +# Idea is to reserve for option to produce "universal" binary and let +# programmer detect if current CPU is VIS capable at run-time. 
+sub unvis { +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my ($ref,$opf); +my %visopf = ( "faligndata" => 0x048, + "bshuffle" => 0x04c, + "fpadd32" => 0x052, + "fxor" => 0x06c, + "fsrc2" => 0x078 ); + + $ref = "$mnemonic\t$rs1,$rs2,$rd"; + + if ($opf=$visopf{$mnemonic}) { + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($1&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + + return sprintf ".word\t0x%08x !%s", + 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} + +sub unvis3 { +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 ); +my ($ref,$opf); +my %visopf = ( "alignaddr" => 0x018, + "bmask" => 0x019, + "alignaddrl" => 0x01a ); + + $ref = "$mnemonic\t$rs1,$rs2,$rd"; + + if ($opf=$visopf{$mnemonic}) { + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%([goli])([0-9])/); + $_=$bias{$1}+$2; + } + + return sprintf ".word\t0x%08x !%s", + 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} + +sub unfx { +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my ($ref,$opf); +my %aesopf = ( "faesencx" => 0x90, + "faesdecx" => 0x91, + "faesenclx" => 0x92, + "faesdeclx" => 0x93, + "faeskeyx" => 0x94 ); + + $ref = "$mnemonic\t$rs1,$rs2,$rd"; + + if (defined($opf=$aesopf{$mnemonic})) { + $rs2 = ($rs2 =~ /%f([0-6]*[02468])/) ? 
(($1|$1>>5)&31) : $rs2; + $rs2 = oct($rs2) if ($rs2 =~ /^0/); + + foreach ($rs1,$rd) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($1&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + + return sprintf ".word\t0x%08x !%s", + 2<<30|$rd<<25|0x36<<19|$rs1<<14|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} + +sub unfx3src { +my ($mnemonic,$rs1,$rs2,$rs3,$rd)=@_; +my ($ref,$opf); +my %aesopf = ( "fshiftorx" => 0x0b ); + + $ref = "$mnemonic\t$rs1,$rs2,$rs3,$rd"; + + if (defined($opf=$aesopf{$mnemonic})) { + foreach ($rs1,$rs2,$rs3,$rd) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($1&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + + return sprintf ".word\t0x%08x !%s", + 2<<30|$rd<<25|0x37<<19|$rs1<<14|$rs3<<9|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/ge; + + s/%f([0-9]+)#lo/sprintf "%%f%d",$1+1/ge; + + s/\b(faes[^x]{3,4}x)\s+(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/ + &unfx($1,$2,$3,$4) + /ge or + s/\b([f][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/ + &unfx3src($1,$2,$3,$4,$5) + /ge or + s/\b([fb][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/ + &unvis($1,$2,$3,$4) + /ge or + s/\b(alignaddr[l]*)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/ + &unvis3($1,$2,$3,$4) + /ge; + print $_,"\n"; +} + +close STDOUT; diff --git a/crypto/aes/asm/aesni-mb-x86_64.pl b/crypto/aes/asm/aesni-mb-x86_64.pl index d7ad7882c4ee..1f356d2d3fbb 100755 --- a/crypto/aes/asm/aesni-mb-x86_64.pl +++ b/crypto/aes/asm/aesni-mb-x86_64.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. 
You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -67,7 +74,7 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([ $avx = ($2>=3.0) + ($2>3.0); } -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT=*OUT; # void aesni_multi_cbc_encrypt ( @@ -98,6 +105,7 @@ $code.=<<___; .type aesni_multi_cbc_encrypt,\@function,3 .align 32 aesni_multi_cbc_encrypt: +.cfi_startproc ___ $code.=<<___ if ($avx); cmp \$2,$num @@ -111,12 +119,19 @@ $code.=<<___ if ($avx); ___ $code.=<<___; mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -127,7 +142,7 @@ $code.=<<___ if ($win64); movaps %xmm10,0x40(%rsp) movaps %xmm11,0x50(%rsp) movaps %xmm12,0x60(%rsp) - movaps %xmm13,-0x68(%rax) # not used, saved to share se_handler + movaps %xmm13,-0x68(%rax) # not used, saved to share se_handler movaps %xmm14,-0x58(%rax) movaps %xmm15,-0x48(%rax) ___ @@ -141,6 +156,7 @@ $code.=<<___; sub \$48,%rsp and \$-64,%rsp mov %rax,16(%rsp) # original %rsp +.cfi_cfa_expression %rsp+16,deref,+8 .Lenc4x_body: movdqu ($key),$zero # 0-round key @@ -301,9 +317,9 @@ $code.=<<___; movups @out[0],-16(@outptr[0],$offset) pxor @inp[0],@out[0] - movups @out[1],-16(@outptr[1],$offset) + movups @out[1],-16(@outptr[1],$offset) pxor @inp[1],@out[1] - movups @out[2],-16(@outptr[2],$offset) + movups @out[2],-16(@outptr[2],$offset) pxor @inp[2],@out[2] movups @out[3],-16(@outptr[3],$offset) pxor @inp[3],@out[3] @@ -312,6 +328,7 @@ $code.=<<___; jnz .Loop_enc4x mov 16(%rsp),%rax # original %rsp +.cfi_def_cfa %rax,8 mov 24(%rsp),$num 
#pxor @inp[0],@out[0] @@ -343,20 +360,29 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Lenc4x_epilogue: ret +.cfi_endproc .size aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt .globl aesni_multi_cbc_decrypt .type aesni_multi_cbc_decrypt,\@function,3 .align 32 aesni_multi_cbc_decrypt: +.cfi_startproc ___ $code.=<<___ if ($avx); cmp \$2,$num @@ -370,12 +396,19 @@ $code.=<<___ if ($avx); ___ $code.=<<___; mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -386,7 +419,7 @@ $code.=<<___ if ($win64); movaps %xmm10,0x40(%rsp) movaps %xmm11,0x50(%rsp) movaps %xmm12,0x60(%rsp) - movaps %xmm13,-0x68(%rax) # not used, saved to share se_handler + movaps %xmm13,-0x68(%rax) # not used, saved to share se_handler movaps %xmm14,-0x58(%rax) movaps %xmm15,-0x48(%rax) ___ @@ -400,6 +433,7 @@ $code.=<<___; sub \$48,%rsp and \$-64,%rsp mov %rax,16(%rsp) # original %rsp +.cfi_cfa_expression %rsp+16,deref,+8 .Ldec4x_body: movdqu ($key),$zero # 0-round key @@ -556,10 +590,10 @@ $code.=<<___; movups @out[0],-16(@outptr[0],$offset) movdqu (@inptr[0],$offset),@out[0] - movups @out[1],-16(@outptr[1],$offset) + movups @out[1],-16(@outptr[1],$offset) movdqu (@inptr[1],$offset),@out[1] pxor $zero,@out[0] - movups @out[2],-16(@outptr[2],$offset) + movups @out[2],-16(@outptr[2],$offset) movdqu (@inptr[2],$offset),@out[2] pxor $zero,@out[1] movups @out[3],-16(@outptr[3],$offset) @@ -571,6 +605,7 @@ $code.=<<___; jnz .Loop_dec4x mov 16(%rsp),%rax # original %rsp +.cfi_def_cfa %rax,8 mov 24(%rsp),$num lea `40*4`($inp),$inp @@ -593,14 
+628,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Ldec4x_epilogue: ret +.cfi_endproc .size aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt ___ @@ -616,14 +659,22 @@ $code.=<<___; .type aesni_multi_cbc_encrypt_avx,\@function,3 .align 32 aesni_multi_cbc_encrypt_avx: +.cfi_startproc _avx_cbc_enc_shortcut: mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -650,6 +701,7 @@ $code.=<<___; sub \$192,%rsp and \$-128,%rsp mov %rax,16(%rsp) # original %rsp +.cfi_cfa_expression %rsp+16,deref,+8 .Lenc8x_body: vzeroupper @@ -828,10 +880,10 @@ $code.=<<___; vmovups @out[0],-16(@ptr[0]) # write output sub $offset,@ptr[0] # switch to input vpxor 0x00($offload),@out[0],@out[0] - vmovups @out[1],-16(@ptr[1]) + vmovups @out[1],-16(@ptr[1]) sub `64+1*8`(%rsp),@ptr[1] vpxor 0x10($offload),@out[1],@out[1] - vmovups @out[2],-16(@ptr[2]) + vmovups @out[2],-16(@ptr[2]) sub `64+2*8`(%rsp),@ptr[2] vpxor 0x20($offload),@out[2],@out[2] vmovups @out[3],-16(@ptr[3]) @@ -840,10 +892,10 @@ $code.=<<___; vmovups @out[4],-16(@ptr[4]) sub `64+4*8`(%rsp),@ptr[4] vpxor @inp[0],@out[4],@out[4] - vmovups @out[5],-16(@ptr[5]) + vmovups @out[5],-16(@ptr[5]) sub `64+5*8`(%rsp),@ptr[5] vpxor @inp[1],@out[5],@out[5] - vmovups @out[6],-16(@ptr[6]) + vmovups @out[6],-16(@ptr[6]) sub `64+6*8`(%rsp),@ptr[6] vpxor @inp[2],@out[6],@out[6] vmovups @out[7],-16(@ptr[7]) @@ -854,6 +906,7 @@ $code.=<<___; jnz .Loop_enc8x mov 16(%rsp),%rax # original %rsp +.cfi_def_cfa %rax,8 #mov 24(%rsp),$num #lea `40*8`($inp),$inp #dec $num @@ -876,27 +929,43 @@ 
$code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Lenc8x_epilogue: ret +.cfi_endproc .size aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx .type aesni_multi_cbc_decrypt_avx,\@function,3 .align 32 aesni_multi_cbc_decrypt_avx: +.cfi_startproc _avx_cbc_dec_shortcut: mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -925,6 +994,7 @@ $code.=<<___; and \$-256,%rsp sub \$192,%rsp mov %rax,16(%rsp) # original %rsp +.cfi_cfa_expression %rsp+16,deref,+8 .Ldec8x_body: vzeroupper @@ -1121,12 +1191,12 @@ $code.=<<___; sub $offset,@ptr[0] # switch to input vmovdqu 128+0(%rsp),@out[0] vpxor 0x70($offload),@out[7],@out[7] - vmovups @out[1],-16(@ptr[1]) + vmovups @out[1],-16(@ptr[1]) sub `64+1*8`(%rsp),@ptr[1] vmovdqu @out[0],0x00($offload) vpxor $zero,@out[0],@out[0] vmovdqu 128+16(%rsp),@out[1] - vmovups @out[2],-16(@ptr[2]) + vmovups @out[2],-16(@ptr[2]) sub `64+2*8`(%rsp),@ptr[2] vmovdqu @out[1],0x10($offload) vpxor $zero,@out[1],@out[1] @@ -1142,11 +1212,11 @@ $code.=<<___; vpxor $zero,@out[3],@out[3] vmovdqu @inp[0],0x40($offload) vpxor @inp[0],$zero,@out[4] - vmovups @out[5],-16(@ptr[5]) + vmovups @out[5],-16(@ptr[5]) sub `64+5*8`(%rsp),@ptr[5] vmovdqu @inp[1],0x50($offload) vpxor @inp[1],$zero,@out[5] - vmovups @out[6],-16(@ptr[6]) + vmovups @out[6],-16(@ptr[6]) sub `64+6*8`(%rsp),@ptr[6] vmovdqu @inp[2],0x60($offload) vpxor @inp[2],$zero,@out[6] @@ -1160,6 +1230,7 @@ $code.=<<___; jnz .Loop_dec8x mov 16(%rsp),%rax # original %rsp +.cfi_def_cfa %rax,8 #mov 24(%rsp),$num #lea `40*8`($inp),$inp #dec $num @@ 
-1182,14 +1253,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Ldec8x_epilogue: ret +.cfi_endproc .size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx ___ }}} @@ -1246,10 +1325,10 @@ se_handler: mov -48(%rax),%r15 mov %rbx,144($context) # restore context->Rbx mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore cotnext->R12 - mov %r13,224($context) # restore cotnext->R13 - mov %r14,232($context) # restore cotnext->R14 - mov %r15,240($context) # restore cotnext->R15 + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R15 lea -56-10*16(%rax),%rsi lea 512($context),%rdi # &context.Xmm6 diff --git a/crypto/aes/asm/aesni-sha1-x86_64.pl b/crypto/aes/asm/aesni-sha1-x86_64.pl index 7a30e893fbe6..b01a4c55c86a 100755 --- a/crypto/aes/asm/aesni-sha1-x86_64.pl +++ b/crypto/aes/asm/aesni-sha1-x86_64.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. 
You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -25,7 +32,10 @@ # Sandy Bridge 5.05[+5.0(6.1)] 10.06(11.15) 5.98(7.05) +68%(+58%) # Ivy Bridge 5.05[+4.6] 9.65 5.54 +74% # Haswell 4.43[+3.6(4.2)] 8.00(8.58) 4.55(5.21) +75%(+65%) +# Skylake 2.63[+3.5(4.1)] 6.17(6.69) 4.23(4.44) +46%(+51%) # Bulldozer 5.77[+6.0] 11.72 6.37 +84% +# Ryzen(**) 2.71[+1.93] 4.64 2.74 +69% +# Goldmont(**) 3.82[+1.70] 5.52 4.20 +31% # # AES-192-CBC # Westmere 4.51 9.81 6.80 +44% @@ -39,12 +49,16 @@ # Sandy Bridge 7.05 12.06(13.15) 7.12(7.72) +69%(+70%) # Ivy Bridge 7.05 11.65 7.12 +64% # Haswell 6.19 9.76(10.34) 6.21(6.25) +57%(+65%) +# Skylake 3.62 7.16(7.68) 4.56(4.76) +57%(+61%) # Bulldozer 8.00 13.95 8.25 +69% +# Ryzen(**) 3.71 5.64 3.72 +52% +# Goldmont(**) 5.35 7.05 5.76 +22% # # (*) There are two code paths: SSSE3 and AVX. See sha1-568.pl for # background information. Above numbers in parentheses are SSSE3 # results collected on AVX-capable CPU, i.e. apply on OSes that # don't support AVX. +# (**) SHAEXT results. # # Needless to mention that it makes no sense to implement "stitched" # *decrypt* subroutine. 
Because *both* AESNI-CBC decrypt and SHA1 @@ -100,7 +114,7 @@ $shaext=1; ### set to zero if compiling for 1.0.1 $stitched_decrypt=0; -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT=*OUT; # void aesni_cbc_sha1_enc(const void *inp, @@ -177,16 +191,24 @@ $code.=<<___; .type aesni_cbc_sha1_enc_ssse3,\@function,6 .align 32 aesni_cbc_sha1_enc_ssse3: +.cfi_startproc mov `($win64?56:8)`(%rsp),$inp # load 7th argument #shr \$6,$len # debugging artefact #jz .Lepilogue_ssse3 # debugging artefact push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 lea `-104-($win64?10*16:0)`(%rsp),%rsp +.cfi_adjust_cfa_offset `104+($win64?10*16:0)` #mov $in0,$inp # debugging artefact #lea 64(%rsp),$ctx # debugging artefact ___ @@ -298,7 +320,7 @@ ___ $r++; unshift(@rndkey,pop(@rndkey)); }; -sub Xupdate_ssse3_16_31() # recall that $Xi starts wtih 4 +sub Xupdate_ssse3_16_31() # recall that $Xi starts with 4 { use integer; my $body = shift; my @insns = (&$body,&$body,&$body,&$body); # 40 instructions @@ -712,15 +734,24 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea `104+($win64?10*16:0)`(%rsp),%rsi +.cfi_def_cfa %rsi,56 mov 0(%rsi),%r15 +.cfi_restore %r15 mov 8(%rsi),%r14 +.cfi_restore %r14 mov 16(%rsi),%r13 +.cfi_restore %r13 mov 24(%rsi),%r12 +.cfi_restore %r12 mov 32(%rsi),%rbp +.cfi_restore %rbp mov 40(%rsi),%rbx +.cfi_restore %rbx lea 48(%rsi),%rsp +.cfi_def_cfa %rsp,8 .Lepilogue_ssse3: ret +.cfi_endproc .size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3 ___ @@ -784,7 +815,7 @@ sub body_00_19_dec () { # ((c^d)&b)^d sub body_20_39_dec () { # b^d^c # on entry @T[0]=b^d return &body_40_59_dec() if ($rx==39); - + my @r=@body_20_39; unshift (@r,@aes256_dec[$rx]) if (@aes256_dec[$rx]); @@ -828,14 +859,22 @@ $code.=<<___; .type aesni256_cbc_sha1_dec_ssse3,\@function,6 .align 32 aesni256_cbc_sha1_dec_ssse3: +.cfi_startproc 
 mov `($win64?56:8)`(%rsp),$inp # load 7th argument push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 lea `-104-($win64?10*16:0)`(%rsp),%rsp +.cfi_adjust_cfa_offset `104+($win64?10*16:0)` ___ $code.=<<___ if ($win64); movaps %xmm6,96+0(%rsp) @@ -983,15 +1022,24 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea `104+($win64?10*16:0)`(%rsp),%rsi +.cfi_def_cfa %rsi,56 mov 0(%rsi),%r15 +.cfi_restore %r15 mov 8(%rsi),%r14 +.cfi_restore %r14 mov 16(%rsi),%r13 +.cfi_restore %r13 mov 24(%rsi),%r12 +.cfi_restore %r12 mov 32(%rsi),%rbp +.cfi_restore %rbp mov 40(%rsi),%rbx +.cfi_restore %rbx lea 48(%rsi),%rsp +.cfi_def_cfa %rsp,8 .Lepilogue_dec_ssse3: ret +.cfi_endproc .size aesni256_cbc_sha1_dec_ssse3,.-aesni256_cbc_sha1_dec_ssse3 ___ }}} @@ -1017,16 +1065,24 @@ $code.=<<___; .type aesni_cbc_sha1_enc_avx,\@function,6 .align 32 aesni_cbc_sha1_enc_avx: +.cfi_startproc mov `($win64?56:8)`(%rsp),$inp # load 7th argument #shr \$6,$len # debugging artefact #jz .Lepilogue_avx # debugging artefact push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 lea `-104-($win64?10*16:0)`(%rsp),%rsp +.cfi_adjust_cfa_offset `104+($win64?10*16:0)` #mov $in0,$inp # debugging artefact #lea 64(%rsp),$ctx # debugging artefact ___ @@ -1137,7 +1193,7 @@ ___ $r++; unshift(@rndkey,pop(@rndkey)); }; -sub Xupdate_avx_16_31() # recall that $Xi starts wtih 4 +sub Xupdate_avx_16_31() # recall that $Xi starts with 4 { use integer; my $body = shift; my @insns = (&$body,&$body,&$body,&$body); # 40 instructions @@ -1425,15 +1481,24 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea `104+($win64?10*16:0)`(%rsp),%rsi +.cfi_def_cfa %rsi,56 mov 0(%rsi),%r15 +.cfi_restore %r15 mov 8(%rsi),%r14 +.cfi_restore %r14 mov 16(%rsi),%r13 +.cfi_restore %r13 mov 24(%rsi),%r12 +.cfi_restore %r12 mov 32(%rsi),%rbp
+.cfi_restore %rbp mov 40(%rsi),%rbx +.cfi_restore %rbx lea 48(%rsi),%rsp +.cfi_def_cfa %rsp,8 .Lepilogue_avx: ret +.cfi_endproc .size aesni_cbc_sha1_enc_avx,.-aesni_cbc_sha1_enc_avx ___ @@ -1482,14 +1547,22 @@ $code.=<<___; .type aesni256_cbc_sha1_dec_avx,\@function,6 .align 32 aesni256_cbc_sha1_dec_avx: +.cfi_startproc mov `($win64?56:8)`(%rsp),$inp # load 7th argument push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 lea `-104-($win64?10*16:0)`(%rsp),%rsp +.cfi_adjust_cfa_offset `104+($win64?10*16:0)` ___ $code.=<<___ if ($win64); movaps %xmm6,96+0(%rsp) @@ -1636,15 +1709,24 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea `104+($win64?10*16:0)`(%rsp),%rsi +.cfi_def_cfa %rsi,56 mov 0(%rsi),%r15 +.cfi_restore %r15 mov 8(%rsi),%r14 +.cfi_restore %r14 mov 16(%rsi),%r13 +.cfi_restore %r13 mov 24(%rsi),%r12 +.cfi_restore %r12 mov 32(%rsi),%rbp +.cfi_restore %rbp mov 40(%rsi),%rbx +.cfi_restore %rbx lea 48(%rsi),%rsp +.cfi_def_cfa %rsp,8 .Lepilogue_dec_avx: ret +.cfi_endproc .size aesni256_cbc_sha1_dec_avx,.-aesni256_cbc_sha1_dec_avx ___ }}} diff --git a/crypto/aes/asm/aesni-sha256-x86_64.pl b/crypto/aes/asm/aesni-sha256-x86_64.pl index 588ade64ee52..ef460237108e 100755 --- a/crypto/aes/asm/aesni-sha256-x86_64.pl +++ b/crypto/aes/asm/aesni-sha256-x86_64.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. 
You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -21,17 +28,21 @@ # for standalone AESNI-CBC encrypt, standalone SHA256, and stitched # subroutine: # -# AES-128/-192/-256+SHA256 this(**)gain -# Sandy Bridge 5.05/6.05/7.05+11.6 13.0 +28%/36%/43% -# Ivy Bridge 5.05/6.05/7.05+10.3 11.6 +32%/41%/50% -# Haswell 4.43/5.29/6.19+7.80 8.79 +39%/49%/59% -# Bulldozer 5.77/6.89/8.00+13.7 13.7 +42%/50%/58% +# AES-128/-192/-256+SHA256 this(**) gain +# Sandy Bridge 5.05/6.05/7.05+11.6 13.0 +28%/36%/43% +# Ivy Bridge 5.05/6.05/7.05+10.3 11.6 +32%/41%/50% +# Haswell 4.43/5.29/6.19+7.80 8.79 +39%/49%/59% +# Skylake 2.62/3.14/3.62+7.70 8.10 +27%/34%/40% +# Bulldozer 5.77/6.89/8.00+13.7 13.7 +42%/50%/58% +# Ryzen(***) 2.71/-/3.71+2.05 2.74/-/3.73 +74%/-/54% +# Goldmont(***) 3.82/-/5.35+4.16 4.73/-/5.94 +69%/-/60% # -# (*) there are XOP, AVX1 and AVX2 code pathes, meaning that +# (*) there are XOP, AVX1 and AVX2 code paths, meaning that # Westmere is omitted from loop, this is because gain was not # estimated high enough to justify the effort; # (**) these are EVP-free results, results obtained with 'speed # -evp aes-256-cbc-hmac-sha256' will vary by percent or two; +# (***) these are SHAEXT results; $flavour = shift; $output = shift; @@ -66,7 +77,7 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([ $shaext=$avx; ### set to zero if compiling for 1.0.1 $avx=1 if (!$shaext && $avx); -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT=*OUT; $func="aesni_cbc_sha256_enc"; @@ -101,7 +112,7 @@ $_key="16*$SZ+3*8(%rsp)"; $_ivp="16*$SZ+4*8(%rsp)"; $_ctx="16*$SZ+5*8(%rsp)"; $_in0="16*$SZ+6*8(%rsp)"; -$_rsp="16*$SZ+7*8(%rsp)"; +$_rsp="`16*$SZ+7*8`(%rsp)"; $framesz=16*$SZ+8*8; $code=<<___; @@ 
-331,15 +342,23 @@ $code.=<<___; .type ${func}_xop,\@function,6 .align 64 ${func}_xop: +.cfi_startproc .Lxop_shortcut: mov `($win64?56:8)`(%rsp),$in0 # load 7th parameter + mov %rsp,%rax # copy %rsp +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 - mov %rsp,%r11 # copy %rsp +.cfi_push %r15 sub \$`$framesz+$win64*16*10`,%rsp and \$-64,%rsp # align stack frame @@ -355,7 +374,8 @@ ${func}_xop: mov $ivp,$_ivp mov $ctx,$_ctx mov $in0,$_in0 - mov %r11,$_rsp + mov %rax,$_rsp +.cfi_cfa_expression $_rsp,deref,+8 ___ $code.=<<___ if ($win64); movaps %xmm6,`$framesz+16*0`(%rsp) @@ -593,6 +613,7 @@ $code.=<<___; mov $_ivp,$ivp mov $_rsp,%rsi +.cfi_def_cfa %rsi,8 vmovdqu $iv,($ivp) # output IV vzeroall ___ @@ -609,15 +630,23 @@ $code.=<<___ if ($win64); movaps `$framesz+16*9`(%rsp),%xmm15 ___ $code.=<<___; - mov (%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp + mov -48(%rsi),%r15 +.cfi_restore %r15 + mov -40(%rsi),%r14 +.cfi_restore %r14 + mov -32(%rsi),%r13 +.cfi_restore %r13 + mov -24(%rsi),%r12 +.cfi_restore %r12 + mov -16(%rsi),%rbp +.cfi_restore %rbp + mov -8(%rsi),%rbx +.cfi_restore %rbx + lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lepilogue_xop: ret +.cfi_endproc .size ${func}_xop,.-${func}_xop ___ ###################################################################### @@ -629,15 +658,23 @@ $code.=<<___; .type ${func}_avx,\@function,6 .align 64 ${func}_avx: +.cfi_startproc .Lavx_shortcut: mov `($win64?56:8)`(%rsp),$in0 # load 7th parameter + mov %rsp,%rax # copy %rsp +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 - mov %rsp,%r11 # copy %rsp +.cfi_push %r15 sub \$`$framesz+$win64*16*10`,%rsp and \$-64,%rsp # align stack frame @@ -653,7 +690,8 @@ 
${func}_avx: mov $ivp,$_ivp mov $ctx,$_ctx mov $in0,$_in0 - mov %r11,$_rsp + mov %rax,$_rsp +.cfi_cfa_expression $_rsp,deref,+8 ___ $code.=<<___ if ($win64); movaps %xmm6,`$framesz+16*0`(%rsp) @@ -844,6 +882,7 @@ $code.=<<___; mov $_ivp,$ivp mov $_rsp,%rsi +.cfi_def_cfa %rsi,8 vmovdqu $iv,($ivp) # output IV vzeroall ___ @@ -860,15 +899,23 @@ $code.=<<___ if ($win64); movaps `$framesz+16*9`(%rsp),%xmm15 ___ $code.=<<___; - mov (%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp + mov -48(%rsi),%r15 +.cfi_restore %r15 + mov -40(%rsi),%r14 +.cfi_restore %r14 + mov -32(%rsi),%r13 +.cfi_restore %r13 + mov -24(%rsi),%r12 +.cfi_restore %r12 + mov -16(%rsi),%rbp +.cfi_restore %rbp + mov -8(%rsi),%rbx +.cfi_restore %rbx + lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lepilogue_avx: ret +.cfi_endproc .size ${func}_avx,.-${func}_avx ___ @@ -876,7 +923,7 @@ if ($avx>1) {{ ###################################################################### # AVX2+BMI code path # -my $a5=$SZ==4?"%esi":"%rsi"; # zap $inp +my $a5=$SZ==4?"%esi":"%rsi"; # zap $inp my $PUSH8=8*2*$SZ; use integer; @@ -925,15 +972,23 @@ $code.=<<___; .type ${func}_avx2,\@function,6 .align 64 ${func}_avx2: +.cfi_startproc .Lavx2_shortcut: mov `($win64?56:8)`(%rsp),$in0 # load 7th parameter + mov %rsp,%rax # copy %rsp +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 - mov %rsp,%r11 # copy %rsp +.cfi_push %r15 sub \$`2*$SZ*$rounds+8*8+$win64*16*10`,%rsp and \$-256*$SZ,%rsp # align stack frame add \$`2*$SZ*($rounds-8)`,%rsp @@ -950,7 +1005,8 @@ ${func}_avx2: mov $ivp,$_ivp mov $ctx,$_ctx mov $in0,$_in0 - mov %r11,$_rsp + mov %rax,$_rsp +.cfi_cfa_expression $_rsp,deref,+8 ___ $code.=<<___ if ($win64); movaps %xmm6,`$framesz+16*0`(%rsp) @@ -1181,6 +1237,7 @@ $code.=<<___; lea ($Tbl),%rsp mov $_ivp,$ivp mov $_rsp,%rsi 
+.cfi_def_cfa %rsi,8 vmovdqu $iv,($ivp) # output IV vzeroall ___ @@ -1197,15 +1254,23 @@ $code.=<<___ if ($win64); movaps `$framesz+16*9`(%rsp),%xmm15 ___ $code.=<<___; - mov (%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp + mov -48(%rsi),%r15 +.cfi_restore %r15 + mov -40(%rsi),%r14 +.cfi_restore %r14 + mov -32(%rsi),%r13 +.cfi_restore %r13 + mov -24(%rsi),%r12 +.cfi_restore %r12 + mov -16(%rsi),%rbp +.cfi_restore %rbp + mov -8(%rsi),%rbx +.cfi_restore %rbx + lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lepilogue_avx2: ret +.cfi_endproc .size ${func}_avx2,.-${func}_avx2 ___ }} @@ -1562,7 +1627,6 @@ ___ $code.=<<___; mov %rax,%rsi # put aside Rsp mov 16*$SZ+7*8(%rax),%rax # pull $_rsp - lea 48(%rax),%rax mov -8(%rax),%rbx mov -16(%rax),%rbp diff --git a/crypto/aes/asm/aesni-x86.pl b/crypto/aes/asm/aesni-x86.pl index 9b2e37aafb1a..b351fca28e02 100755 --- a/crypto/aes/asm/aesni-x86.pl +++ b/crypto/aes/asm/aesni-x86.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -43,16 +50,22 @@ # Add aesni_xts_[en|de]crypt. Westmere spends 1.50 cycles processing # one byte out of 8KB with 128-bit key, Sandy Bridge - 1.09. +# November 2015 +# +# Add aesni_ocb_[en|de]crypt. + ###################################################################### # Current large-block performance in cycles per byte processed with # 128-bit key (less is better). 
# -# CBC en-/decrypt CTR XTS ECB +# CBC en-/decrypt CTR XTS ECB OCB # Westmere 3.77/1.37 1.37 1.52 1.27 -# * Bridge 5.07/0.98 0.99 1.09 0.91 -# Haswell 4.44/0.80 0.97 1.03 0.72 -# Silvermont 5.77/3.56 3.67 4.03 3.46 -# Bulldozer 5.80/0.98 1.05 1.24 0.93 +# * Bridge 5.07/0.98 0.99 1.09 0.91 1.10 +# Haswell 4.44/0.80 0.97 1.03 0.72 0.76 +# Skylake 2.68/0.65 0.65 0.66 0.64 0.66 +# Silvermont 5.77/3.56 3.67 4.03 3.46 4.03 +# Goldmont 3.84/1.39 1.39 1.63 1.31 1.70 +# Bulldozer 5.80/0.98 1.05 1.24 0.93 1.23 $PREFIX="aesni"; # if $PREFIX is set to "AES", the script # generates drop-in replacement for @@ -63,7 +76,11 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; -&asm_init($ARGV[0],$0); +$output = pop; +open OUT,">$output"; +*STDOUT=*OUT; + +&asm_init($ARGV[0]); &external_label("OPENSSL_ia32cap_P"); &static_label("key_const"); @@ -222,7 +239,7 @@ sub aesni_generate1 # fully unrolled loop # can schedule aes[enc|dec] every cycle optimal interleave factor # equals to corresponding instructions latency. 8x is optimal for # * Bridge, but it's unfeasible to accommodate such implementation -# in XMM registers addreassable in 32-bit mode and therefo |